import os

import openai
import torch
from dotenv import load_dotenv
from transformers import pipeline
import gradio as gr

# fairseq is only needed by the commented-out facebook/tts_transformer path below,
# so these imports are disabled alongside it:
# from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
# from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
# Whisper base is multilingual, so it can transcribe the Spanish input used here
model_id = "openai/whisper-base"
pipe = pipeline("automatic-speech-recognition", model=model_id)
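# For reference: an ASR pipeline call returns a dict like {"text": "..."},
# which is why transcribe_speech below indexes output["text"].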
def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "spanish",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,  # split long recordings into 30-second chunks
        batch_size=8,
    )
    return output["text"]
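# Minimal local sanity check for transcribe_speech; "sample_es.wav" is a
# hypothetical recording, not a file shipped with this Space:
#
#   print(transcribe_speech("sample_es.wav"))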
# Load environment variables from the .env file when running locally
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]
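# Expected .env contents for local runs (the key value is a placeholder;
# on Spaces, set OPENAI_API_KEY as a repository secret instead):
#
#   OPENAI_API_KEY=sk-...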
def clear_chat():
    # Resets a module-level history; unused, since the UI wires reset_state instead
    global chat_history
    chat_history = []
def query_chatgpt(message, chat_history):
    chat_history.append({"role": "user", "content": message})
    print("Asking:", message)
    print("history:", chat_history)
    # openai.ChatCompletion is the pre-v1 openai-python API; keep openai<1.0 installed
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    ).choices[0].message.content
    chat_history.append({"role": "assistant", "content": response})
    return response, chat_history
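# Illustrative turn (the reply text is whatever the model returns):
#
#   reply, history = query_chatgpt("Hola, ¿qué hora es?", [])
#   # history now holds the user message followed by the assistant reply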
# Alternative TTS path using fairseq, kept for reference but currently disabled:
# models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
#     "facebook/tts_transformer-es-css10",
#     arg_overrides={"vocoder": "hifigan", "fp16": False},
# )
# model = models[0]
# TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
# generator = task.build_generator([model], cfg)
# text = "Había una vez."
# sample = TTSHubInterface.get_model_input(task, text)
# wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
# ipd.Audio(wav, rate=rate)  # ipd = IPython.display; only useful in a notebook
# Local helper module (tts.py in this repo); synthesize returns (sample_rate, waveform)
from tts import synthesize

# def syn_facebookmms(text):
#     sample = TTSHubInterface.get_model_input(task, text)
#     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
#     return wav, rate
def answer_question(filepath, chat_history):
    # Speech in -> transcription -> ChatGPT reply -> speech out
    transcription = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(transcription, chat_history)
    print("history:", chat_history)
    # audio = synthesise(response)
    # audio, rate = syn_facebookmms(response)
    rate, audio = synthesize(response, 1, "spa")
    print(audio)
    return rate, audio
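# Note: a gr.Audio output accepts a (sample_rate, numpy_array) tuple, which is
# why answer_question returns (rate, audio) directly instead of writing a file.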
def reset_state(chat_history):
    # Return a fresh list; Gradio stores it back into the gr.State component
    chat_history = []
    return chat_history
with gr.Blocks() as demo:
    chat_history = gr.State([])
    entrada = gr.Audio(source="microphone", type="filepath")  # input: mic recording
    boton = gr.Button("Responder")  # "Answer"
    button = gr.Button("Reset State")
    salida = gr.Audio()  # output: synthesized reply
    boton.click(answer_question, [entrada, chat_history], salida)
    button.click(reset_state, chat_history, chat_history)
demo.launch(debug=True)