File size: 2,934 Bytes
cbab4ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56cf024
 
 
 
 
 
 
 
 
 
cbab4ab
 
56cf024
cbab4ab
 
 
56cf024
 
cbab4ab
 
 
 
03fc7c5
 
 
 
 
 
 
cbab4ab
 
 
 
 
 
 
 
 
 
03fc7c5
 
 
 
cbab4ab
56cf024
cbab4ab
56cf024
 
cbab4ab
 
 
 
 
 
56cf024
 
 
 
cbab4ab
 
 
56cf024
cbab4ab
 
56cf024
cbab4ab
56cf024
 
 
cbab4ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from transformers import pipeline
import torch

import os
import openai
from dotenv import load_dotenv
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface

# Whisper-base ASR model, served through the Hugging Face pipeline API.
# Loaded once at import time so every transcription call reuses it.
model_id = "openai/whisper-base" 
pipe = pipeline("automatic-speech-recognition", model=model_id)

def transcribe_speech(filepath):
    """Transcribe the audio file at *filepath* to text with the Whisper pipeline.

    Long recordings are split into 30-second windows and decoded in
    batches of 8; decoding is forced to Spanish transcription.
    """
    # Decoding options; update with the language you've fine-tuned on.
    decode_opts = {
        "task": "transcribe",
        "language": "spanish",
    }
    result = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs=decode_opts,
        chunk_length_s=30,
        batch_size=8,
    )
    return result["text"]


# Load environment variables from the .env file (for running locally).
load_dotenv()
# Fail fast with a KeyError if OPENAI_API_KEY is not configured.
openai.api_key = os.environ['OPENAI_API_KEY']


def clear_chat():
    """Reset the module-level chat history to an empty list.

    Rebinds the global ``chat_history`` so subsequent queries start a
    fresh conversation.
    """
    # Fixed: body used a non-standard 5-space indent, inconsistent with
    # the 4-space convention used everywhere else in this file.
    global chat_history
    chat_history = []


def query_chatgpt(message, chat_history):
    """Send *message* to gpt-3.5-turbo and record both turns in *chat_history*.

    Mutates *chat_history* in place (user turn appended before the call,
    assistant turn after) and returns ``(response_text, chat_history)``.
    """
    chat_history.append({'role': 'user', 'content': str(message)})
    print("Preguntando " + message)
    print("historial", chat_history)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    )
    response = completion.choices[0].message.content
    chat_history.append({'role': 'assistant', 'content': str(response)})
    return response, chat_history




# models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
#     "facebook/tts_transformer-es-css10",
#     arg_overrides={"vocoder": "hifigan", "fp16": False}
# )
# model = models[0]
# TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
# generator = task.build_generator([model], cfg)

# text = "Había una vez."

# sample = TTSHubInterface.get_model_input(task, text)
# wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

# ipd.Audio(wav, rate=rate)
from tts import synthesize


# def syn_facebookmms(text):
#     sample = TTSHubInterface.get_model_input(task, text)
#     wav,rate = TTSHubInterface.get_prediction(task, model, generator, sample)
#     return wav,rate

def answer_question(filepath, chat_history):
    """One full voice-assistant turn: audio in -> transcript -> LLM -> audio out.

    Returns ``(rate, audio)`` suitable for a Gradio Audio output.
    """
    question = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(question, chat_history)
    print("historial", chat_history)
    # Spanish ("spa") TTS via the local tts module; second argument kept at 1
    # as in the original call — its meaning is defined by tts.synthesize.
    rate, audio = synthesize(response, 1, "spa")
    print(audio)
    return rate, audio

def reset_state(chat_history):
    """Gradio reset callback: discard *chat_history* and return a fresh, empty one."""
    return []


import gradio as gr
# Minimal voice-chat UI: record a question with the microphone, hear the answer.
with gr.Blocks() as demo:
    # Per-session conversation history, threaded through both callbacks.
    chat_history = gr.State([])
    # NOTE(review): `source=` is the pre-4.0 Gradio Audio API (4.x uses
    # `sources=[...]`) — confirm against the pinned gradio version.
    entrada = gr.Audio(source="microphone",type="filepath")
    boton = gr.Button("Responder")
    button = gr.Button("Reset State")
    salida = gr.Audio()
    # Respond: (audio file, history) -> spoken answer.
    boton.click(answer_question,[entrada,chat_history],salida)
    # Reset: replace the stored history with an empty list.
    button.click(reset_state,chat_history,chat_history)
  
demo.launch(debug=True)