from transformers import pipeline
import gradio as gr

# Create pipelines for text-to-speech and speech-to-text
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
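# Note: whisper-medium is a large checkpoint; smaller ones (e.g. openai/whisper-small)
# trade some accuracy for much faster loading and inference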

# Create a pipeline for text generation; the model needs a chat template so the
# pipeline can accept role/content messages (a small dialogue-tuned model is used here)
chat = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

def handle_user_input(user_text, user_voice):
    if user_text:
        user_text = user_text.strip()  # Remove leading/trailing whitespace
        input_type = "text"
    else:
        try:
            # Transcribe the recorded audio (a filepath from Gradio) to text
            user_text = stt(user_voice)["text"] if user_voice else ""
        except Exception:
            user_text = ""
        input_type = "voice"

    # Generate a response; for chat-style input the pipeline returns the whole
    # conversation, so the new assistant turn is the last message
    messages = [
        {"role": "system", "content": "You are a kind, helpful assistant."},
        {"role": "user", "content": user_text},
    ]
    outputs = chat(messages, max_new_tokens=100, do_sample=True, top_p=0.95, temperature=0.7)
    chat_reply = outputs[0]["generated_text"][-1]["content"]
    messages.append({"role": "assistant", "content": chat_reply})

    # Synthesize speech for the reply (only when the input was typed text)
    audio = None
    if input_type == "text":
        speech = tts(chat_reply)
        # Gradio's numpy audio format is a (sampling_rate, waveform) tuple
        audio = (speech["sampling_rate"], speech["audio"].squeeze())

    return chat_reply, audio


# Create and launch the Gradio interface
iface = gr.Interface(
    fn=handle_user_input,
    inputs=[gr.Textbox(label="Enter your text (optional)"), gr.Audio(sources=["microphone"], type="filepath")],
    outputs=[gr.Textbox(label="Assistant Text"), gr.Audio(label="Assistant Voice (if text input)")],
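    # live=True re-runs the handler on every input change; with models this heavy,
    # removing it (so Gradio shows a submit button) is usually more practical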
    live=True,
    title="AI Voice Assistant",
)
iface.launch(debug=True)


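# Earlier draft (voice input only), kept for reference: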
"""
from transformers import pipeline
import gradio as gr
from IPython.display import Audio

# Create a pipeline for text-to-speech
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

# Create a pipeline for speech-to-text
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

# Create a pipeline for text generation
chat = pipeline("text-generation", model="openai-community/gpt2-xl")

def voice_chat(user_voice):
    user_text = stt(user_voice)["text"]
    messages = [{"role": "system", "content": "You are a kind helpful assistant."}]
    messages.append({"role": "user", "content": user_text})
    chat_reply = chat(messages=messages, max_length=100, top_p=0.95, temperature=0.7)[0]["generated_text"]
    messages.append({"role": "assistant", "content": chat_reply})
    audio = tts(chat_reply)["audio"]
    return chat_reply, audio

text_reply = gr.Textbox(label="ChatGPT Text")
voice_reply = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=voice_chat,
    inputs=[gr.Textbox(label="Enter your text"), gr.Audio(sources=["microphone"], type="filepath")],
    outputs=[gr.Textbox(label="ChatGPT Text"), gr.Audio(label="ChatGPT Voice")],
    live=True,
    title="AI Voice Assistant with ChatGPT AI",
)

iface.launch(debug=True)
"""