Spaces:
Runtime error
Runtime error
File size: 2,940 Bytes
5e3d8aa abe4e34 f6513f7 63523f5 5949d34 73497ab 5949d34 5e3d8aa a7b8833 63523f5 5e3d8aa 63523f5 5e3d8aa 2b94432 63523f5 5e3d8aa 63523f5 5e3d8aa 63523f5 abe4e34 1f43cd4 63523f5 23a51eb 63523f5 5949d34 ed1617d 63523f5 23a51eb 5949d34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
from transformers import pipeline
import gradio as gr
from IPython.display import Audio
# Model pipelines shared by the request handler below. Each one is built once
# at import time so that individual requests do not reload any weights.

# Speech synthesis: turns the assistant's reply text into audio.
tts = pipeline(
    task="text-to-speech",
    model="facebook/mms-tts-eng",
)

# Speech recognition: transcribes the microphone recording.
stt = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
)

# Reply generation.
# NOTE(review): handle_user_input sends role/content message lists to `chat`;
# confirm this checkpoint is available on the Hub and supports that input.
chat = pipeline(
    task="text-generation",
    model="facebook/bart-base-conversational",
)
def handle_user_input(user_text, user_voice):
    """Answer a typed or spoken request and return the reply for the UI.

    Typed text takes priority. When the text box is empty (or holds only
    whitespace) the microphone recording is transcribed with the module-level
    ``stt`` pipeline instead. The reply is produced by the ``chat`` pipeline,
    and a spoken version is synthesised with ``tts`` only for typed input.

    Args:
        user_text: Contents of the text box; may be ``None`` or empty.
        user_voice: Path of the recorded audio file, or ``None`` when nothing
            has been recorded.

    Returns:
        A ``(reply_text, reply_audio)`` tuple. ``reply_audio`` is a
        ``(sampling_rate, samples)`` pair, or ``None`` when no speech was
        generated. When there is nothing to answer the result is
        ``("", None)`` and no model is invoked.
    """
    import logging  # local import: keeps this block independent of file-level imports

    typed_text = (user_text or "").strip()
    if typed_text:
        prompt = typed_text
        input_type = "text"
    else:
        input_type = "voice"
        prompt = ""
        if user_voice is not None:
            try:
                prompt = stt(user_voice)["text"].strip()
            except Exception:
                # Best effort: a failed transcription is treated as "nothing
                # was said", but the failure is logged instead of vanishing.
                logging.getLogger(__name__).exception("Speech-to-text failed")

    if not prompt:
        # The interface is created with live=True, so this handler also runs
        # while both inputs are still empty; skip the models in that case.
        return "", None

    # Generate response
    messages = [
        {"role": "system", "content": "Hi! How can I help you today?"},
        {"role": "user", "content": prompt},
    ]
    # The text-generation pipeline takes its input as the first positional
    # argument; it has no `messages=` keyword.
    generated = chat(messages, max_length=100, top_p=0.95, temperature=0.7)[0]["generated_text"]
    if isinstance(generated, str):
        chat_reply = generated
    else:
        # Chat-style input yields the whole conversation as a list of
        # messages; the assistant's reply is the last entry.
        chat_reply = generated[-1]["content"]

    # Generate audio output (only if input was text)
    audio = None
    if input_type == "text" and chat_reply:
        speech = tts(chat_reply)
        # gr.Audio needs the sample rate alongside the samples. squeeze()
        # drops a leading batch axis if the pipeline returns one
        # (assumes an array-like "audio" value -- TODO confirm).
        audio = (speech["sampling_rate"], speech["audio"].squeeze())
    return chat_reply, audio
# Assemble the Gradio UI: a text box and a microphone recorder feed
# handle_user_input, whose reply is shown as text and, when available, audio.
text_input = gr.Textbox(label="Enter your text (optional)")
voice_input = gr.Audio(sources=["microphone"], type="filepath")
reply_text = gr.Textbox(label="Assistant Text")
reply_voice = gr.Audio(label="Assistant Voice (if text input)")

iface = gr.Interface(
    fn=handle_user_input,
    inputs=[text_input, voice_input],
    outputs=[reply_text, reply_voice],
    title="AI Voice Assistant",
    live=True,  # re-run the handler whenever an input changes
)

# Start the web server with debug output enabled.
iface.launch(debug=True)
"""
from transformers import pipeline
import gradio as gr
from IPython.display import Audio
# Create a pipeline for text-to-speech
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
# Create a pipeline for speech-to-text
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
# Create a pipeline for text generation
chat = pipeline("text-generation", model="openai-community/gpt2-xl")
def voice_chat(user_voice):
user_text = stt(user_voice)["text"]
messages = [{"role": "system", "content": "You are a kind helpful assistant."}]
messages.append({"role": "user", "content": user_text})
chat_reply = chat(messages=messages, max_length=100, top_p=0.95, temperature=0.7)[0]["generated_text"]
messages.append({"role": "assistant", "content": chat_reply})
audio = tts(chat_reply)["audio"]
return chat_reply, audio
text_reply = gr.Textbox(label="ChatGPT Text")
voice_reply = gr.Audio(type="filepath")
iface = gr.Interface(
fn=voice_chat,
inputs=[gr.Textbox(label="Enter your text"), gr.Audio(sources=["microphone"], type="filepath")],
outputs=[gr.Textbox(label="ChatGPT Text") , gr.Audio(label = "ChatGPT Voice")],
live=True,
title="AI Voice Assistant with ChatGPT AI",
)
iface.launch(debug=True)
""" |