# Spaces: Sleeping (page-status text captured with the file; not part of the program)
import logging
import os
import subprocess
import time

import gradio as gr
import speech_recognition as sr
import torch
from gtts import gTTS
from transformers import pipeline
# Load ASR Model (Whisper)
# Prefer CUDA when torch reports it as available; otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# The pipeline is given a device index here: 0 when running on CUDA, -1 otherwise.
speech_to_text = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1 if device != "cuda" else 0,
)

# Initialize Speech Recognition
recognizer = sr.Recognizer()
# Function to Play Audio Prompt
def play_audio(text):
    """Synthesize *text* to speech and play it on the machine running this code.

    The speech is written to ``prompt.mp3`` in the current working directory
    with gTTS, then handed to a local player: ``mpg321`` everywhere except
    Windows, where the file is opened with its associated application.

    Playback is best-effort: if the player cannot be started, a warning is
    logged and the function still returns normally.

    Args:
        text: The English sentence to speak.
    """
    filename = "prompt.mp3"
    gTTS(text=text, lang='en').save(filename)

    try:
        if os.name == "nt":
            # Equivalent to the shell's ``start <file>`` but without a shell.
            os.startfile(filename)
        else:
            # Argument list instead of a shell command string: nothing is
            # re-parsed by a shell, and a missing player raises OSError here
            # instead of failing silently.
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        logging.getLogger(__name__).warning(
            "Could not play %s: %s", filename, exc
        )

    time.sleep(2)  # Give some time for the speech to play
# Function to Capture Name
def capture_name(audio):
    """Play the name prompt, then transcribe the supplied recording.

    Args:
        audio: The recording to transcribe, passed straight to
            ``speech_to_text``; ``None`` when no recording was supplied.

    Returns:
        A 2-tuple of strings ``(status, next_step)``. On success ``status``
        contains the transcribed name and ``next_step`` asks for the email
        address. On any failure ``status`` contains the error text and
        ``next_step`` is empty.
    """
    try:
        # The prompt is inside the ``try`` so that a text-to-speech failure
        # is reported the same way as a transcription failure.
        play_audio("Tell me your name")
        if audio is None:
            # Fail with a readable message instead of whatever the ASR
            # pipeline raises when it is handed ``None``.
            return "β Error: No audio was recorded.", ""
        text = speech_to_text(audio)["text"]
    except Exception as e:  # UI boundary: surface every failure as text
        return f"β Error: {str(e)}", ""
    return f"π€ Name Captured: {text}", "Please provide your email address."
# Function to Capture Email
def capture_email(audio):
    """Play the email prompt, then transcribe the supplied recording.

    Args:
        audio: The recording to transcribe, passed straight to
            ``speech_to_text``; ``None`` when no recording was supplied.

    Returns:
        A single status string: the transcribed email on success, or the
        error text on any failure.
    """
    try:
        # The prompt is inside the ``try`` so that a text-to-speech failure
        # is reported the same way as a transcription failure.
        play_audio("Please provide your email address")
        if audio is None:
            # Fail with a readable message instead of whatever the ASR
            # pipeline raises when it is handed ``None``.
            return "β Error: No audio was recorded."
        text = speech_to_text(audio)["text"]
    except Exception as e:  # UI boundary: surface every failure as text
        return f"β Error: {str(e)}"
    return f"π§ Email Captured: {text}"
# Gradio Interface
def gradio_interface():
    """Build the two-step (name, then email) voice-capture page.

    Step 1 wires a button to ``capture_name`` and step 2 wires a button to
    ``capture_email``; each handler reads its own audio input component.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    header_image = "/mnt/data/image.png"

    with gr.Blocks() as demo:
        gr.Markdown("<h1 style='text-align: center;'>π½οΈ AI Dining Assistant</h1>")
        with gr.Column():
            # Only hand the header image to Gradio when the file is actually
            # present on this machine; otherwise the page is built without it.
            if os.path.isfile(header_image):
                gr.Image(header_image, elem_id="header_image", show_label=False)
            gr.Markdown("<p style='text-align: center;'>Press the mic button to start...</p>")

            gr.Markdown("#### π€ Step 1: Tell me your name")
            mic_button = gr.Button("ποΈ Tap to Speak Your Name")
            # Visible on purpose: the click handler reads this component's
            # value, and a hidden input gives the user no way to record.
            audio_input_name = gr.Audio(type="filepath")
            name_output = gr.Textbox(label="Your Name:")
            email_prompt_output = gr.Textbox(label="Next Step:", interactive=False)
            mic_button.click(
                capture_name,
                inputs=audio_input_name,
                outputs=[name_output, email_prompt_output],
            )

            gr.Markdown("#### π€ Step 2: Provide your email")
            mic_button_email = gr.Button("ποΈ Tap to Speak Your Email")
            # Visible for the same reason as the name recorder above.
            audio_input_email = gr.Audio(type="filepath")
            email_output = gr.Textbox(label="Your Email:")
            mic_button_email.click(
                capture_email,
                inputs=audio_input_email,
                outputs=email_output,
            )
    return demo
# Launch the Gradio Interface
# ``demo`` stays a module-level name; the app is built first, then served
# with debug mode switched on.
demo = gradio_interface()
demo.launch(debug=True)