voicemenu143 / app.py
geethareddy's picture
Update app.py
adb5e2a verified
raw
history blame
2.63 kB
import gradio as gr
import speech_recognition as sr
import torch
import os
from transformers import pipeline
from gtts import gTTS
import time
# Speech-to-text backend: the Whisper "base" checkpoint, driven through the
# transformers pipeline API. The pipeline receives device index 0 when CUDA
# is available and -1 otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1 if device != "cuda" else 0,
)

# Recognizer object from the speech_recognition package.
recognizer = sr.Recognizer()
def play_audio(text):
    """Speak *text* aloud on the machine running this script.

    The text is synthesised with gTTS, written to ``prompt.mp3`` in the
    current working directory and handed to a local player: ``mpg321`` on
    non-Windows systems, the file's default handler on Windows.

    Args:
        text: English text to speak. Empty or whitespace-only text is
            ignored, because gTTS rejects it with an ``AssertionError``.

    Returns:
        None. Playback is best-effort: a missing or failing player is
        reported on standard output instead of raising.
    """
    if not text or not text.strip():
        return

    import subprocess  # local import: only this function needs it

    filename = "prompt.mp3"
    gTTS(text=text, lang='en').save(filename)

    try:
        if os.name == "nt":
            # Same effect as the shell's `start <file>`: the call returns
            # immediately, so give the clip a moment to play.
            os.startfile(filename)
            time.sleep(2)
        else:
            # Argument list instead of a shell string; the call blocks until
            # the player exits, so no extra sleep is needed afterwards.
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        # Typically the player is not installed; keep the caller running.
        print(f"Audio playback unavailable: {exc}")
def capture_name(audio):
    """Play the name prompt and transcribe the recorded name.

    Args:
        audio: Path to the recorded audio file supplied by the Gradio audio
            component, or ``None``/empty when nothing was recorded.

    Returns:
        A ``(status, next_step)`` tuple of strings. On success ``status``
        carries the transcribed name and ``next_step`` asks for the email
        address. On any failure ``status`` carries an error message and
        ``next_step`` is the empty string.
    """
    # Fail fast: without a recording there is nothing to transcribe, and the
    # speech model would only raise an opaque error.
    if not audio:
        return "❌ Error: No audio received. Please record your name and try again.", ""

    try:
        # The prompt sits inside the try so that a text-to-speech failure is
        # reported through the same error tuple as a transcription failure.
        play_audio("Tell me your name")
        text = speech_to_text(audio)["text"].strip()
    except Exception as e:
        return f"❌ Error: {e}", ""

    if not text:
        return "❌ Error: No speech detected. Please try again.", ""
    return f"👤 Name Captured: {text}", "Please provide your email address."
def capture_email(audio):
    """Play the email prompt and transcribe the recorded email address.

    Args:
        audio: Path to the recorded audio file supplied by the Gradio audio
            component, or ``None``/empty when nothing was recorded.

    Returns:
        A single status string: the transcribed email on success, or an
        error message on any failure.
    """
    # Fail fast: without a recording there is nothing to transcribe, and the
    # speech model would only raise an opaque error.
    if not audio:
        return "❌ Error: No audio received. Please record your email and try again."

    try:
        # The prompt sits inside the try so that a text-to-speech failure is
        # reported the same way as a transcription failure.
        play_audio("Please provide your email address")
        text = speech_to_text(audio)["text"].strip()
    except Exception as e:
        return f"❌ Error: {e}"

    if not text:
        return "❌ Error: No speech detected. Please try again."
    return f"📧 Email Captured: {text}"
def gradio_interface():
    """Build the two-step voice form (name, then email) as a Gradio Blocks app.

    Each step has an audio input, a button that sends the recording to the
    matching handler (``capture_name`` / ``capture_email``) and text boxes
    that show the result.

    Returns:
        The assembled ``gr.Blocks`` object; launching it is left to the
        caller.
    """
    header_image = "/mnt/data/image.png"

    with gr.Blocks() as demo:
        gr.Markdown("<h1 style='text-align: center;'>🍽️ AI Dining Assistant</h1>")
        with gr.Column():
            # The header picture is optional: skip it when the file is absent
            # so that a missing asset cannot break the page.
            if os.path.exists(header_image):
                gr.Image(header_image, elem_id="header_image", show_label=False)
            gr.Markdown("<p style='text-align: center;'>Press the mic button to start...</p>")

        gr.Markdown("#### 🎤 Step 1: Tell me your name")
        # The audio inputs must be visible: a hidden component can never be
        # given a recording, so the handlers would always receive None.
        audio_input_name = gr.Audio(type="filepath", label="Your Name (audio)")
        mic_button = gr.Button("🎙️ Tap to Speak Your Name")
        name_output = gr.Textbox(label="Your Name:")
        email_prompt_output = gr.Textbox(label="Next Step:", interactive=False)
        mic_button.click(
            capture_name,
            inputs=audio_input_name,
            outputs=[name_output, email_prompt_output],
        )

        gr.Markdown("#### 🎤 Step 2: Provide your email")
        audio_input_email = gr.Audio(type="filepath", label="Your Email (audio)")
        mic_button_email = gr.Button("🎙️ Tap to Speak Your Email")
        email_output = gr.Textbox(label="Your Email:")
        mic_button_email.click(
            capture_email,
            inputs=audio_input_email,
            outputs=email_output,
        )

    return demo
# Launch the Gradio Interface
# Both statements run at module level: `demo` is built and launched as soon
# as this file is executed or imported. launch() is called with debug=True.
demo = gradio_interface()
demo.launch(debug=True)