voicemenu1433

Sleeping

App Files Files Community

voicemenu1433 / app.py

geethareddy

Update app.py

adb5e2a verified about 1 month ago

raw

history blame

2.63 kB

	import gradio as gr
	import speech_recognition as sr
	import torch
	import os
	from transformers import pipeline
	from gtts import gTTS
	import time

	# Load ASR Model (Whisper)
	# Prefer the GPU when torch reports one; otherwise run on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# The pipeline is handed a device index rather than the string above:
	# 0 when `device` is "cuda", -1 otherwise.
	speech_to_text = pipeline(
	    "automatic-speech-recognition",
	    model="openai/whisper-base",
	    device=-1 if device != "cuda" else 0,
	)

	# Initialize Speech Recognition
	# NOTE(review): `recognizer` is not referenced anywhere else in the visible
	# code; kept because other code may still rely on the module-level name.
	recognizer = sr.Recognizer()

	# Function to Play Audio Prompt
	def play_audio(text):
	    """Synthesize ``text`` to speech and play it on the machine running this process.

	    The speech is written to ``prompt.mp3`` in the current working directory
	    (overwriting any previous prompt) and then handed to a local player:
	    ``mpg321`` on non-Windows systems, the default associated application on
	    Windows.

	    Playback is best-effort: if the player cannot be started, a message is
	    written to stderr and the function still returns normally. Errors raised
	    while synthesizing or saving the audio are not caught here.

	    Args:
	        text: The English text to speak.
	    """
	    # Function-scope imports keep this block self-contained.
	    import subprocess
	    import sys

	    filename = "prompt.mp3"
	    gTTS(text=text, lang='en').save(filename)

	    try:
	        if os.name == "nt":
	            # Same effect as the shell's ``start <file>``, without going
	            # through a shell.
	            os.startfile(filename)
	        else:
	            # Argument list instead of a shell command string: no shell is
	            # spawned and nothing in the command is subject to shell parsing.
	            subprocess.run(["mpg321", filename], check=False)
	    except OSError as exc:
	        # E.g. mpg321 is not installed. Do not fail the caller over a prompt.
	        print(f"play_audio: could not play {filename}: {exc}", file=sys.stderr)

	    time.sleep(2)  # Give some time for the speech to play

	# Function to Capture Name
	def capture_name(audio):
	    """Play the name prompt, then transcribe the user's recorded name.

	    Args:
	        audio: The recording to transcribe, passed straight to the
	            ``speech_to_text`` pipeline. ``None`` means nothing was recorded.

	    Returns:
	        A 2-tuple ``(status, next_step)``:
	        on success, the captured-name message and the prompt for the email
	        step; on failure (no recording, or the transcription raised), an
	        error message and an empty string.
	    """
	    play_audio("Tell me your name")

	    # Report a missing recording explicitly instead of surfacing whatever
	    # exception the ASR pipeline happens to raise for None.
	    if audio is None:
	        return "❌ Error: No audio was received. Please record your name and try again.", ""

	    try:
	        # Trim surrounding whitespace so the displayed value is clean.
	        text = speech_to_text(audio)["text"].strip()
	    except Exception as e:  # UI boundary: show the failure instead of raising
	        return f"❌ Error: {str(e)}", ""

	    return f"👤 Name Captured: {text}", "Please provide your email address."

	# Function to Capture Email
	def capture_email(audio):
	    """Play the email prompt, then transcribe the user's recorded email address.

	    Args:
	        audio: The recording to transcribe, passed straight to the
	            ``speech_to_text`` pipeline. ``None`` means nothing was recorded.

	    Returns:
	        A single status string: the captured-email message on success, or an
	        error message when there is no recording or the transcription raised.
	    """
	    play_audio("Please provide your email address")

	    # Report a missing recording explicitly instead of surfacing whatever
	    # exception the ASR pipeline happens to raise for None.
	    if audio is None:
	        return "❌ Error: No audio was received. Please record your email address and try again."

	    try:
	        # Trim surrounding whitespace so the displayed value is clean.
	        text = speech_to_text(audio)["text"].strip()
	    except Exception as e:  # UI boundary: show the failure instead of raising
	        return f"❌ Error: {str(e)}"

	    return f"📧 Email Captured: {text}"

	# Gradio Interface
	def gradio_interface(header_image="/mnt/data/image.png"):
	    """Build the two-step (name, then email) voice-capture UI.

	    Args:
	        header_image: Path of the banner image shown under the title. The
	            image is only added when this path points to an existing file,
	            so a missing asset does not get passed to ``gr.Image``.

	    Returns:
	        The constructed ``gr.Blocks`` app. It is not launched here.
	    """
	    with gr.Blocks() as demo:
	        gr.Markdown("<h1 style='text-align: center;'>🍽️ AI Dining Assistant</h1>")

	        with gr.Column():
	            # Only reference the banner when it is actually present on disk.
	            if header_image and os.path.isfile(header_image):
	                gr.Image(header_image, elem_id="header_image", show_label=False)
	            gr.Markdown("<p style='text-align: center;'>Press the mic button to start...</p>")

	            # NOTE(review): both audio inputs below are created with
	            # visible=False, so the click handlers presumably receive None
	            # unless something else fills them - confirm this is intended.

	            # Step 1: the button sends the (hidden) audio input to
	            # capture_name, which fills the name box and the next-step hint.
	            gr.Markdown("#### 🎤 Step 1: Tell me your name")
	            mic_button = gr.Button("🎙️ Tap to Speak Your Name")
	            audio_input_name = gr.Audio(type="filepath", visible=False)
	            name_output = gr.Textbox(label="Your Name:")
	            email_prompt_output = gr.Textbox(label="Next Step:", interactive=False)

	            mic_button.click(
	                capture_name,
	                inputs=audio_input_name,
	                outputs=[name_output, email_prompt_output],
	            )

	            # Step 2: same wiring for the email address.
	            gr.Markdown("#### 🎤 Step 2: Provide your email")
	            mic_button_email = gr.Button("🎙️ Tap to Speak Your Email")
	            audio_input_email = gr.Audio(type="filepath", visible=False)
	            email_output = gr.Textbox(label="Your Email:")

	            mic_button_email.click(
	                capture_email,
	                inputs=audio_input_email,
	                outputs=email_output,
	            )

	    return demo

	# Launch the Gradio Interface
	# These statements run whenever the module is executed or imported (there is
	# no ``__main__`` guard): the UI is built once, bound to the module-level name
	# ``demo``, and then launched.
	demo = gradio_interface()
	demo.launch(debug=True)