File size: 2,631 Bytes
0197ed3
 
7494646
adb5e2a
0197ed3
adb5e2a
 
0197ed3
adb5e2a
7494646
 
 
 
 
 
adb5e2a
 
 
 
 
 
 
 
7494646
 
adb5e2a
7494646
 
adb5e2a
7494646
adb5e2a
7494646
 
 
adb5e2a
7494646
 
adb5e2a
7494646
adb5e2a
7494646
 
0197ed3
 
adb5e2a
 
0197ed3
adb5e2a
 
 
 
 
 
0197ed3
7494646
 
adb5e2a
 
 
 
 
0197ed3
adb5e2a
 
0197ed3
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import subprocess
import time

import gradio as gr
import speech_recognition as sr
import torch
from gtts import gTTS
from transformers import pipeline

# Speech-to-text model: Whisper (base checkpoint) via the transformers
# pipeline, on CUDA device 0 when CUDA is available and on device -1 otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1 if device != "cuda" else 0,
)

# Recognizer from the speech_recognition package (not used by the functions
# visible in this file).
recognizer = sr.Recognizer()

# Function to Play Audio Prompt
def play_audio(text):
    """Speak *text* aloud on the machine running this script.

    The text is synthesised to ``prompt.mp3`` with gTTS and handed to a local
    player: ``mpg321`` on non-Windows systems, the application associated
    with the file on Windows.

    Args:
        text: English text to convert to speech.
    """
    filename = "prompt.mp3"
    gTTS(text=text, lang='en').save(filename)

    try:
        if os.name == "nt":
            # Same effect as the shell's `start <file>`, without a shell.
            os.startfile(filename)
        else:
            # Argument list instead of a shell command string, so the path
            # is never parsed by a shell.
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        # Playback is best-effort: a missing player must not break callers.
        print(f"Audio playback unavailable: {exc}")

    time.sleep(2)  # Give some time for the speech to play

# Function to Capture Name
def capture_name(audio):
    """Play the name prompt and transcribe the user's recorded name.

    Args:
        audio: Recording to transcribe, passed straight to the
            ``speech_to_text`` pipeline; ``None`` or empty when nothing
            was recorded.

    Returns:
        A ``(status, next_step)`` tuple of strings: the captured-name message
        and the prompt for the e-mail step on success, or an error message
        and an empty string on failure.
    """
    play_audio("Tell me your name")

    if not audio:
        # Nothing was recorded: say so plainly instead of surfacing whatever
        # exception the ASR pipeline raises for a missing input.
        return "❌ Error: No audio received. Please record your name and try again.", ""

    try:
        # Trim surrounding whitespace so the displayed name is clean.
        text = speech_to_text(audio)["text"].strip()
    except Exception as e:  # UI boundary: report the failure, don't crash
        return f"❌ Error: {e}", ""

    return f"👤 Name Captured: {text}", "Please provide your email address."

# Function to Capture Email
def capture_email(audio):
    """Play the e-mail prompt and transcribe the user's recorded address.

    Args:
        audio: Recording to transcribe, passed straight to the
            ``speech_to_text`` pipeline; ``None`` or empty when nothing
            was recorded.

    Returns:
        A single status string: the captured-email message on success, or an
        error message on failure.
    """
    play_audio("Please provide your email address")

    if not audio:
        # Nothing was recorded: say so plainly instead of surfacing whatever
        # exception the ASR pipeline raises for a missing input.
        return "❌ Error: No audio received. Please record your email address and try again."

    try:
        # Trim surrounding whitespace so the displayed address is clean.
        text = speech_to_text(audio)["text"].strip()
    except Exception as e:  # UI boundary: report the failure, don't crash
        return f"❌ Error: {e}"

    return f"📧 Email Captured: {text}"

# Gradio Interface
def gradio_interface():
    """Build the two-step voice form (name, then e-mail) as a Gradio Blocks app.

    Each step consists of an audio input, a button that sends the recording
    to the matching ``capture_*`` handler, and a textbox for the result.

    Returns:
        The constructed ``gr.Blocks`` instance; the caller launches it.
    """
    header_image = "/mnt/data/image.png"

    with gr.Blocks() as demo:
        gr.Markdown("<h1 style='text-align: center;'>🍽️ AI Dining Assistant</h1>")

        with gr.Column():
            # Show the header image only when the file is actually present,
            # so a missing asset does not get in the way of the form.
            if os.path.exists(header_image):
                gr.Image(header_image, elem_id="header_image", show_label=False)
            gr.Markdown("<p style='text-align: center;'>Press the mic button to start...</p>")

            gr.Markdown("#### 🎤 Step 1: Tell me your name")
            # The audio input has to be visible: a hidden one gives the user
            # no way to supply a recording for the handler to transcribe.
            audio_input_name = gr.Audio(type="filepath", label="Record Your Name")
            mic_button = gr.Button("🎙️ Tap to Speak Your Name")
            name_output = gr.Textbox(label="Your Name:")
            email_prompt_output = gr.Textbox(label="Next Step:", interactive=False)

            mic_button.click(
                capture_name,
                inputs=audio_input_name,
                outputs=[name_output, email_prompt_output],
            )

            gr.Markdown("#### 🎤 Step 2: Provide your email")
            audio_input_email = gr.Audio(type="filepath", label="Record Your Email")
            mic_button_email = gr.Button("🎙️ Tap to Speak Your Email")
            email_output = gr.Textbox(label="Your Email:")

            mic_button_email.click(
                capture_email,
                inputs=audio_input_email,
                outputs=email_output,
            )

    return demo

# Build the UI at module level (bound to `demo`) and start serving it.
# debug=True is passed through to Gradio's launch().
demo = gradio_interface()
demo.launch(debug=True)