"""Voice-driven restaurant ordering demo built with Gradio.

The user records a short utterance; it is transcribed with the Google Web
Speech API (via ``speech_recognition``), interpreted as a menu request, and
answered both as text and as synthesized speech (via ``gTTS``).
"""

import json
import logging
import os
import tempfile

import gradio as gr
import speech_recognition as sr
from gtts import gTTS

logger = logging.getLogger(__name__)

# Menu item name -> price in dollars.
menu_items = {
    "Pizza": 10.99,
    "Burger": 8.49,
    "Pasta": 12.99,
    "Salad": 7.99,
    "Soda": 2.49,
}


def generate_voice_response(text):
    """Synthesize *text* to speech and return the path of the mp3 file.

    The file is created with ``delete=False`` and is not removed here.
    NOTE(review): nothing in this module deletes these files afterwards, so
    they accumulate in the temp directory -- confirm whether cleanup is
    handled elsewhere.
    """
    tts = gTTS(text)
    # Only the unique name is needed; close the handle before gTTS writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name
    tts.save(mp3_path)
    return mp3_path


def calculate_total(cart):
    """Return the summed menu price of every item name in *cart*."""
    return sum(menu_items[item] for item in cart)


def _format_menu():
    """Return the menu as one ``"Name: $price"`` line per item."""
    return "".join(f"{item}: ${price:.2f}\n" for item, price in menu_items.items())


def _format_cart(cart):
    """Return *cart* as one ``"- Name: $price"`` line per entry."""
    return "".join(f"- {item}: ${menu_items[item]:.2f}\n" for item in cart)


def _transcribe(audio_path):
    """Return the lower-cased transcript of *audio_path*, or ``""`` on failure.

    Unintelligible speech and recognition-service failures both yield an
    empty string so the caller can treat them as "nothing heard".
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        recording = recognizer.record(source)
    try:
        return recognizer.recognize_google(recording).lower()
    except sr.UnknownValueError:
        # Speech could not be understood; deliberately treated as silence.
        return ""
    except sr.RequestError as err:
        # Service unreachable or request rejected: log instead of crashing
        # the handler.
        logger.warning("Speech recognition request failed: %s", err)
        return ""


def _handle_request(input_text, state):
    """Interpret *input_text* against the session *state*; return the reply.

    Mutates ``state["cart"]`` and ``state["current_items"]`` in place.
    """
    cart = state.setdefault("cart", [])
    already_added = state.setdefault("current_items", [])

    # Items already added in this ordering cycle are not matched again.
    matched_items = [
        item
        for item in menu_items
        if item.lower() in input_text and item not in already_added
    ]

    if len(matched_items) == 1:
        item = matched_items[0]
        cart.append(item)
        already_added.append(item)
        total = calculate_total(cart)
        return (
            f"{item} has been added to your cart. Your current cart includes:\n"
            f"{_format_cart(cart)}"
            f"\nTotal: ${total:.2f}. Would you like to add anything else?"
        )

    if len(matched_items) > 1:
        return (
            f"I detected multiple items in your input: {', '.join(matched_items)}. "
            "Please mention one item at a time."
        )

    if "menu" in input_text:
        return (
            "Here is our menu again:\n"
            f"{_format_menu()}"
            "\nWhat would you like to add to your cart?"
        )

    if "final order" in input_text or "submit order" in input_text:
        if not cart:
            return "Your cart is empty. Would you like to order something?"
        total = calculate_total(cart)
        summary = (
            "Your final order includes:\n"
            f"{_format_cart(cart)}"
            f"\nTotal: ${total:.2f}.\nThank you for ordering!"
        )
        # Start a fresh ordering cycle once the order is finalized.
        state["cart"] = []
        state["current_items"] = []
        return summary

    return "I didn’t quite catch that. Please tell me what you’d like to order."


def restaurant_voice_assistant(audio, state_json):
    """Handle one voice interaction.

    Args:
        audio: Path to the recorded audio file, or a falsy value when there
            is no recording.
        state_json: JSON-encoded session state (``menu_shown``, ``cart``,
            ``current_items``); a falsy value means a fresh session.

    Returns:
        A ``(response_text, voice_path, state_json)`` tuple. When the menu
        has already been shown and nothing was recognized, the response is
        ``""`` and the voice path is ``None``.

    The cart is kept in the per-session state rather than in a module-level
    global, so separate sessions do not share one cart.
    """
    state = json.loads(state_json) if state_json else {}
    state.setdefault("cart", [])

    input_text = _transcribe(audio) if audio else ""

    if not state.get("menu_shown", False):
        # First interaction: greet with the menu, whatever was said.
        response = (
            "Welcome to our restaurant! Here is our menu:\n"
            f"{_format_menu()}"
            "\nPlease tell me the item you would like to add to your cart."
        )
        state["menu_shown"] = True
    elif not input_text.strip():
        # Nothing usable was heard: stay silent and wait for valid input.
        return "", None, json.dumps(state)
    else:
        response = _handle_request(input_text, state)

    voice_path = generate_voice_response(response)
    return response, voice_path, json.dumps(state)


with gr.Blocks() as demo:
    # Session state is carried between calls as a JSON string.
    state = gr.State(value=json.dumps({}))

    with gr.Row():
        user_audio = gr.Audio(type="filepath", label="Your Voice Input")
        output_text = gr.Textbox(label="Response Text")

    with gr.Row():
        voice_output = gr.Audio(label="Response Audio", autoplay=True)

    # Automatically process audio when recording stops.
    user_audio.change(
        restaurant_voice_assistant,
        inputs=[user_audio, state],
        outputs=[output_text, voice_output, state],
    )

demo.launch()