nagasurendra committed on
Commit
cf344c7
·
verified ·
1 Parent(s): 49e8f5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -112
app.py CHANGED
@@ -1,128 +1,97 @@
1
  import gradio as gr
2
- import edge_tts
3
- import asyncio
4
- import tempfile
5
- import numpy as np
6
- from pydub import AudioSegment
7
- import torch
8
- import sentencepiece as spm
9
- import onnxruntime as ort
10
- from huggingface_hub import hf_hub_download
11
-
12
- # Dynamic Menu Items
13
- MENU = {
14
- "Pizza": 10.99,
15
- "Burger": 6.99,
16
- "Pasta": 8.49,
17
- "Salad": 5.49,
18
- "Soda": 1.99,
19
- "Coffee": 2.99
20
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- cart = [] # To store cart items
23
-
24
- # Speech Recognition Model Configuration
25
- model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
26
- sample_rate = 16000
27
-
28
- # Download preprocessor, encoder, and tokenizer
29
- preprocessor = torch.jit.load(hf_hub_download(model_name, "preprocessor.ts", subfolder="onnx"))
30
- encoder = ort.InferenceSession(hf_hub_download(model_name, "model.onnx", subfolder="onnx"))
31
- tokenizer = spm.SentencePieceProcessor(hf_hub_download(model_name, "tokenizer.spm", subfolder="onnx"))
32
-
33
- async def text_to_speech(text):
34
- communicate = edge_tts.Communicate(text)
35
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
36
- tmp_path = tmp_file.name
37
- await communicate.save(tmp_path)
38
- return tmp_path
39
-
40
- def resample(audio_fp32, sr):
41
- return soxr.resample(audio_fp32, sr, sample_rate)
42
-
43
- def to_float32(audio_buffer):
44
- return np.divide(audio_buffer, np.iinfo(audio_buffer.dtype).max, dtype=np.float32)
45
-
46
- def transcribe(audio_path):
47
- audio_file = AudioSegment.from_file(audio_path)
48
- sr = audio_file.frame_rate
49
- audio_buffer = np.array(audio_file.get_array_of_samples())
50
-
51
- audio_fp32 = to_float32(audio_buffer)
52
- audio_16k = resample(audio_fp32, sr)
53
-
54
- input_signal = torch.tensor(audio_16k).unsqueeze(0)
55
- length = torch.tensor(len(audio_16k)).unsqueeze(0)
56
- processed_signal, _ = preprocessor.forward(input_signal=input_signal, length=length)
57
-
58
- logits = encoder.run(None, {'audio_signal': processed_signal.numpy(), 'length': length.numpy()})[0][0]
59
-
60
- blank_id = tokenizer.vocab_size()
61
- decoded_prediction = [p for p in logits.argmax(axis=1).tolist() if p != blank_id]
62
- text = tokenizer.decode_ids(decoded_prediction)
63
-
64
- return text
65
-
66
- def generate_menu():
67
- menu_text = "Here is our menu:\n"
68
- for item, price in MENU.items():
69
- menu_text += f"{item}: ${price:.2f}\n"
70
- menu_text += "What would you like to order?"
71
- return menu_text
72
-
73
- def handle_cart(command):
74
- global cart
75
- response = ""
76
-
77
- # Check for menu-related commands
78
- if "menu" in command.lower():
79
- response = generate_menu()
80
 
81
- # Check for add-to-cart commands
82
- else:
83
- for item in MENU.keys():
84
- if item.lower() in command.lower():
85
  cart.append(item)
86
- response = f"{item} has been added to your cart."
87
- break
88
 
89
- # If user asks for cart
90
- if "cart" in command.lower():
91
- if cart:
92
- response = "Your cart contains:\n" + ", ".join(cart)
93
  else:
94
- response = "Your cart is empty."
 
95
 
96
- # If user confirms order
97
- if "submit" in command.lower() or "done" in command.lower():
98
- if cart:
99
- response = "Your final order is:\n" + ", ".join(cart) + ". Thank you for your order!"
100
- cart = [] # Clear the cart
101
  else:
102
- response = "Your cart is empty. Add some items before submitting."
 
 
103
 
104
- return response
 
105
 
106
- async def respond(audio):
107
- try:
108
- user_command = transcribe(audio)
109
- reply = handle_cart(user_command)
110
- reply_audio_path = await text_to_speech(reply)
111
- return user_command, reply, reply_audio_path
112
- except Exception as e:
113
- return "Error: Could not transcribe audio.", "Error: Could not process your request.", None
114
 
115
- with gr.Blocks() as demo:
116
- with gr.Row():
117
- audio_input = gr.Audio(label="Speak Here", type="filepath")
118
- submit = gr.Button("Submit")
 
119
 
120
- with gr.Row():
121
- transcribed_text = gr.Textbox(label="Transcribed Text")
122
- response_text = gr.Textbox(label="GPT Response")
123
- response_audio = gr.Audio(label="Response Audio")
124
 
125
- submit.click(fn=respond, inputs=[audio_input], outputs=[transcribed_text, response_text, response_audio])
126
 
127
  if __name__ == "__main__":
128
- demo.queue().launch()
 
1
  import gradio as gr
2
+ from gtts import gTTS
3
+ import os
4
+ import speech_recognition as sr
5
+
6
# One speech recognizer, created at import time and reused by every request.
recognizer = sr.Recognizer()

# Menu catalogue: category key -> names of the dishes offered in that category.
menu_items = {
    "biryani": [
        "Chicken Biryani",
        "Mutton Biryani",
        "Vegetable Biryani",
        "Egg Biryani",
    ],
    "starters": [
        "Chicken Tikka",
        "Paneer Tikka",
        "Fish Fry",
        "Veg Manchurian",
    ],
    "drinks": [
        "Coke",
        "Pepsi",
        "Lemonade",
        "Mango Juice",
        "Water",
    ],
}

# Names of the dishes ordered so far, in the order they were added.
# NOTE(review): this list lives at module level, so it is presumably shared by
# everyone using the running app — confirm that is intended.
cart = []
16
+
17
+ # Text-to-Speech Function
18
def text_to_speech(text):
    """Convert *text* to spoken English with gTTS and return the MP3 file path.

    Every call writes to its own uniquely named temporary file. A single
    fixed file name would be overwritten by each new request, so two
    overlapping requests could end up serving each other's audio.

    Args:
        text: The sentence(s) to synthesize.

    Returns:
        Path of the newly written ``.mp3`` file. The file is not deleted
        here; cleanup is left to the caller / the temp directory policy.
    """
    import tempfile  # local import keeps this function self-contained

    # Reserve a unique path, then close the handle before gTTS writes to it
    # (an open NamedTemporaryFile cannot be reopened by name on Windows).
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
        file_path = tmp_file.name

    gTTS(text=text, lang="en").save(file_path)
    return file_path
24
+
25
+ # Read Menu Function
26
def read_menu():
    """Compose the spoken menu announcement and synthesize it.

    The announcement walks the categories in a fixed order (biryani,
    starters, drinks); each category is introduced by a heading and every
    dish name is followed by a period and a space.

    Returns:
        A ``(menu_text, audio_path)`` tuple: the announcement and the path
        returned by ``text_to_speech`` for it.
    """
    # (spoken heading, key into menu_items), in announcement order.
    sections = (
        ("Here is the menu. Starting with Biryani options: ", "biryani"),
        ("Now the Starters: ", "starters"),
        ("Finally, Drinks: ", "drinks"),
    )

    spoken = "".join(
        heading + "".join(dish + ". " for dish in menu_items[key])
        for heading, key in sections
    )
    return spoken, text_to_speech(spoken)
38
+
39
+ # Process Voice Command
40
def process_command(audio_path):
    """Transcribe a recorded voice command and act on it.

    The recording is transcribed with ``recognizer.recognize_google`` and
    lower-cased, then checked against the supported commands in this order
    (the first match wins):

    1. contains "menu": read the menu aloud;
    2. contains the name of a menu dish: add that dish to ``cart``;
    3. contains "cart": report the cart contents;
    4. contains "submit" or "done": confirm the order and empty the cart.

    Args:
        audio_path: Path of the recorded audio file. May be ``None``/empty
            (e.g. when the recording is cleared in the UI), in which case
            nothing is processed.

    Returns:
        A ``(response_text, audio_path)`` tuple: the text to display and the
        path of the synthesized spoken reply. For a missing recording the
        result is ``("", None)``.
    """
    # No recording to work on: skip recognition and speech synthesis instead
    # of answering a cleared input with a spoken error.
    if not audio_path:
        return "", None

    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        command = recognizer.recognize_google(audio_data).lower()
    except Exception:
        # Deliberately broad: this is the UI boundary, and unreadable audio,
        # unintelligible speech and recognition-service failures all get the
        # same spoken apology rather than crashing the request.
        error_text = "Sorry, I could not process the audio."
        return "Error", text_to_speech(error_text)

    if "menu" in command:
        return read_menu()

    # Add the first dish whose name appears in the command.
    for items in menu_items.values():
        for item in items:
            if item.lower() in command:
                cart.append(item)
                response_text = f"{item} has been added to your cart."
                return response_text, text_to_speech(response_text)

    if "cart" in command:
        if not cart:
            response_text = "Your cart is empty."
        else:
            response_text = "Your cart contains: " + ", ".join(cart)
        return response_text, text_to_speech(response_text)

    if "submit" in command or "done" in command:
        if not cart:
            response_text = "Your cart is empty. Add some items before submitting."
        else:
            response_text = "Your final order is: " + ", ".join(cart) + ". Thank you for your order!"
            # Clear in place so the module-level list object stays the same.
            cart.clear()
        return response_text, text_to_speech(response_text)

    error_text = "Sorry, I couldn't understand your request."
    return error_text, text_to_speech(error_text)
78
 
79
+ # Gradio App
80
def app():
    """Build the Gradio Blocks interface and return it without launching it.

    The layout has a title, a short instruction line, and a row holding the
    audio input, two text boxes and the audio reply player. Every change of
    the audio input runs ``process_command`` and shows its two results.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Voice-Activated Restaurant Menu System")
        gr.Markdown("Speak your command to interact with the menu system dynamically.")

        with gr.Row():
            voice_input = gr.Audio(type="filepath", label="Speak Your Command")
            # NOTE(review): this box is created but is not among the outputs
            # wired below, so nothing here ever fills it — confirm intent.
            transcribed_text = gr.Textbox(label="Transcribed Command")
            response_text = gr.Textbox(label="Response Text")
            audio_output = gr.Audio(label="Audio Response")

        # process_command returns (response text, reply audio path).
        reply_targets = [response_text, audio_output]
        voice_input.change(
            fn=process_command,
            inputs=voice_input,
            outputs=reply_targets,
        )

    return demo
95
 
96
if __name__ == "__main__":
    # Build the interface and start serving it only when run as a script.
    demo = app()
    demo.launch()