navjotk committed on
Commit
b06881a
·
verified ·
1 Parent(s): 65b673e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -23
app.py CHANGED
@@ -1,14 +1,9 @@
1
  import gradio as gr
2
-
3
  from google.generativeai import GenerativeModel, configure
4
  from gtts import gTTS
5
  import speech_recognition as sr
6
  import os
7
  import tempfile
8
- import torch
9
- from torchvision import models, transforms
10
- from PIL import Image
11
- import json
12
 
13
  # ✅ Load API key from environment variable
14
  GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
@@ -24,27 +19,28 @@ def transcribe_audio(audio_path):
24
  with sr.AudioFile(audio_path) as source:
25
  audio = recognizer.record(source)
26
  try:
27
- return recognizer.recognize_google(audio, language='pa-IN')
28
  except sr.UnknownValueError:
29
- return "❌ ਆਵਾਜ਼ ਨੂੰ ਸਮਝਿਆ ਨਹੀਂ ਜਾ ਸਕਿਆ।"
30
  except sr.RequestError:
31
- return "❌ ਗੂਗਲ ਸਪੀਚ ਐਪੀਆਈ ਨਾਲ ਕਨੇਕਟ ਨਹੀਂ ਹੋ ਸਕਿਆ।"
32
-
33
  def get_gemini_response(query):
34
  try:
35
- response = gemini_model.generate_content(f"ਪੰਜਾਬੀ ਵਿੱਚ ਜਵਾਬ ਦਿਓ: {query}")
 
36
  return response.text.replace('*', '')
37
  except Exception as e:
38
- return f"❌ Gemini ਤਰਫੋਂ ਗਲਤੀ: {str(e)}"
39
 
40
- def text_to_speech(text, lang='pa'):
41
  tts = gTTS(text=text, lang=lang)
42
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
43
  tts.save(temp_file.name)
44
  return temp_file.name
45
 
46
  # ---------------------------
47
- # Combined Function
48
  # ---------------------------
49
  def handle_voice_query(audio_file):
50
  query = transcribe_audio(audio_file)
@@ -53,15 +49,15 @@ def handle_voice_query(audio_file):
53
  return query, response, audio_path
54
 
55
  with gr.Blocks() as demo:
56
- gr.Markdown("# 🗣️ **ਆਵਾਜ਼ ਰਾਹੀਂ ਪੁੱਛੋ**")
57
- gr.Markdown("### ਆਪਣਾ ਸਵਾਲ ਆਵਾਜ਼ ਰਾਹੀਂ ਪੁੱਛੋ (ਪੰਜਾਬੀ ਵਿੱਚ)")
58
- audio_input = gr.Audio(type="filepath", label="🎤 ਸਵਾਲ ਬੋਲੋ")
59
- query_text = gr.Textbox(label="🔍 ਬੋਲਿਆ ਗਿਆ ਸਵਾਲ")
60
- gemini_response = gr.Textbox(label="📜 Gemini ਜਵਾਬ")
61
- audio_output = gr.Audio(label="🔊 ਆਵਾਜ਼ੀ ਜਵਾਬ")
62
- submit_btn = gr.Button("➡️ ਜਵਾਬ ਲਵੋ")
63
  submit_btn.click(fn=handle_voice_query,
64
- inputs=[audio_input],
65
- outputs=[query_text, gemini_response, audio_output])
66
 
67
- demo.launch()
 
1
  import gradio as gr
 
2
  from google.generativeai import GenerativeModel, configure
3
  from gtts import gTTS
4
  import speech_recognition as sr
5
  import os
6
  import tempfile
 
 
 
 
7
 
# ✅ Load API key from environment variable
# NOTE(review): os.getenv returns None when GEMINI_API_KEY is unset; the
# downstream configure() call (not visible in this diff) would then fail with
# an opaque error — consider failing fast with a clear message.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
 
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* to English text.

    Returns the recognized text, or a "❌ ..." error string when the audio
    cannot be understood or the Google Speech API is unreachable.
    """
    # NOTE(review): the signature and the recognizer-setup line are cut from
    # this diff hunk's context — reconstructed from the hunk header and the
    # use of `recognizer` below; confirm against the full file.
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)
    try:
        # Online Google Web Speech API; requires network access.
        return recognizer.recognize_google(audio, language='en-US')
    except sr.UnknownValueError:
        return "❌ Could not understand the audio."
    except sr.RequestError:
        return "❌ Could not connect to Google Speech API."
28
def get_gemini_response(query):
    """Send *query* to the Gemini model and return its English answer.

    Returns the response text with '*' characters stripped (so markdown
    bold/italic markers do not leak into the TTS output), or a "❌ ..."
    error string on any failure.
    """
    try:
        # Ask the model to reply in English regardless of the query language.
        prompt = f"Answer in English: {query}"
        answer = gemini_model.generate_content(prompt)
        return answer.text.replace('*', '')
    except Exception as e:
        # Broad catch keeps the UI responsive; the error is surfaced as text.
        return f"❌ Error from Gemini: {str(e)}"
 
36
def text_to_speech(text, lang='en'):
    """Synthesize *text* with gTTS and return the path of a temporary MP3.

    Parameters:
        text: the text to speak.
        lang: gTTS language code (default 'en').

    Returns the path of the generated file; the caller (or the OS temp
    cleaner) is responsible for deleting it.
    """
    # mkstemp + close instead of NamedTemporaryFile(delete=False): the
    # original kept an open handle on the file while gTTS rewrote the same
    # path, which leaks a file descriptor and fails on Windows, where a file
    # held open by one handle cannot be replaced through its name.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    gTTS(text=text, lang=lang).save(path)
    return path
41
 
42
# ---------------------------
# Combined function to handle voice query
# ---------------------------
def handle_voice_query(audio_file):
    """Full pipeline: speech -> transcript -> Gemini answer -> speech.

    Returns (transcribed query, Gemini response text, path to answer MP3),
    matching the three Gradio output components.
    """
    query = transcribe_audio(audio_file)
    # NOTE(review): the two lines below are cut from this diff's context —
    # inferred from the names in the return statement; confirm against the
    # full file.
    response = get_gemini_response(query)
    audio_path = text_to_speech(response)
    return query, response, audio_path
50
 
51
  with gr.Blocks() as demo:
52
+ gr.Markdown("# 🗣️ **Ask by Voice**")
53
+ gr.Markdown("### Speak your question aloud (in English)")
54
+ audio_input = gr.Audio(type="filepath", label="🎤 Speak your question")
55
+ query_text = gr.Textbox(label="🔍 Spoken Question")
56
+ gemini_response = gr.Textbox(label="📜 Gemini Response")
57
+ audio_output = gr.Audio(label="🔊 Voice Response")
58
+ submit_btn = gr.Button("➡️ Get Answer")
59
  submit_btn.click(fn=handle_voice_query,
60
+ inputs=[audio_input],
61
+ outputs=[query_text, gemini_response, audio_output])
62
 
63
+ demo.launch()