"""Gradio voice assistant: spoken question in, Gemini answer out.

Pipeline: recorded audio -> Google Speech Recognition transcript -> Gemini
text answer -> gTTS MP3 of that answer. Every stage reports failures as a
user-facing string starting with ``ERROR_MARK`` so the UI can show them
instead of crashing.
"""

import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import speech_recognition as sr
from google.generativeai import GenerativeModel, configure
from gtts import gTTS, gTTSError

# Every user-facing failure message produced in this module starts with this
# marker; handle_voice_query relies on it to stop the pipeline early.
ERROR_MARK = "❌"

# ✅ Load API key from environment variable
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError(
        "❌ Missing API Key! Please set GEMINI_API_KEY as an environment variable."
    )

# ✅ Configure Gemini securely
configure(api_key=GOOGLE_API_KEY)
gemini_model = GenerativeModel("models/gemini-1.5-flash")


def _is_error(message: str) -> bool:
    """Return True if *message* is one of this module's failure strings."""
    return message.startswith(ERROR_MARK)


def transcribe_audio(audio_path: Optional[str]) -> str:
    """Transcribe an audio file to English text.

    Args:
        audio_path: Path to the recorded audio file, or ``None``/empty when
            the user submitted without recording anything.

    Returns:
        The transcript on success, otherwise a message starting with
        ``ERROR_MARK`` describing what went wrong. Never raises for the
        handled failure modes (missing file, unreadable format, speech not
        understood, recognition service unreachable).
    """
    # Gradio passes None when nothing was recorded; sr.AudioFile would
    # otherwise fail on it with an unhelpful internal error.
    if not audio_path:
        return "❌ No audio received. Please record your question first."

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
    except (ValueError, OSError):
        # ValueError: file is not a format sr.AudioFile can decode.
        # OSError: file is missing or unreadable.
        return "❌ Could not read the audio file."

    try:
        return recognizer.recognize_google(audio, language="en-US")
    except sr.UnknownValueError:
        return "❌ Could not understand the audio."
    except sr.RequestError:
        return "❌ Could not connect to Google Speech API."


def get_gemini_response(query: str) -> str:
    """Ask Gemini to answer *query* in English.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's answer with ``*`` characters removed (so Markdown
        emphasis is neither displayed nor read aloud), or a message starting
        with ``ERROR_MARK`` if the request or response handling failed.
    """
    try:
        # Request Gemini model to answer in English
        response = gemini_model.generate_content(f"Answer in English: {query}")
        return response.text.replace("*", "")
    except Exception as e:  # boundary with a remote API: report, don't crash
        return f"❌ Error from Gemini: {e}"


def text_to_speech(text: str, lang: str = "en") -> str:
    """Synthesize *text* to an MP3 file and return the file's path.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted automatically; the caller owns it.

    Raises:
        Whatever gTTS raises for unusable input or a failed request; in that
        case the temporary file is removed before the error propagates.
    """
    tts = gTTS(text=text, lang=lang)

    # Only a unique path is needed. Close our own handle before gTTS opens
    # the path itself, so no descriptor is leaked and the write also works on
    # platforms that forbid opening a file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name

    try:
        tts.save(mp3_path)
    except Exception:
        os.remove(mp3_path)  # don't leave an empty orphan file behind
        raise
    return mp3_path


# ---------------------------
# Combined function to handle voice query
# ---------------------------
def handle_voice_query(audio_file: Optional[str]) -> Tuple[str, str, Optional[str]]:
    """Run the full voice pipeline for one recorded question.

    Args:
        audio_file: Path of the recorded audio, or ``None`` if nothing was
            recorded.

    Returns:
        A ``(query, response, audio_path)`` tuple for the three output
        components. When a stage fails, its error message is placed in the
        corresponding text field and later stages are skipped
        (``audio_path`` is then ``None``).
    """
    query = transcribe_audio(audio_file)
    if _is_error(query):
        # Do not forward an error message to Gemini as if it were a question.
        return query, "", None

    response = get_gemini_response(query)
    if _is_error(response) or not response.strip():
        # Nothing meaningful to read aloud.
        return query, response, None

    try:
        audio_path = text_to_speech(response)
    except (gTTSError, AssertionError):
        # gTTSError: the speech request failed. AssertionError: gTTS
        # rejected the text as having nothing to speak.
        failure_note = "❌ Could not generate the voice response."
        return query, f"{response}\n\n{failure_note}", None

    return query, response, audio_path


with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ **Ask by Voice**")
    gr.Markdown("### Speak your question aloud (in English)")

    audio_input = gr.Audio(type="filepath", label="🎤 Speak your question")
    query_text = gr.Textbox(label="🔍 Spoken Question")
    gemini_response = gr.Textbox(label="📜 Gemini Response")
    audio_output = gr.Audio(label="🔊 Voice Response")

    submit_btn = gr.Button("➡️ Get Answer")
    submit_btn.click(
        fn=handle_voice_query,
        inputs=[audio_input],
        outputs=[query_text, gemini_response, audio_output],
    )

demo.launch()