# app.py
"""Voice-to-voice chatbot: Whisper transcription -> Groq LLM -> gTTS speech.

The Groq API key is read from the ``GROQ_API_KEY`` environment variable; it
must never be committed to source control.
"""
import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Initialize Groq client.
# SECURITY: the key used to be hard-coded here. Any key that was ever
# committed should be revoked and replaced; supply the new one via the
# environment, e.g. `export GROQ_API_KEY=...`.
# NOTE(review): with api_key=None the Groq client is expected to fail at
# construction with a "missing API key" error — confirm against the SDK docs.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at import time so every request reuses it.
# You can use "small", "medium", or "large" depending on your preference.
whisper_model = whisper.load_model("base")


def audio_to_text(audio_file: str) -> str:
    """Transcribe an audio file to text using OpenAI Whisper.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The decoded transcription text.
    """
    audio = whisper.load_audio(audio_file)
    # pad_or_trim fits the audio to Whisper's fixed 30-second window, so
    # only the first 30 seconds of a longer recording are transcribed.
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
    # fp16=False keeps decoding in full precision (needed on CPU).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(whisper_model, mel, options)
    return result.text


def interact_with_groq(user_input: str) -> str:
    """Send the user's text to the Groq chat API and return the reply.

    Args:
        user_input: The prompt to send as a single user message.

    Returns:
        The model's reply text, or an error description string if the
        request fails (errors are reported in-band rather than raised).
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",  # Use the appropriate model
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error interacting with Groq API: {e}"


def text_to_audio(response_text: str) -> str:
    """Convert text to speech with gTTS and save it as a temporary MP3.

    Args:
        response_text: The text to synthesize.

    Returns:
        Path to the generated ``.mp3`` file. The file is not deleted
        automatically; the caller owns its cleanup.
    """
    tts = gTTS(response_text)
    # Reserve a unique path and close the handle before gTTS writes to it;
    # the original left the file object open (a leaked handle, and a write
    # conflict on platforms that lock open files).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        output_path = tmp.name
    tts.save(output_path)
    return output_path


def voice_to_voice(audio_file: Optional[str]) -> Tuple[str, Optional[str]]:
    """Run the full pipeline: speech -> text -> LLM reply -> speech.

    Args:
        audio_file: Path to the recorded/uploaded audio, or ``None`` when
            no audio was provided.

    Returns:
        A ``(transcribed_text, audio_response_path)`` tuple. On failure the
        first element is an error message and the second is ``None``.
    """
    # Guard the "nothing recorded" case explicitly instead of letting it
    # surface as an opaque error from the audio loader.
    if not audio_file:
        return "Error processing request: no audio input provided", None

    try:
        # Step 1: Convert voice input to text
        print("Transcribing audio...")
        transcribed_text = audio_to_text(audio_file)
        print(f"Transcribed Text: {transcribed_text}")

        # Step 2: Interact with LLM via Groq API
        print("Getting LLM response...")
        response_text = interact_with_groq(transcribed_text)
        print(f"LLM Response: {response_text}")

        # Step 3: Convert LLM response to audio
        print("Generating audio response...")
        audio_response = text_to_audio(response_text)

        return transcribed_text, audio_response
    except Exception as e:
        # Top-level UI boundary: report the failure in the text output
        # rather than crashing the Gradio handler.
        return f"Error processing request: {e}", None


# Gradio Interface
interface = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcribed Text"),
        gr.Audio(label="Response Audio"),
    ],
    title="Real-Time Voice-to-Voice Chatbot",
    description="A real-time voice-to-voice chatbot using Whisper for transcription, Groq API for LLM, and gTTS for audio response.",
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()