"""Voice assistant pipeline: speech-to-text (Whisper) -> LLM reply (Groq) -> text-to-speech (gTTS), served via Gradio."""

import os

import whisper
from groq import Groq
from gtts import gTTS
import gradio as gr

# Whisper "base" model for speech-to-text (downloaded/cached on first run).
model = whisper.load_model("base")

# SECURITY: the API key must come from the environment, never be hardcoded
# in source control. Export GROQ_API_KEY before running.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set. "
        "Export your Groq API key before launching the app."
    )

client = Groq(api_key=GROQ_API_KEY)  # Groq LLM client


def speech_to_text(audio_file):
    """Transcribe an audio file to text using the Whisper model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text string.
    """
    result = model.transcribe(audio_file)
    return result["text"]


def generate_response(prompt):
    """Send *prompt* to the Groq-hosted Llama 3 model and return its reply text."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text, output_file="response.mp3"):
    """Synthesize *text* to an MP3 file with gTTS.

    Args:
        text: Text to speak.
        output_file: Destination path for the generated MP3.

    Returns:
        The output file path, for Gradio to play the audio.
    """
    tts = gTTS(text)
    tts.save(output_file)
    return output_file


def process_audio(audio_file):
    """Full pipeline for one Gradio request: audio in -> (reply text, reply audio path)."""
    # Convert speech to text
    user_input = speech_to_text(audio_file)
    # Generate response from Groq LLM
    response = generate_response(user_input)
    # Convert response text to speech and return the audio file
    output_audio_file = text_to_speech(response)
    return response, output_audio_file


# Gradio interface: record/upload audio in, show reply text and play reply audio.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Generated Response"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

if __name__ == "__main__":
    interface.launch()