"""Voice assistant pipeline: speech-to-text (Whisper) -> LLM reply (Groq) -> text-to-speech (gTTS), served via Gradio."""

import os

import whisper
from groq import Groq
from gtts import gTTS
import gradio as gr

# Whisper "base" model for speech-to-text (downloaded/cached on first run).
model = whisper.load_model("base")

# SECURITY: the API key must come from the environment, never be hardcoded
# in source control. Export GROQ_API_KEY before running.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set. "
        "Export your Groq API key before launching the app."
    )

client = Groq(api_key=GROQ_API_KEY)  # Groq LLM client


def speech_to_text(audio_file):
    """Transcribe an audio file to text using the Whisper model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text string.
    """
    result = model.transcribe(audio_file)
    return result["text"]


def generate_response(prompt):
    """Send *prompt* to the Groq-hosted Llama 3 model and return its reply text."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text, output_file="response.mp3"):
    """Synthesize *text* to an MP3 file with gTTS.

    Args:
        text: Text to speak.
        output_file: Destination path for the generated MP3.

    Returns:
        The output file path, for Gradio to play the audio.
    """
    tts = gTTS(text)
    tts.save(output_file)
    return output_file


def process_audio(audio_file):
    """Full pipeline for one Gradio request: audio in -> (reply text, reply audio path)."""
    # Convert speech to text
    user_input = speech_to_text(audio_file)
    # Generate response from Groq LLM
    response = generate_response(user_input)
    # Convert response text to speech and return the audio file
    output_audio_file = text_to_speech(response)
    return response, output_audio_file


# Gradio interface: record/upload audio in, show reply text and play reply audio.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Generated Response"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

if __name__ == "__main__":
    interface.launch()