"""Gradio web app: transcribe an uploaded audio file with OpenAI's Whisper model."""

import torch
import gradio as gr
from transformers import pipeline

# Load the Whisper model once at startup and reuse it for every request.
# device=0 selects the first CUDA GPU; -1 falls back to CPU.
device = 0 if torch.cuda.is_available() else -1
asr_pipeline = pipeline(model="openai/whisper-small", device=device)


def transcribe_audio(audio_file):
    """Transcribe the uploaded audio and return the recognized text.

    Args:
        audio_file: Filesystem path of the uploaded audio clip. With
            ``gr.Audio(type="filepath")`` Gradio stages the upload in a
            temporary file it owns and hands us its path, so no manual
            temp-file creation or cleanup is needed here.

    Returns:
        The transcription string, or ``""`` if nothing was submitted.
    """
    # Guard: Gradio passes None when the input is cleared/empty.
    if not audio_file:
        return ""
    transcription = asr_pipeline(audio_file)
    return transcription["text"]


# Build the web UI. Gradio 4.x removed type="file" and renamed source= to
# sources=; type="filepath" delivers the upload's path to the handler.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Whisper Audio Transcription",
    description="Upload an audio file to get a transcription using OpenAI's Whisper model",
)

# Guard the launch so importing this module doesn't start a server.
if __name__ == "__main__":
    interface.launch()