"""Gradio web app: transcribe an uploaded audio file with OpenAI's Whisper model."""

import torch
import gradio as gr
from transformers import pipeline

# Load the Whisper model once at startup and reuse it for every request.
# device=0 selects the first CUDA GPU; -1 falls back to CPU.
device = 0 if torch.cuda.is_available() else -1
asr_pipeline = pipeline(model="openai/whisper-small", device=device)


def transcribe_audio(audio_file):
    """Transcribe the uploaded audio and return the recognized text.

    Args:
        audio_file: Filesystem path of the uploaded audio clip. With
            ``gr.Audio(type="filepath")`` Gradio stages the upload in a
            temporary file it owns and hands us its path, so no manual
            temp-file creation or cleanup is needed here.

    Returns:
        The transcription string, or ``""`` if nothing was submitted.
    """
    # Guard: Gradio passes None when the input is cleared/empty.
    if not audio_file:
        return ""
    transcription = asr_pipeline(audio_file)
    return transcription["text"]


# Build the web UI. Gradio 4.x removed type="file" and renamed source= to
# sources=; type="filepath" delivers the upload's path to the handler.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Whisper Audio Transcription",
    description="Upload an audio file to get a transcription using OpenAI's Whisper model",
)

# Guard the launch so importing this module doesn't start a server.
if __name__ == "__main__":
    interface.launch()