import gradio as gr
import torch
import librosa
from transformers import pipeline

# Pick the GPU when available; transformers accepts a device string here.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the ASR pipeline once at import time so every request reuses the model.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="monadical-labs/whisper-medium.en",
    device=device,
)


def transcribe(audio):
    """Transcribe an uploaded/recorded audio file to text.

    Parameters
    ----------
    audio : str | None
        Filesystem path handed over by the Gradio ``Audio`` component
        (``type="filepath"``), or ``None`` when nothing was submitted.

    Returns
    -------
    str
        The transcription, or an ``"Error: ..."`` message when the input
        is missing or cannot be decoded.
    """
    if audio is None:
        return "Error: No audio file received."
    try:
        # Whisper models expect 16 kHz mono; librosa resamples on load.
        audio_data, sr = librosa.load(audio, sr=16000)
    except Exception as exc:
        # Corrupt/unreadable upload: report it in the UI instead of crashing.
        return f"Error: Could not read audio file ({exc})."
    # Pass the sampling rate explicitly so the pipeline never has to guess
    # what rate a bare numpy array was recorded at.
    result = asr_pipeline({"raw": audio_data, "sampling_rate": sr})
    return result["text"]


# Gradio UI: one audio input (delivered as a temp-file path) -> transcript text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()