"""Gradio app that transcribes microphone audio with faster-whisper."""

import logging
import os
import tempfile

import gradio as gr
import torch
from faster_whisper import WhisperModel

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mapping of display names to faster-whisper model identifiers.
MODELS = {
    "Faster Whisper Medium": "Systran/faster-whisper-medium",  # Use the medium model
}

# Display name (key of MODELS) of the model used for live transcription.
_DEFAULT_MODEL_NAME = "Faster Whisper Medium"

# Lazily created WhisperModel shared by all requests; loading it is far too
# expensive to repeat on every button click.
_model = None


def _get_model():
    """Return the shared WhisperModel, loading it on first use.

    The device is CUDA when torch reports it as available, otherwise CPU;
    the compute type is "float32" on CUDA and "int8" on CPU.
    """
    global _model
    if _model is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        compute_type = "float32" if device == "cuda" else "int8"
        logger.info(
            "Loading %s on %s (%s)",
            MODELS[_DEFAULT_MODEL_NAME],
            device,
            compute_type,
        )
        _model = WhisperModel(
            MODELS[_DEFAULT_MODEL_NAME],
            device=device,
            compute_type=compute_type,
        )
    return _model


def transcribe_live_audio(audio):
    """Transcribe live audio from the microphone.

    Args:
        audio: Path to the recorded audio file. The Audio component below is
            configured with ``type="filepath"``, so Gradio supplies a string
            path, or ``None`` when nothing has been recorded yet.

    Returns:
        The transcription text, or a string starting with ``"Error: "`` when
        no audio was supplied or transcription failed.
    """
    if not audio:
        return "Error: No audio received. Please record something first."

    try:
        model = _get_model()

        # The recording already lives on disk, so it is transcribed in place;
        # no temporary copy is needed (and therefore nothing can leak).
        segments, _info = model.transcribe(
            audio,
            task="transcribe",
            word_timestamps=True,
            repetition_penalty=1.1,
            temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
        )

        # `segments` is consumed here, inside the try block, so any error
        # raised while iterating it is reported like any other failure.
        # Each text is stripped so joining does not produce doubled spaces.
        return " ".join(segment.text.strip() for segment in segments)
    except Exception as e:
        # Top-level UI boundary: log the traceback and show the message to
        # the user instead of crashing the handler.
        logger.exception("Error in transcribe_live_audio: %s", e)
        return f"Error: {e}"


# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Live Audio Transcription")
    gr.Markdown(
        "Transcribe live audio from your microphone using the "
        "**Systran/faster-whisper-medium** model."
    )

    # Live audio input
    live_audio_input = gr.Audio(type="filepath", label="Speak into the microphone")

    # Transcription output
    live_transcription_output = gr.Textbox(label="Live Transcription")

    # Button to start transcription
    live_transcribe_button = gr.Button("Start Transcription")

    # Link button to function
    live_transcribe_button.click(
        transcribe_live_audio,
        inputs=[live_audio_input],
        outputs=live_transcription_output,
    )

# Launch the Gradio interface only when run as a script, so importing this
# module (e.g. for testing) does not start a server.
if __name__ == "__main__":
    demo.launch()