import gradio as gr
import torch
from faster_whisper import WhisperModel
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Mapping of display names to Whisper model identifiers on the Hugging Face Hub
MODELS = {
    "Faster Whisper Medium": "Systran/faster-whisper-medium",  # Medium-size faster-whisper model
}
def transcribe_live_audio(audio_path):
    """Transcribe live audio recorded from the microphone."""
    try:
        # gr.Audio(type="filepath") passes the recording as a path string,
        # so it can be handed to the model directly (no temporary file needed).
        if audio_path is None:
            return "Error: no audio was recorded."

        # Load the appropriate model (note: loading on every call is slow;
        # for a long-running Space the model could be created once at module level)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        compute_type = "float32" if device == "cuda" else "int8"
        model = WhisperModel(MODELS["Faster Whisper Medium"], device=device, compute_type=compute_type)

        # Transcribe the recorded audio
        segments, info = model.transcribe(
            audio_path,
            task="transcribe",
            word_timestamps=True,
            repetition_penalty=1.1,
            temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
        )
        transcription = " ".join(segment.text for segment in segments)
        return transcription
    except Exception as e:
        logger.error(f"Error in transcribe_live_audio: {str(e)}")
        return f"Error: {str(e)}"
# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Live Audio Transcription")
    gr.Markdown("Transcribe live audio from your microphone using the **Systran/faster-whisper-medium** model.")

    # Live audio input
    live_audio_input = gr.Audio(type="filepath", label="Speak into the microphone")

    # Transcription output
    live_transcription_output = gr.Textbox(label="Live Transcription")

    # Button to start transcription
    live_transcribe_button = gr.Button("Start Transcription")

    # Link the button to the transcription function
    live_transcribe_button.click(
        transcribe_live_audio,
        inputs=[live_audio_input],
        outputs=live_transcription_output,
    )

# Launch the Gradio interface
demo.launch()
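
# A minimal sketch of how to exercise the transcription function locally,
# assuming the dependencies (gradio, torch, faster-whisper) are installed and a
# recording exists at "sample.wav" (hypothetical path, not part of this Space):
#
#     print(transcribe_live_audio("sample.wav"))
#
# Running the script itself instead starts the Gradio UI at Gradio's default
# local URL (http://127.0.0.1:7860).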