Athspi committed on
Commit
e0fdc1e
·
verified ·
1 Parent(s): c90c90d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging
import os
import tempfile

import gradio as gr
import torch
from faster_whisper import WhisperModel

7
+ # Set up logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # Mapping of model names to Whisper model sizes
12
+ MODELS = {
13
+ "Faster Whisper Medium": "Systran/faster-whisper-medium", # Use the medium model
14
+ }
15
+
def transcribe_live_audio(audio, model_size="Faster Whisper Medium"):
    """Transcribe audio recorded from the microphone.

    Args:
        audio: Path to the recorded audio file (what ``gr.Audio`` with
            ``type="filepath"`` passes to the handler), or — for backward
            compatibility — an object exposing ``export(path, format=...)``
            such as a pydub ``AudioSegment``.
        model_size: Key into ``MODELS`` selecting the Whisper checkpoint.

    Returns:
        str: The concatenated transcription of all segments, or an
        ``"Error: ..."`` message if anything fails.
    """
    temp_audio_path = None
    try:
        if isinstance(audio, str):
            # gr.Audio(type="filepath") already hands us a path on disk.
            # The original code unconditionally called audio.export(...),
            # which raises AttributeError for string inputs.
            audio_path = audio
        else:
            # Fall back to the pydub-style export for object inputs.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                temp_audio_path = temp_audio.name
            audio.export(temp_audio_path, format="wav")
            audio_path = temp_audio_path

        # Load the appropriate model. int8 keeps CPU inference affordable;
        # float32 is used on GPU (unchanged from the original choice).
        device = "cuda" if torch.cuda.is_available() else "cpu"
        compute_type = "float32" if device == "cuda" else "int8"
        model = WhisperModel(MODELS[model_size], device=device, compute_type=compute_type)

        # Transcribe; the temperature ladder gives fallback decoding passes
        # when lower-temperature decoding fails quality checks.
        segments, info = model.transcribe(
            audio_path,
            task="transcribe",
            word_timestamps=True,
            repetition_penalty=1.1,
            temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
        )
        return " ".join(segment.text for segment in segments)
    except Exception as e:
        # Surface the failure to the UI textbox instead of raising into Gradio.
        logger.error(f"Error in transcribe_live_audio: {str(e)}")
        return f"Error: {str(e)}"
    finally:
        # Remove only the temp file we created ourselves, and do it on the
        # error path too (the original leaked it on failure, and crashed
        # with NameError on success because `os` was never imported).
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
# Define the Gradio interface: one microphone input, one text output, and a
# button that triggers transcription.
with gr.Blocks() as demo:
    gr.Markdown("# Live Audio Transcription")

    gr.Markdown("Transcribe live audio from your microphone using the **Systran/faster-whisper-medium** model.")

    # Live audio input. type="filepath" makes Gradio pass the recording to
    # the handler as a path string on disk.
    # NOTE(review): gradio 4.x replaced the `source=` kwarg with
    # `sources=["microphone"]`; this call assumes a 3.x install — confirm
    # the pinned gradio version.
    live_audio_input = gr.Audio(source="microphone", type="filepath", label="Speak into the microphone")

    # Transcription output
    live_transcription_output = gr.Textbox(label="Live Transcription")

    # Button to start transcription
    live_transcribe_button = gr.Button("Start Transcription")

    # Link button to function: runs transcribe_live_audio on the recorded
    # audio and shows the returned text (or "Error: ..." string).
    # NOTE(review): verify transcribe_live_audio accepts the str path this
    # component emits.
    live_transcribe_button.click(
        transcribe_live_audio,
        inputs=[live_audio_input],
        outputs=live_transcription_output
    )

# Launch the Gradio interface (blocking; serves the app).
demo.launch()