Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from faster_whisper import WhisperModel
|
4 |
+
import tempfile
|
5 |
+
import logging
|
6 |
+
|
7 |
+
# Set up logging
|
8 |
+
logging.basicConfig(level=logging.INFO)
|
9 |
+
logger = logging.getLogger(__name__)
|
10 |
+
|
11 |
+
# Mapping of model names to Whisper model sizes
|
12 |
+
MODELS = {
|
13 |
+
"Faster Whisper Medium": "Systran/faster-whisper-medium", # Use the medium model
|
14 |
+
}
|
15 |
+
|
def transcribe_live_audio(audio, model_size="Faster Whisper Medium"):
    """Transcribe audio captured from the microphone.

    Parameters
    ----------
    audio : str | object
        Either a path to an audio file (what a Gradio ``gr.Audio`` with
        ``type="filepath"`` — as used by this app's UI — actually passes),
        or an object exposing ``export(path, format=...)`` such as a
        pydub ``AudioSegment``.
    model_size : str
        Key into ``MODELS`` selecting the faster-whisper checkpoint.

    Returns
    -------
    str
        The joined transcription text, or an ``"Error: ..."`` string if
        anything fails (the UI displays whatever string comes back).
    """
    import os  # local import: `os` is not imported at module level

    temp_audio_path = None
    try:
        # The UI wires up gr.Audio(type="filepath"), which delivers a
        # plain path string; only AudioSegment-like objects need to be
        # exported to a temporary WAV file first.
        if isinstance(audio, str):
            audio_path = audio
        else:
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                temp_audio_path = temp_audio.name
            audio.export(temp_audio_path, format="wav")
            audio_path = temp_audio_path

        # Pick the best available device; int8 keeps CPU memory modest.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        compute_type = "float32" if device == "cuda" else "int8"
        model = WhisperModel(MODELS[model_size], device=device, compute_type=compute_type)

        # Transcribe; the escalating temperature list is faster-whisper's
        # fallback schedule when decoding stalls.
        segments, info = model.transcribe(
            audio_path,
            task="transcribe",
            word_timestamps=True,
            repetition_penalty=1.1,
            temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
        )
        return " ".join(segment.text for segment in segments)
    except Exception as e:
        logger.error(f"Error in transcribe_live_audio: {str(e)}")
        return f"Error: {str(e)}"
    finally:
        # Remove the temp WAV even when transcription raised; the original
        # leaked it on any exception and crashed on `os` (never imported).
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
46 |
+
|
# ---------------------------------------------------------------------------
# Gradio UI: microphone input -> transcription textbox
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Live Audio Transcription")

    gr.Markdown("Transcribe live audio from your microphone using the **Systran/faster-whisper-medium** model.")

    # Microphone capture; type="filepath" hands the handler a path string.
    # NOTE(review): Gradio 4.x renamed `source=` to `sources=[...]` — confirm
    # the pinned gradio version before changing this.
    mic_input = gr.Audio(source="microphone", type="filepath", label="Speak into the microphone")

    # Where the transcription (or an "Error: ..." string) is shown.
    transcript_box = gr.Textbox(label="Live Transcription")

    start_button = gr.Button("Start Transcription")

    # Wire the button to the transcription handler.
    start_button.click(
        fn=transcribe_live_audio,
        inputs=[mic_input],
        outputs=transcript_box,
    )

# Launch the Gradio app (blocks until the server is stopped).
demo.launch()