Athspi committed on
Commit
e0fdc1e
·
verified ·
1 Parent(s): c90c90d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging
import os
import tempfile

import gradio as gr
import torch
from faster_whisper import WhisperModel

7
+ # Set up logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # Mapping of model names to Whisper model sizes
12
+ MODELS = {
13
+ "Faster Whisper Medium": "Systran/faster-whisper-medium", # Use the medium model
14
+ }
15
+
def transcribe_live_audio(audio, model_size="Faster Whisper Medium"):
    """Transcribe audio recorded from the microphone.

    Args:
        audio: Path to the recorded audio file (what ``gr.Audio`` with
            ``type="filepath"`` passes to the handler), or — for backward
            compatibility — an object exposing ``export(path, format=...)``
            such as a pydub ``AudioSegment``.
        model_size: Key into ``MODELS`` selecting the Whisper checkpoint.

    Returns:
        str: The concatenated transcription of all segments, or an
        ``"Error: ..."`` message if anything fails.
    """
    temp_audio_path = None
    try:
        if isinstance(audio, str):
            # gr.Audio(type="filepath") already hands us a path on disk.
            # The original code unconditionally called audio.export(...),
            # which raises AttributeError for string inputs.
            audio_path = audio
        else:
            # Fall back to the pydub-style export for object inputs.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                temp_audio_path = temp_audio.name
            audio.export(temp_audio_path, format="wav")
            audio_path = temp_audio_path

        # Load the appropriate model. int8 keeps CPU inference affordable;
        # float32 is used on GPU (unchanged from the original choice).
        device = "cuda" if torch.cuda.is_available() else "cpu"
        compute_type = "float32" if device == "cuda" else "int8"
        model = WhisperModel(MODELS[model_size], device=device, compute_type=compute_type)

        # Transcribe; the temperature ladder gives fallback decoding passes
        # when lower-temperature decoding fails quality checks.
        segments, info = model.transcribe(
            audio_path,
            task="transcribe",
            word_timestamps=True,
            repetition_penalty=1.1,
            temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
        )
        return " ".join(segment.text for segment in segments)
    except Exception as e:
        # Surface the failure to the UI textbox instead of raising into Gradio.
        logger.error(f"Error in transcribe_live_audio: {str(e)}")
        return f"Error: {str(e)}"
    finally:
        # Remove only the temp file we created ourselves, and do it on the
        # error path too (the original leaked it on failure, and crashed
        # with NameError on success because `os` was never imported).
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
# Define the Gradio interface: one microphone input, one text output, and a
# button that triggers transcription.
with gr.Blocks() as demo:
    gr.Markdown("# Live Audio Transcription")

    gr.Markdown("Transcribe live audio from your microphone using the **Systran/faster-whisper-medium** model.")

    # Live audio input. type="filepath" makes Gradio pass the recording to
    # the handler as a path string on disk.
    # NOTE(review): gradio 4.x replaced the `source=` kwarg with
    # `sources=["microphone"]`; this call assumes a 3.x install — confirm
    # the pinned gradio version.
    live_audio_input = gr.Audio(source="microphone", type="filepath", label="Speak into the microphone")

    # Transcription output
    live_transcription_output = gr.Textbox(label="Live Transcription")

    # Button to start transcription
    live_transcribe_button = gr.Button("Start Transcription")

    # Link button to function: runs transcribe_live_audio on the recorded
    # audio and shows the returned text (or "Error: ..." string).
    # NOTE(review): verify transcribe_live_audio accepts the str path this
    # component emits.
    live_transcribe_button.click(
        transcribe_live_audio,
        inputs=[live_audio_input],
        outputs=live_transcription_output
    )

# Launch the Gradio interface (blocking; serves the app).
demo.launch()