Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
from pydub import AudioSegment
|
3 |
-
import tempfile
|
4 |
import os
|
5 |
import speech_recognition as sr
|
6 |
import concurrent.futures
|
@@ -21,12 +20,12 @@ def transcribe_chunk_indexed(indexed_chunk_language):
|
|
21 |
index, chunk, language = indexed_chunk_language
|
22 |
recognizer = sr.Recognizer()
|
23 |
try:
|
24 |
-
with
|
25 |
-
chunk.export(
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
os.remove(
|
30 |
return index, text
|
31 |
except sr.RequestError:
|
32 |
return index, "[Error: API unavailable or unresponsive]"
|
@@ -48,29 +47,19 @@ def transcribe_audio_with_google_parallel(audio_path, chunk_length_ms=60000, ove
|
|
48 |
|
49 |
return " ".join(transcription)
|
50 |
|
51 |
-
def transcribe(uploaded_file, language):
|
52 |
-
if uploaded_file is None:
|
53 |
return "Please upload an audio file."
|
54 |
|
55 |
-
# Save uploaded file temporarily with correct suffix
|
56 |
-
import pathlib
|
57 |
-
suffix = pathlib.Path(uploaded_file.name).suffix
|
58 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
59 |
-
temp_file.write(uploaded_file.read())
|
60 |
-
temp_path = temp_file.name
|
61 |
-
|
62 |
try:
|
63 |
-
converted_path =
|
64 |
-
convert_audio_to_wav(
|
65 |
-
os.remove(temp_path) # remove original temp file
|
66 |
temp_path = converted_path
|
67 |
except Exception as e:
|
68 |
return f"Error processing audio: {e}"
|
69 |
|
70 |
-
# Run transcription
|
71 |
transcription = transcribe_audio_with_google_parallel(temp_path, chunk_length_ms=60000, overlap_ms=2000, language=language)
|
72 |
|
73 |
-
# Clean up converted file
|
74 |
try:
|
75 |
os.remove(temp_path)
|
76 |
except Exception:
|
@@ -78,7 +67,6 @@ def transcribe(uploaded_file, language):
|
|
78 |
|
79 |
return transcription
|
80 |
|
81 |
-
# Language options
|
82 |
language_options = {
|
83 |
"English (US)": "en-US",
|
84 |
"Dutch": "nl-NL",
|
@@ -97,15 +85,15 @@ with gr.Blocks() as demo:
|
|
97 |
gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")
|
98 |
|
99 |
with gr.Row():
|
100 |
-
audio_input = gr.Audio(
|
101 |
language_dropdown = gr.Dropdown(list(language_options.keys()), label="Select language", value="English (US)")
|
102 |
|
103 |
transcribe_btn = gr.Button("Transcribe")
|
104 |
output_text = gr.Textbox(label="Transcription Output", lines=15)
|
105 |
|
106 |
-
def on_transcribe(
|
107 |
lang_code = language_options[lang_name]
|
108 |
-
return transcribe(
|
109 |
|
110 |
transcribe_btn.click(on_transcribe, inputs=[audio_input, language_dropdown], outputs=output_text)
|
111 |
|
|
|
1 |
import gradio as gr
|
2 |
from pydub import AudioSegment
|
|
|
3 |
import os
|
4 |
import speech_recognition as sr
|
5 |
import concurrent.futures
|
|
|
20 |
index, chunk, language = indexed_chunk_language
|
21 |
recognizer = sr.Recognizer()
|
22 |
try:
|
23 |
+
with open(f"chunk_{index}.wav", "wb") as f:
|
24 |
+
chunk.export(f.name, format="wav")
|
25 |
+
with sr.AudioFile(f"chunk_{index}.wav") as source:
|
26 |
+
audio_data = recognizer.record(source)
|
27 |
+
text = recognizer.recognize_google(audio_data, language=language)
|
28 |
+
os.remove(f"chunk_{index}.wav")
|
29 |
return index, text
|
30 |
except sr.RequestError:
|
31 |
return index, "[Error: API unavailable or unresponsive]"
|
|
|
47 |
|
48 |
return " ".join(transcription)
|
49 |
|
50 |
+
def transcribe(audio_file_path, language):
|
51 |
+
if audio_file_path is None:
|
52 |
return "Please upload an audio file."
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
try:
|
55 |
+
converted_path = audio_file_path + "_converted.wav"
|
56 |
+
convert_audio_to_wav(audio_file_path, converted_path)
|
|
|
57 |
temp_path = converted_path
|
58 |
except Exception as e:
|
59 |
return f"Error processing audio: {e}"
|
60 |
|
|
|
61 |
transcription = transcribe_audio_with_google_parallel(temp_path, chunk_length_ms=60000, overlap_ms=2000, language=language)
|
62 |
|
|
|
63 |
try:
|
64 |
os.remove(temp_path)
|
65 |
except Exception:
|
|
|
67 |
|
68 |
return transcription
|
69 |
|
|
|
70 |
language_options = {
|
71 |
"English (US)": "en-US",
|
72 |
"Dutch": "nl-NL",
|
|
|
85 |
gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")
|
86 |
|
87 |
with gr.Row():
|
88 |
+
audio_input = gr.Audio(type="filepath", label="Upload audio file (mp3, wav, m4a, ogg)")
|
89 |
language_dropdown = gr.Dropdown(list(language_options.keys()), label="Select language", value="English (US)")
|
90 |
|
91 |
transcribe_btn = gr.Button("Transcribe")
|
92 |
output_text = gr.Textbox(label="Transcription Output", lines=15)
|
93 |
|
94 |
+
def on_transcribe(audio_path, lang_name):
|
95 |
lang_code = language_options[lang_name]
|
96 |
+
return transcribe(audio_path, lang_code)
|
97 |
|
98 |
transcribe_btn.click(on_transcribe, inputs=[audio_input, language_dropdown], outputs=output_text)
|
99 |
|