Spaces:

ayazfau
/

SST-Speech-To-Text

Running

App Files Community

ayazfau committed on Jun 21

Commit

2dcb1c1

verified ·

1 Parent(s): 6d5709a

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -25

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import gradio as gr
 from pydub import AudioSegment
-import tempfile
 import os
 import speech_recognition as sr
 import concurrent.futures
@@ -21,12 +20,12 @@ def transcribe_chunk_indexed(indexed_chunk_language):
     index, chunk, language = indexed_chunk_language
     recognizer = sr.Recognizer()
     try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_chunk_file:
-            chunk.export(temp_chunk_file.name, format="wav")
-            with sr.AudioFile(temp_chunk_file.name) as source:
-                audio_data = recognizer.record(source)
-                text = recognizer.recognize_google(audio_data, language=language)
-        os.remove(temp_chunk_file.name)
         return index, text
     except sr.RequestError:
         return index, "[Error: API unavailable or unresponsive]"
@@ -48,29 +47,19 @@ def transcribe_audio_with_google_parallel(audio_path, chunk_length_ms=60000, ove
     return " ".join(transcription)
-def transcribe(uploaded_file, language):
-    if uploaded_file is None:
         return "Please upload an audio file."
-    # Save uploaded file temporarily with correct suffix
-    import pathlib
-    suffix = pathlib.Path(uploaded_file.name).suffix
-    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
-        temp_file.write(uploaded_file.read())
-        temp_path = temp_file.name
     try:
-        converted_path = temp_path + "_converted.wav"
-        convert_audio_to_wav(temp_path, converted_path)
-        os.remove(temp_path)  # remove original temp file
         temp_path = converted_path
     except Exception as e:
         return f"Error processing audio: {e}"
-    # Run transcription
     transcription = transcribe_audio_with_google_parallel(temp_path, chunk_length_ms=60000, overlap_ms=2000, language=language)
-    # Clean up converted file
     try:
         os.remove(temp_path)
     except Exception:
@@ -78,7 +67,6 @@ def transcribe(uploaded_file, language):
     return transcription
-# Language options
 language_options = {
     "English (US)": "en-US",
     "Dutch": "nl-NL",
@@ -97,15 +85,15 @@ with gr.Blocks() as demo:
     gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")
     with gr.Row():
-        audio_input = gr.Audio(source="upload", type="file", label="Upload audio file (mp3, wav, m4a, ogg)")
         language_dropdown = gr.Dropdown(list(language_options.keys()), label="Select language", value="English (US)")
     transcribe_btn = gr.Button("Transcribe")
     output_text = gr.Textbox(label="Transcription Output", lines=15)
-    def on_transcribe(uploaded_file, lang_name):
         lang_code = language_options[lang_name]
-        return transcribe(uploaded_file, lang_code)
     transcribe_btn.click(on_transcribe, inputs=[audio_input, language_dropdown], outputs=output_text)

 import gradio as gr
 from pydub import AudioSegment
 import os
 import speech_recognition as sr
 import concurrent.futures
     index, chunk, language = indexed_chunk_language
     recognizer = sr.Recognizer()
     try:
+        with open(f"chunk_{index}.wav", "wb") as f:
+            chunk.export(f.name, format="wav")
+        with sr.AudioFile(f"chunk_{index}.wav") as source:
+            audio_data = recognizer.record(source)
+            text = recognizer.recognize_google(audio_data, language=language)
+        os.remove(f"chunk_{index}.wav")
         return index, text
     except sr.RequestError:
         return index, "[Error: API unavailable or unresponsive]"
     return " ".join(transcription)
+def transcribe(audio_file_path, language):
+    if audio_file_path is None:
         return "Please upload an audio file."
     try:
+        converted_path = audio_file_path + "_converted.wav"
+        convert_audio_to_wav(audio_file_path, converted_path)
         temp_path = converted_path
     except Exception as e:
         return f"Error processing audio: {e}"
     transcription = transcribe_audio_with_google_parallel(temp_path, chunk_length_ms=60000, overlap_ms=2000, language=language)
     try:
         os.remove(temp_path)
     except Exception:
     return transcription
 language_options = {
     "English (US)": "en-US",
     "Dutch": "nl-NL",
     gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")
     with gr.Row():
+        audio_input = gr.Audio(type="filepath", label="Upload audio file (mp3, wav, m4a, ogg)")
         language_dropdown = gr.Dropdown(list(language_options.keys()), label="Select language", value="English (US)")
     transcribe_btn = gr.Button("Transcribe")
     output_text = gr.Textbox(label="Transcription Output", lines=15)
+    def on_transcribe(audio_path, lang_name):
         lang_code = language_options[lang_name]
+        return transcribe(audio_path, lang_code)
     transcribe_btn.click(on_transcribe, inputs=[audio_input, language_dropdown], outputs=output_text)