Spaces:

ayazfau
/

SST-Speech-To-Text

Running

App Files Community

ayazfau committed on Jun 21

Commit

e5b54f2

verified ·

1 Parent(s): 654dce7

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -52

app.py CHANGED Viewed

@@ -1,27 +1,10 @@
 import gradio as gr
-from dotenv import load_dotenv
-import os
 from pydub import AudioSegment
 import tempfile
 import speech_recognition as sr
 import concurrent.futures
-# Load environment variables
-load_dotenv()
-language_options = {
-    "English (US)": "en-US",
-    "Dutch": "nl-NL",
-    "English (UK)": "en-GB",
-    "Spanish": "es-ES",
-    "French": "fr-FR",
-    "German": "de-DE",
-    "Hindi": "hi-IN",
-    "Chinese (Mandarin)": "zh-CN",
-    "Arabic": "ar-SA",
-    "Turkish": "tr-TR",
-}
 def split_audio(audio_path, chunk_length_ms=60000, overlap_ms=2000):
     audio = AudioSegment.from_file(audio_path)
     chunks = []
@@ -52,22 +35,9 @@ def transcribe_chunk_indexed(indexed_chunk_language):
     except Exception as e:
         return index, f"[Error: {str(e)}]"
-def transcribe_audio(file, language_name):
-    language = language_options[language_name]
-    # Save uploaded file temporarily
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
-        temp_file.write(file.read())
-        temp_path = temp_file.name
-    # Convert to proper format
-    converted_path = temp_path + "_converted.wav"
-    convert_audio_to_wav(temp_path, converted_path)
-    temp_path = converted_path
-    chunks = split_audio(temp_path)
     indexed_chunks = [(i, chunk, language) for i, chunk in enumerate(chunks)]
     transcription = [""] * len(indexed_chunks)
     with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
@@ -76,22 +46,67 @@ def transcribe_audio(file, language_name):
             idx, text = future.result()
             transcription[idx] = text
-    final_text = " ".join(transcription)
-    return final_text, final_text
-# Gradio UI
-iface = gr.Interface(
-    fn=transcribe_audio,
-    inputs=[
-        gr.Audio(type="file", label="Upload Audio File"),
-        gr.Dropdown(choices=list(language_options.keys()), label="Select Language", value="English (US)")
-    ],
-    outputs=[
-        gr.Textbox(label="Transcription", lines=10),
-        gr.File(label="Download Transcription", file_types=[".txt"], type="text")
-    ],
-    title="Audio to Text Transcription",
-    description="Upload an audio file and get the transcribed text using Google Speech Recognition."
-)
-iface.launch()

 import gradio as gr
 from pydub import AudioSegment
 import tempfile
+import os
 import speech_recognition as sr
 import concurrent.futures
 def split_audio(audio_path, chunk_length_ms=60000, overlap_ms=2000):
     audio = AudioSegment.from_file(audio_path)
     chunks = []
     except Exception as e:
         return index, f"[Error: {str(e)}]"
+def transcribe_audio_with_google_parallel(audio_path, chunk_length_ms=60000, overlap_ms=2000, language="en-US"):
+    chunks = split_audio(audio_path, chunk_length_ms, overlap_ms)
     indexed_chunks = [(i, chunk, language) for i, chunk in enumerate(chunks)]
     transcription = [""] * len(indexed_chunks)
     with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
             idx, text = future.result()
             transcription[idx] = text
+    return " ".join(transcription)
+def transcribe(uploaded_file, language):
+    if uploaded_file is None:
+        return "Please upload an audio file."
+    # Save uploaded file temporarily with correct suffix
+    import pathlib
+    suffix = pathlib.Path(uploaded_file.name).suffix
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+        temp_file.write(uploaded_file.read())
+        temp_path = temp_file.name
+    try:
+        converted_path = temp_path + "_converted.wav"
+        convert_audio_to_wav(temp_path, converted_path)
+        os.remove(temp_path)  # remove original temp file
+        temp_path = converted_path
+    except Exception as e:
+        return f"Error processing audio: {e}"
+    # Run transcription
+    transcription = transcribe_audio_with_google_parallel(temp_path, chunk_length_ms=60000, overlap_ms=2000, language=language)
+    # Clean up converted file
+    try:
+        os.remove(temp_path)
+    except Exception:
+        pass
+    return transcription
+# Language options
+language_options = {
+    "English (US)": "en-US",
+    "Dutch": "nl-NL",
+    "English (UK)": "en-GB",
+    "Spanish": "es-ES",
+    "French": "fr-FR",
+    "German": "de-DE",
+    "Hindi": "hi-IN",
+    "Chinese (Mandarin)": "zh-CN",
+    "Arabic": "ar-SA",
+    "Turkish": "tr-TR",
+}
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio to Text Transcription")
+    gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")
+    with gr.Row():
+        audio_input = gr.Audio(source="upload", type="file", label="Upload audio file (mp3, wav, m4a, ogg)")
+        language_dropdown = gr.Dropdown(list(language_options.keys()), label="Select language", value="English (US)")
+    transcribe_btn = gr.Button("Transcribe")
+    output_text = gr.Textbox(label="Transcription Output", lines=15)
+    def on_transcribe(uploaded_file, lang_name):
+        lang_code = language_options[lang_name]
+        return transcribe(uploaded_file, lang_code)
+    transcribe_btn.click(on_transcribe, inputs=[audio_input, language_dropdown], outputs=output_text)
+demo.launch()