ayazfau committed on
Commit
e5b54f2
·
verified ·
1 Parent(s): 654dce7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -52
app.py CHANGED
@@ -1,27 +1,10 @@
1
  import gradio as gr
2
- from dotenv import load_dotenv
3
- import os
4
  from pydub import AudioSegment
5
  import tempfile
 
6
  import speech_recognition as sr
7
  import concurrent.futures
8
 
9
- # Load environment variables
10
- load_dotenv()
11
-
12
- language_options = {
13
- "English (US)": "en-US",
14
- "Dutch": "nl-NL",
15
- "English (UK)": "en-GB",
16
- "Spanish": "es-ES",
17
- "French": "fr-FR",
18
- "German": "de-DE",
19
- "Hindi": "hi-IN",
20
- "Chinese (Mandarin)": "zh-CN",
21
- "Arabic": "ar-SA",
22
- "Turkish": "tr-TR",
23
- }
24
-
25
  def split_audio(audio_path, chunk_length_ms=60000, overlap_ms=2000):
26
  audio = AudioSegment.from_file(audio_path)
27
  chunks = []
@@ -52,22 +35,9 @@ def transcribe_chunk_indexed(indexed_chunk_language):
52
  except Exception as e:
53
  return index, f"[Error: {str(e)}]"
54
 
55
- def transcribe_audio(file, language_name):
56
- language = language_options[language_name]
57
-
58
- # Save uploaded file temporarily
59
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
60
- temp_file.write(file.read())
61
- temp_path = temp_file.name
62
-
63
- # Convert to proper format
64
- converted_path = temp_path + "_converted.wav"
65
- convert_audio_to_wav(temp_path, converted_path)
66
- temp_path = converted_path
67
-
68
- chunks = split_audio(temp_path)
69
  indexed_chunks = [(i, chunk, language) for i, chunk in enumerate(chunks)]
70
-
71
  transcription = [""] * len(indexed_chunks)
72
 
73
  with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
@@ -76,22 +46,67 @@ def transcribe_audio(file, language_name):
76
  idx, text = future.result()
77
  transcription[idx] = text
78
 
79
- final_text = " ".join(transcription)
80
- return final_text, final_text
81
-
82
- # Gradio UI
83
- iface = gr.Interface(
84
- fn=transcribe_audio,
85
- inputs=[
86
- gr.Audio(type="file", label="Upload Audio File"),
87
- gr.Dropdown(choices=list(language_options.keys()), label="Select Language", value="English (US)")
88
- ],
89
- outputs=[
90
- gr.Textbox(label="Transcription", lines=10),
91
- gr.File(label="Download Transcription", file_types=[".txt"], type="text")
92
- ],
93
- title="Audio to Text Transcription",
94
- description="Upload an audio file and get the transcribed text using Google Speech Recognition."
95
- )
96
-
97
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
2
  from pydub import AudioSegment
3
  import tempfile
4
+ import os
5
  import speech_recognition as sr
6
  import concurrent.futures
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def split_audio(audio_path, chunk_length_ms=60000, overlap_ms=2000):
9
  audio = AudioSegment.from_file(audio_path)
10
  chunks = []
 
35
  except Exception as e:
36
  return index, f"[Error: {str(e)}]"
37
 
38
+ def transcribe_audio_with_google_parallel(audio_path, chunk_length_ms=60000, overlap_ms=2000, language="en-US"):
39
+ chunks = split_audio(audio_path, chunk_length_ms, overlap_ms)
 
 
 
 
 
 
 
 
 
 
 
 
40
  indexed_chunks = [(i, chunk, language) for i, chunk in enumerate(chunks)]
 
41
  transcription = [""] * len(indexed_chunks)
42
 
43
  with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
 
46
  idx, text = future.result()
47
  transcription[idx] = text
48
 
49
+ return " ".join(transcription)
50
+
51
def transcribe(uploaded_file, language):
    """Transcribe an uploaded audio file and return the resulting text.

    The upload is copied to a temporary file (keeping its original suffix),
    converted with ``convert_audio_to_wav`` and then handed to
    ``transcribe_audio_with_google_parallel``. Every temporary file created
    here is removed before returning, whether the call succeeds or fails.

    Args:
        uploaded_file: Object exposing ``.name`` (used only for its file
            suffix) and ``.read()`` (the raw audio bytes), or ``None`` when
            nothing was uploaded.
        language: Language code forwarded unchanged as the ``language``
            argument of ``transcribe_audio_with_google_parallel``.

    Returns:
        The transcription text, or a human-readable message when no file was
        supplied or the audio could not be converted.
    """
    if uploaded_file is None:
        return "Please upload an audio file."

    # Local import: pathlib is not among the module-level imports.
    import pathlib

    suffix = pathlib.Path(uploaded_file.name).suffix
    temp_path = None
    converted_path = None
    try:
        # delete=False so the file survives the `with` and can be reopened
        # by path during conversion; it is removed in `finally` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = temp_file.name
            temp_file.write(uploaded_file.read())

        converted_path = temp_path + "_converted.wav"
        try:
            convert_audio_to_wav(temp_path, converted_path)
        except Exception as e:
            # Conversion problems are reported to the user rather than raised.
            return f"Error processing audio: {e}"

        return transcribe_audio_with_google_parallel(
            converted_path,
            chunk_length_ms=60000,
            overlap_ms=2000,
            language=language,
        )
    finally:
        # Best-effort cleanup on every exit path (success, conversion error,
        # or an exception escaping from transcription).
        for path in (temp_path, converted_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Already gone or never created; nothing more to do.
                pass
80
+
81
# Dropdown display name -> language code passed on to transcribe().
language_options = {
    "English (US)": "en-US",
    "Dutch": "nl-NL",
    "English (UK)": "en-GB",
    "Spanish": "es-ES",
    "French": "fr-FR",
    "German": "de-DE",
    "Hindi": "hi-IN",
    "Chinese (Mandarin)": "zh-CN",
    "Arabic": "ar-SA",
    "Turkish": "tr-TR",
}

with gr.Blocks() as demo:
    gr.Markdown("# Audio to Text Transcription")
    gr.Markdown("Upload an audio file, and we'll transcribe it into text using chunk processing.")

    # Inputs: the audio upload and the language selector.
    with gr.Row():
        audio_input = gr.Audio(
            source="upload",
            type="file",
            label="Upload audio file (mp3, wav, m4a, ogg)",
        )
        language_dropdown = gr.Dropdown(
            list(language_options),
            label="Select language",
            value="English (US)",
        )

    transcribe_btn = gr.Button("Transcribe")
    output_text = gr.Textbox(label="Transcription Output", lines=15)

    def on_transcribe(uploaded_file, lang_name):
        """Map the selected display name to its language code and transcribe."""
        return transcribe(uploaded_file, language_options[lang_name])

    transcribe_btn.click(
        on_transcribe,
        inputs=[audio_input, language_dropdown],
        outputs=output_text,
    )

demo.launch()