cdactvm committed
Commit bb285c0 · verified · 1 Parent(s): c07aefa

Update app.py

Files changed (1)
  1. app.py +117 -36
app.py CHANGED
@@ -1,34 +1,113 @@
+# import warnings
+# warnings.filterwarnings("ignore")
+
+# import os  # for handling of directories.
+# import re  # regular expression.
+# import pywt  # pywavelet for wavelet transformation. used in denoising.
+# import librosa  # used for audio processing.
+# import webrtcvad
+# import torchaudio  # python library for audio processing.
+# import numpy as np  # for mathematical calculations
+# import gradio as gr  # for inferencing.
+# import scipy.signal  # used for signal filtering. used in denoising audio signals.
+# import soundfile as sf  # used for reading and processing audio files.
+# from scipy.io.wavfile import write  # used for saving audio files in wav format.
+# from transformers import pipeline  # used for inferencing.
+# from transformers import AutoProcessor  # used for processing of input data.
+# from pyctcdecode import build_ctcdecoder
+# from transformers import Wav2Vec2ProcessorWithLM
+# from scipy.signal import butter, lfilter, wiener
+# from text2int import text_to_int
+# from isNumber import is_number
+# from Text2List import text_to_list
+# from convert2list import convert_to_list
+# from processDoubles import process_doubles
+# from replaceWords import replace_words
+# # from applyVad import apply_vad
+# # from wienerFilter import wiener_filter
+# # from highPassFilter import high_pass_filter
+# # from waveletDenoise import wavelet_denoise
+
+# asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-punjabi")
+
+
+# # Function to apply a high-pass filter
+# def high_pass_filter(audio, sr, cutoff=300):
+#     nyquist = 0.5 * sr
+#     normal_cutoff = cutoff / nyquist
+#     b, a = butter(1, normal_cutoff, btype='high', analog=False)
+#     filtered_audio = lfilter(b, a, audio)
+#     return filtered_audio
+
+# # Function to apply wavelet denoising
+# def wavelet_denoise(audio, wavelet='db1', level=1):
+#     coeffs = pywt.wavedec(audio, wavelet, mode='per')
+#     sigma = np.median(np.abs(coeffs[-level])) / 0.5
+#     uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
+#     coeffs[1:] = [pywt.threshold(i, value=uthresh, mode='soft') for i in coeffs[1:]]
+#     return pywt.waverec(coeffs, wavelet, mode='per')
+
+# # Function to apply a Wiener filter for noise reduction
+# def apply_wiener_filter(audio):
+#     return wiener(audio)
+
+
+# # # Function to handle speech recognition
+# def recognize_speech(audio_file):
+#     audio, sr = librosa.load(audio_file, sr=16000)
+#     audio = high_pass_filter(audio, sr)
+#     audio = apply_wiener_filter(audio)
+#     denoised_audio = wavelet_denoise(audio)
+#     result = asr_model(denoised_audio)
+#     text_value = result['text']
+#     cleaned_text = text_value.replace("[PAD]", "")
+#     converted_to_list = convert_to_list(cleaned_text, text_to_list())
+#     processed_doubles = process_doubles(converted_to_list)
+#     replaced_words = replace_words(processed_doubles)
+#     converted_text = text_to_int(replaced_words)
+#     return converted_text
+
+# def sel_lng(lng, mic=None, file=None):
+#     if mic is not None:
+#         audio = mic
+#     elif file is not None:
+#         audio = file
+#     else:
+#         return "You must either provide a mic recording or a file"
+
+#     if lng == "model_1":
+#         return recognize_speech(audio)
+
+# demo=gr.Interface(
+#     fn=sel_lng,
+
+#     inputs=[
+#         gr.Dropdown([
+#             "model_1"],label="Select Model"),
+#         gr.Audio(sources=["microphone","upload"], type="filepath"),
+#     ],
+#     outputs=[
+#         "textbox"
+#     ],
+#     title="Automatic Speech Recognition",
+#     description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
+# ).launch()
+
+
 import warnings
 warnings.filterwarnings("ignore")
 
-import os
-import re
-import pywt
-import librosa
-import webrtcvad
-import torchaudio
-import numpy as np
-import gradio as gr
-import scipy.signal
-import soundfile as sf
-from scipy.io.wavfile import write
-from transformers import pipeline
-from transformers import AutoProcessor
-from pyctcdecode import build_ctcdecoder
-from transformers import Wav2Vec2ProcessorWithLM
+import librosa  # used for audio processing.
+import numpy as np  # for mathematical calculations
+import gradio as gr  # for inferencing.
+from transformers import pipeline  # used for inferencing.
 from scipy.signal import butter, lfilter, wiener
-from text2int import text_to_int
-from isNumber import is_number
-from Text2List import text_to_list
 from convert2list import convert_to_list
 from processDoubles import process_doubles
 from replaceWords import replace_words
-# from applyVad import apply_vad
-# from wienerFilter import wiener_filter
-# from highPassFilter import high_pass_filter
-# from waveletDenoise import wavelet_denoise
-from scipy.signal import butter, lfilter, wiener
+from text2int import text_to_int
 
+# Initialize ASR model pipeline
 asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-punjabi")
 
 
@@ -42,6 +121,7 @@ def high_pass_filter(audio, sr, cutoff=300):
 
 # Function to apply wavelet denoising
 def wavelet_denoise(audio, wavelet='db1', level=1):
+    import pywt
     coeffs = pywt.wavedec(audio, wavelet, mode='per')
     sigma = np.median(np.abs(coeffs[-level])) / 0.5
     uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
@@ -51,9 +131,9 @@ def wavelet_denoise(audio, wavelet='db1', level=1):
 # Function to apply a Wiener filter for noise reduction
 def apply_wiener_filter(audio):
     return wiener(audio)
-
-
-# # Function to handle speech recognition
+
+
+# Function to handle speech recognition
 def recognize_speech(audio_file):
     audio, sr = librosa.load(audio_file, sr=16000)
     audio = high_pass_filter(audio, sr)
@@ -68,6 +148,7 @@ def recognize_speech(audio_file):
     converted_text = text_to_int(replaced_words)
     return converted_text
 
+
 def sel_lng(lng, mic=None, file=None):
     if mic is not None:
         audio = mic
@@ -79,17 +160,17 @@ def sel_lng(lng, mic=None, file=None):
     if lng == "model_1":
         return recognize_speech(audio)
 
-demo=gr.Interface(
+
+# Create a Gradio interface
+demo = gr.Interface(
     fn=sel_lng,
-
     inputs=[
-        gr.Dropdown([
-            "model_1"],label="Select Model"),
-        gr.Audio(sources=["microphone","upload"], type="filepath"),
-    ],
-    outputs=[
-        "textbox"
+        gr.Dropdown(["model_1"], label="Select Model"),
+        gr.Audio(sources=["microphone", "upload"], type="filepath"),
     ],
+    outputs=["textbox"],
     title="Automatic Speech Recognition",
-    description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
-).launch()
+    description="Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially, it will take some time to load the model. The recognized text will appear in the output textbox"
+)
+
+demo.launch()
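
For reference, a minimal standalone sketch (not part of this commit) of the core recognition call the updated app.py builds on: load 16 kHz audio with librosa and pass it to the same Hugging Face pipeline. It skips the high-pass, Wiener, and wavelet denoising steps and the number post-processing that app.py applies; "sample_punjabi.wav" is a hypothetical file name used only for illustration.

    import librosa
    from transformers import pipeline

    # Same checkpoint as app.py; the pipeline accepts a 16 kHz numpy waveform.
    asr = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-punjabi")
    audio, sr = librosa.load("sample_punjabi.wav", sr=16000)  # hypothetical sample file
    print(asr(audio)["text"])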