Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
+ import os
5
+ import re
6
+ import librosa
7
+ import webrtcvad
8
+ import nbimporter
9
+ import torchaudio
10
+ import numpy as np
11
+ import gradio as gr
12
+ import scipy.signal
13
+ import soundfile as sf
14
+ from scipy.io.wavfile import write
15
+ from transformers import pipeline
16
+ from transformers import AutoProcessor
17
+ from pyctcdecode import build_ctcdecoder
18
+ from transformers import Wav2Vec2ProcessorWithLM
19
+ from scipy.signal import butter, lfilter, wiener
20
+
21
+ from text2int import text_to_int
22
+ from isNumber import is_number
23
+ from Text2List import text_to_list
24
+ from convert2list import convert_to_list
25
+ from processDoubles import process_doubles
26
+ from replaceWords import replace_words
27
+ from applyVad import apply_vad
28
+ from wienerFilter import wiener_filter
29
+ from highPassFilter import high_pass_filter
30
+ from waveletDenoise import wavelet_denoise
31
+
32
+
33
+
34
# Two plain ASR pipelines, one per checkpoint.
transcriber_hindi_new = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-2.0-hindi_new",
)
transcriber_hindi_old = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/huggingface-hindi_model",
)

# Build a CTC decoder backed by the "lm.binary" language model, using the
# vocabulary of the "new" checkpoint.
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-2.0-hindi_new")
vocab_dict = processor.tokenizer.get_vocab()
# Decoder labels: lower-cased tokens, ordered by their token id.
sorted_vocab_dict = {
    token.lower(): token_id
    for token, token_id in sorted(vocab_dict.items(), key=lambda entry: entry[1])
}
decoder = build_ctcdecoder(
    labels=list(sorted_vocab_dict.keys()),
    kenlm_model_path="lm.binary",
)

# Wrap the feature extractor, tokenizer and LM decoder in a single processor.
processor_with_lm = Wav2Vec2ProcessorWithLM(
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    decoder=decoder,
)
processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"

# Third pipeline: same checkpoint as `transcriber_hindi_new`, decoded with the LM.
transcriber_hindi_lm = pipeline(
    "automatic-speech-recognition",
    model="cdactvm/w2v-bert-2.0-hindi_new",
    tokenizer=processor_with_lm,
    feature_extractor=processor_with_lm.feature_extractor,
    decoder=processor_with_lm.decoder,
)
50
+
51
+
52
def transcribe_hindi_new(audio):
    """Transcribe *audio* with ``transcriber_hindi_new`` and post-process the text.

    The ``'text'`` field of the pipeline output is passed through
    ``process_doubles``, ``replace_words`` and ``text_to_int``, in that order,
    and the result of the last step is returned.
    """
    raw_text = transcriber_hindi_new(audio)['text']
    return text_to_int(replace_words(process_doubles(raw_text)))
60
+
61
def transcribe_hindi_lm(audio):
    """Transcribe *audio* with ``transcriber_hindi_lm`` and post-process the text.

    The ``'text'`` field of the pipeline output is passed through
    ``process_doubles``, ``replace_words`` and ``text_to_int``, in that order,
    and the result of the last step is returned.
    """
    raw_text = transcriber_hindi_lm(audio)['text']
    return text_to_int(replace_words(process_doubles(raw_text)))
69
+
70
def transcribe_hindi_old(audio):
    """Transcribe *audio* with ``transcriber_hindi_old`` and post-process the text.

    Every literal ``"<s>"`` is removed from the pipeline's ``'text'`` output
    before it is passed through ``process_doubles``, ``replace_words`` and
    ``text_to_int``, in that order. The result of the last step is returned.
    """
    raw_text = transcriber_hindi_old(audio)['text']
    without_markers = raw_text.replace("<s>", "")
    return text_to_int(replace_words(process_doubles(without_markers)))
79
+
80
+ ###############################################
81
+ # implementation of noise reduction techniques.
82
+
83
+ # Function to apply a Wiener filter for noise reduction
84
def apply_wiener_filter(audio):
    """Return *audio* filtered by ``scipy.signal.wiener`` with its default arguments."""
    filtered = wiener(audio)
    return filtered
86
+
87
+ # Function to denoise an audio file and then run speech recognition on the result
88
def Noise_cancellation_function(audio_file):
    """Denoise *audio_file*, transcribe it with the LM pipeline, and post-process the text.

    The audio is loaded at 16 kHz and passed through ``high_pass_filter``,
    ``apply_wiener_filter`` and ``wavelet_denoise``. The denoised signal is
    written to a temporary WAV file, which is fed to ``transcriber_hindi_lm``.
    Literal ``"<s>"`` markers are stripped from the recognized text before it
    goes through ``process_doubles``, ``replace_words`` and ``text_to_int``.

    Args:
        audio_file: Path of the audio file to process (handed to ``librosa.load``).

    Returns:
        The value returned by ``text_to_int`` for the post-processed transcript.
    """
    import os
    import tempfile

    # Load the audio file using librosa, resampled to 16 kHz.
    audio, sr = librosa.load(audio_file, sr=16000)

    # Step 1: Apply a high-pass filter
    audio = high_pass_filter(audio, sr)

    # Step 2: Apply Wiener filter for noise reduction
    audio = apply_wiener_filter(audio)

    # Step 3: Apply wavelet denoising
    denoised_audio = wavelet_denoise(audio)

    # Use a unique temporary file per call: a fixed file name would let
    # concurrent requests overwrite each other's audio, and would leave the
    # file behind in the working directory.
    fd, temp_wav = tempfile.mkstemp(suffix=".wav", prefix="denoised_")
    os.close(fd)  # only the path is needed; `write` reopens the file itself
    try:
        write(temp_wav, sr, denoised_audio)
        # Perform speech recognition on the denoised audio
        transcript = transcriber_hindi_lm(temp_wav)
    finally:
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

    text_value = transcript['text']
    cleaned_text = text_value.replace("<s>", "")
    processed_doubles = process_doubles(cleaned_text)
    replaced_words = replace_words(processed_doubles)
    converted_text = text_to_int(replaced_words)
    return converted_text
113
+
114
+ #################################################
115
+
116
def sel_lng(lng, mic=None, file=None):
    """Run the transcription backend selected by *lng* on the supplied audio.

    Args:
        lng: Model selector; one of ``"model_1"``, ``"model_2"``, ``"model_3"``
            or ``"model_4"``.
        mic: Microphone recording; takes precedence over *file* when given.
        file: Uploaded audio; used only when *mic* is ``None``.

    Returns:
        The text produced by the selected backend, or a human-readable message
        when no audio was provided or *lng* is not a known model.
    """
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"

    if lng == "model_1":
        return transcribe_hindi_old(audio)
    if lng == "model_2":
        return transcribe_hindi_new(audio)
    if lng == "model_3":
        return transcribe_hindi_lm(audio)
    if lng == "model_4":
        return Noise_cancellation_function(audio)

    # No model (or an unknown one) was selected: say so instead of silently
    # returning None, which would show up as an empty output box.
    return "You must select one of: model_1, model_2, model_3, model_4"
132
+
133
+
134
+ # demo=gr.Interface(
135
+ # transcribe,
136
+ # inputs=[
137
+ # gr.Audio(sources=["microphone","upload"], type="filepath"),
138
+ # ],
139
+ # outputs=[
140
+ # "textbox"
141
+ # ],
142
+ # title="Automatic Speech Recognition",
143
+ # description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
144
+ # ).launch()
145
+
146
# Gradio UI: a model selector plus one audio input (microphone or upload),
# wired to `sel_lng`, with the recognized text shown in a textbox.
# `demo` is bound to the Interface itself (not to the return value of
# `.launch()`), so the name refers to the app object.
demo = gr.Interface(
    fn=sel_lng,
    inputs=[
        gr.Dropdown(
            ["model_1", "model_2", "model_3", "model_4"],
            label="Select Model",
        ),
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
    ],
    outputs=[
        "textbox",
    ],
    title="Automatic Speech Recognition",
    description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
)
demo.launch()