Spaces:
Starting
Starting
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ warnings.filterwarnings("ignore")
|
|
3 |
|
4 |
import os
|
5 |
import re
|
|
|
6 |
import librosa
|
7 |
import webrtcvad
|
8 |
import nbimporter
|
@@ -28,71 +29,30 @@ from applyVad import apply_vad
|
|
28 |
from wienerFilter import wiener_filter
|
29 |
from highPassFilter import high_pass_filter
|
30 |
from waveletDenoise import wavelet_denoise
|
|
|
31 |
|
32 |
-
|
33 |
-
# transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
34 |
-
processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
|
35 |
-
vocab_dict = processor.tokenizer.get_vocab()
|
36 |
-
sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
|
37 |
-
# decoder = build_ctcdecoder(
|
38 |
-
# labels=list(sorted_vocab_dict.keys()),
|
39 |
-
# kenlm_model_path="lm.binary",
|
40 |
-
# )
|
41 |
-
# processor_with_lm = Wav2Vec2ProcessorWithLM(
|
42 |
-
# feature_extractor=processor.feature_extractor,
|
43 |
-
# tokenizer=processor.tokenizer,
|
44 |
-
# decoder=decoder
|
45 |
-
# )
|
46 |
-
processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
|
47 |
-
# transcriber_hindi_lm = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
|
48 |
-
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
# return converted_text
|
58 |
-
|
59 |
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# Function to apply a Wiener filter for noise reduction
|
64 |
def apply_wiener_filter(audio):
    """Reduce noise in ``audio`` with ``scipy.signal.wiener`` (default window).

    Args:
        audio: Array-like of audio samples.

    Returns:
        Array of filtered samples. Any sample for which the filter yields a
        non-finite value is replaced by the corresponding input sample.
    """
    import numpy as np

    samples = np.asarray(audio)
    # wiener() divides by the local variance; for an all-zero (digitally
    # silent) signal both the variance and the estimated noise power are 0,
    # which yields 0/0 = NaN for every sample. Silence the warnings and
    # repair the result below instead of propagating NaNs downstream.
    with np.errstate(divide="ignore", invalid="ignore"):
        filtered = wiener(samples)
    return np.where(np.isfinite(filtered), filtered, samples)
|
66 |
|
67 |
-
# Function to handle speech recognition
|
68 |
-
def Noise_cancellation_function(audio_file):
    """Denoise an audio file, transcribe it, and return the post-processed text.

    The audio is loaded at 16 kHz, passed through ``high_pass_filter``,
    ``apply_wiener_filter`` and ``wavelet_denoise`` in that order, written to
    a temporary WAV file, and handed to ``transcriber_taml_new``. The ``text``
    field of the result has ``"<s>"`` removed and is then passed through
    ``process_doubles``, ``replace_words`` and ``text_to_int``.

    Args:
        audio_file: Path (or other object accepted by ``librosa.load``) of the
            audio to transcribe.

    Returns:
        The value returned by ``text_to_int`` for the cleaned transcript.

    NOTE(review): ``transcriber_taml_new``, ``write``, ``process_doubles``,
    ``replace_words`` and ``text_to_int`` are not defined in the visible part
    of the file; ``write`` is presumably ``scipy.io.wavfile.write`` - confirm.
    """
    import tempfile

    # Load the audio file using librosa, resampled to 16 kHz.
    audio, sr = librosa.load(audio_file, sr=16000)

    # Step 1: high-pass filter.
    audio = high_pass_filter(audio, sr)

    # Step 2: Wiener filter for noise reduction.
    audio = apply_wiener_filter(audio)

    # Step 3: wavelet denoising.
    denoised_audio = wavelet_denoise(audio)

    # Use a unique temporary file instead of a fixed "temp_denoised.wav" in the
    # working directory, so concurrent requests cannot overwrite each other's
    # audio, and always remove it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        temp_wav = handle.name
    try:
        write(temp_wav, sr, denoised_audio)
        # Perform speech recognition on the denoised audio.
        transcript = transcriber_taml_new(temp_wav)
    finally:
        os.remove(temp_wav)

    text_value = transcript['text']
    cleaned_text = text_value.replace("<s>", "")
    processd_doubles = process_doubles(cleaned_text)
    replaced_words = replace_words(processd_doubles)
    converted_text = text_to_int(replaced_words)
    return converted_text
|
93 |
-
|
94 |
-
#################################################
|
95 |
-
|
96 |
# Function to handle speech recognition
|
97 |
def recognize_speech(audio_file):
|
98 |
audio, sr = librosa.load(audio_file, sr=16000)
|
@@ -113,7 +73,6 @@ def recognize_speech(audio_file):
|
|
113 |
print(converted_text)
|
114 |
return converted_text
|
115 |
|
116 |
-
|
117 |
def sel_lng(lng, mic=None, file=None):
|
118 |
if mic is not None:
|
119 |
audio = mic
|
|
|
3 |
|
4 |
import os
|
5 |
import re
|
6 |
+
import pywt
|
7 |
import librosa
|
8 |
import webrtcvad
|
9 |
import nbimporter
|
|
|
29 |
from wienerFilter import wiener_filter
|
30 |
from highPassFilter import high_pass_filter
|
31 |
from waveletDenoise import wavelet_denoise
|
32 |
+
from scipy.signal import butter, lfilter, wiener
|
33 |
|
34 |
+
# Speech-recognition pipeline for the "cdactvm/w2v-bert-tamil_new" model, built
# once at import time and bound to a module-level name.
# NOTE(review): `pipeline` is presumably transformers.pipeline; its import is
# outside the visible part of the file - confirm.
asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
# Function to apply a high-pass filter
|
37 |
+
def high_pass_filter(audio, sr, cutoff=300, order=1):
    """Attenuate content below ``cutoff`` Hz with a Butterworth high-pass filter.

    Args:
        audio: Array-like of audio samples (filtered along the last axis,
            which is the ``scipy.signal.lfilter`` default).
        sr: Sampling rate of ``audio`` in Hz.
        cutoff: Corner frequency in Hz; must satisfy ``0 < cutoff < sr / 2``.
        order: Butterworth filter order. Defaults to 1, the value that was
            previously hard-coded.

    Returns:
        Array of filtered samples as returned by ``scipy.signal.lfilter``.

    Raises:
        ValueError: If ``cutoff`` is not strictly between 0 and the Nyquist
            frequency (``sr / 2``).

    NOTE(review): the file also has ``from highPassFilter import
    high_pass_filter``; this later definition rebinds the same name.
    """
    nyquist = 0.5 * sr
    # A digital Butterworth design needs a normalized cutoff strictly inside
    # (0, 1); fail early with a message that names the offending values.
    if not 0 < cutoff < nyquist:
        raise ValueError(
            f"cutoff must be between 0 and the Nyquist frequency "
            f"({nyquist} Hz); got {cutoff}"
        )
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='high', analog=False)
    filtered_audio = lfilter(b, a, audio)
    return filtered_audio
|
|
|
|
|
43 |
|
44 |
+
# Function to apply wavelet denoising
|
45 |
+
def wavelet_denoise(audio, wavelet='db1', level=1):
    """Denoise ``audio`` by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec`` in periodization mode, a
    threshold is derived from the median absolute value of one detail band,
    every detail band is soft-thresholded, and the signal is reconstructed.

    Args:
        audio: Array-like of audio samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Index, counted from the end of the coefficient list, of the
            band used to estimate the noise scale (``1`` selects the last,
            i.e. finest, detail band). It is NOT passed to ``wavedec``, so
            the decomposition depth is whatever ``wavedec`` picks by default.

    Returns:
        Array of denoised samples with the same length (last axis) as
        ``audio``.

    NOTE(review): this local definition rebinds the ``wavelet_denoise`` name
    that the file also imports from ``waveletDenoise``.
    """
    coeffs = pywt.wavedec(audio, wavelet, mode='per')
    # NOTE(review): the usual MAD-to-sigma normalization constant is 0.6745;
    # 0.5 gives a larger sigma and therefore a more aggressive threshold.
    # Kept as-is to preserve current behavior - confirm it is intentional.
    sigma = np.median(np.abs(coeffs[-level])) / 0.5
    uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
    coeffs[1:] = [
        pywt.threshold(band, value=uthresh, mode='soft') for band in coeffs[1:]
    ]
    denoised = pywt.waverec(coeffs, wavelet, mode='per')
    # waverec can return one extra trailing sample when the input length is
    # odd; trim so the output always lines up with the input.
    return denoised[..., :np.shape(audio)[-1]]
|
51 |
|
52 |
# Function to apply a Wiener filter for noise reduction
|
53 |
def apply_wiener_filter(audio):
    """Reduce noise in ``audio`` with ``scipy.signal.wiener`` (default window).

    Args:
        audio: Array-like of audio samples.

    Returns:
        Array of filtered samples. Any sample for which the filter yields a
        non-finite value is replaced by the corresponding input sample.
    """
    import numpy as np

    samples = np.asarray(audio)
    # wiener() divides by the local variance; for an all-zero (digitally
    # silent) signal both the variance and the estimated noise power are 0,
    # which yields 0/0 = NaN for every sample. Silence the warnings and
    # repair the result below instead of propagating NaNs downstream.
    with np.errstate(divide="ignore", invalid="ignore"):
        filtered = wiener(samples)
    return np.where(np.isfinite(filtered), filtered, samples)
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# Function to handle speech recognition
|
57 |
def recognize_speech(audio_file):
|
58 |
audio, sr = librosa.load(audio_file, sr=16000)
|
|
|
73 |
print(converted_text)
|
74 |
return converted_text
|
75 |
|
|
|
76 |
def sel_lng(lng, mic=None, file=None):
|
77 |
if mic is not None:
|
78 |
audio = mic
|