cdactvm committed on
Commit
45aba10
·
verified ·
1 Parent(s): fe9f5d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -58
app.py CHANGED
@@ -3,6 +3,7 @@ warnings.filterwarnings("ignore")
3
 
4
  import os
5
  import re
 
6
  import librosa
7
  import webrtcvad
8
  import nbimporter
@@ -28,71 +29,30 @@ from applyVad import apply_vad
28
  from wienerFilter import wiener_filter
29
  from highPassFilter import high_pass_filter
30
  from waveletDenoise import wavelet_denoise
 
31
 
32
- transcriber_taml_new = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
33
- # transcriber_hindi_old = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
34
- processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-tamil_new")
35
- vocab_dict = processor.tokenizer.get_vocab()
36
- sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
37
- # decoder = build_ctcdecoder(
38
- # labels=list(sorted_vocab_dict.keys()),
39
- # kenlm_model_path="lm.binary",
40
- # )
41
- # processor_with_lm = Wav2Vec2ProcessorWithLM(
42
- # feature_extractor=processor.feature_extractor,
43
- # tokenizer=processor.tokenizer,
44
- # decoder=decoder
45
- # )
46
- processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
47
- # transcriber_hindi_lm = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
48
-
49
 
50
- # def transcribe_tamil_new(audio):
51
- # # # Process the audio file
52
- # transcript = transcriber_taml_new(audio)
53
- # text_value = transcript['text']
54
- # processd_doubles=process_doubles(text_value)
55
- # replaced_words = replace_words(processd_doubles)
56
- # converted_text=text_to_int(replaced_words)
57
- # return converted_text
58
-
59
 
60
- ###############################################
61
- # implementation of noise reduction techniques.
 
 
 
 
 
62
 
63
  # Function to apply a Wiener filter for noise reduction
64
  def apply_wiener_filter(audio):
65
  return wiener(audio)
66
 
67
- # Function to handle speech recognition
68
- def Noise_cancellation_function(audio_file):
69
- # Load the audio file using librosa
70
- audio, sr = librosa.load(audio_file, sr=16000)
71
-
72
- # Step 1: Apply a high-pass filter
73
- audio = high_pass_filter(audio, sr)
74
-
75
- # Step 2: Apply Wiener filter for noise reduction
76
- audio = apply_wiener_filter(audio)
77
-
78
- # Step 3: Apply wavelet denoising
79
- denoised_audio = wavelet_denoise(audio)
80
-
81
- # Save the denoised audio to a temporary file
82
- temp_wav = "temp_denoised.wav"
83
- write(temp_wav, sr, denoised_audio)
84
-
85
- # Perform speech recognition on the denoised audio
86
- transcript = transcriber_taml_new(temp_wav)
87
- text_value = transcript['text']
88
- cleaned_text=text_value.replace("<s>","")
89
- processd_doubles=process_doubles(cleaned_text)
90
- replaced_words = replace_words(processd_doubles)
91
- converted_text=text_to_int(replaced_words)
92
- return converted_text
93
-
94
- #################################################
95
-
96
  # Function to handle speech recognition
97
  def recognize_speech(audio_file):
98
  audio, sr = librosa.load(audio_file, sr=16000)
@@ -113,7 +73,6 @@ def recognize_speech(audio_file):
113
  print(converted_text)
114
  return converted_text
115
 
116
-
117
  def sel_lng(lng, mic=None, file=None):
118
  if mic is not None:
119
  audio = mic
 
3
 
4
  import os
5
  import re
6
+ import pywt
7
  import librosa
8
  import webrtcvad
9
  import nbimporter
 
29
  from wienerFilter import wiener_filter
30
  from highPassFilter import high_pass_filter
31
  from waveletDenoise import wavelet_denoise
32
+ from scipy.signal import butter, lfilter, wiener
33
 
34
+ asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Function to apply a high-pass filter
37
def high_pass_filter(audio, sr, cutoff=300, order=1):
    """Attenuate content below ``cutoff`` Hz with a Butterworth high-pass filter.

    Args:
        audio: 1-D sequence of audio samples.
        sr: Sampling rate of ``audio`` in Hz.
        cutoff: Corner frequency in Hz; must lie strictly between 0 and the
            Nyquist frequency (``sr / 2``).
        order: Order of the Butterworth design. The default of 1 reproduces
            the previously hard-coded first-order filter.

    Returns:
        The filtered samples as returned by ``scipy.signal.lfilter``.

    Raises:
        ValueError: If ``cutoff`` is not strictly between 0 and ``sr / 2``.
    """
    nyquist = 0.5 * sr
    # Fail early with a readable message instead of relying on the more
    # cryptic error ``butter`` raises for an out-of-range normalized cutoff.
    if not 0 < cutoff < nyquist:
        raise ValueError(
            f"cutoff must be between 0 and the Nyquist frequency "
            f"({nyquist} Hz), got {cutoff}"
        )
    # ``butter`` expects the digital cutoff as a fraction of Nyquist.
    b, a = butter(order, cutoff / nyquist, btype='high', analog=False)
    return lfilter(b, a, audio)
 
 
43
 
44
+ # Function to apply wavelet denoising
45
def wavelet_denoise(audio, wavelet='db1', level=1):
    """Denoise a signal by soft-thresholding its wavelet detail coefficients.

    Args:
        audio: 1-D sequence of audio samples.
        wavelet: Name of the wavelet passed to ``pywt.wavedec``/``pywt.waverec``.
        level: Selects which coefficient array the noise level is estimated
            from, counted from the end of the ``wavedec`` result
            (``coeffs[-level]``). It does not set the decomposition depth;
            ``wavedec`` is called with its default depth.

    Returns:
        The reconstructed signal, trimmed to the length of ``audio``.
    """
    coeffs = pywt.wavedec(audio, wavelet, mode='per')
    # NOTE(review): the usual median-based noise estimate divides by 0.6745;
    # 0.5 is kept here to preserve the current denoising strength - confirm
    # whether it is intentional.
    sigma = np.median(np.abs(coeffs[-level])) / 0.5
    uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
    # Leave the approximation coefficients (index 0) untouched; shrink only
    # the detail coefficients.
    coeffs[1:] = [
        pywt.threshold(detail, value=uthresh, mode='soft')
        for detail in coeffs[1:]
    ]
    denoised = pywt.waverec(coeffs, wavelet, mode='per')
    # Periodization pads odd-length inputs during decomposition, so the
    # reconstruction can be longer than the input; trim the surplus so the
    # output lines up sample-for-sample with ``audio``.
    return denoised[:len(audio)]
51
 
52
  # Function to apply a Wiener filter for noise reduction
53
  def apply_wiener_filter(audio):
54
  return wiener(audio)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # Function to handle speech recognition
57
  def recognize_speech(audio_file):
58
  audio, sr = librosa.load(audio_file, sr=16000)
 
73
  print(converted_text)
74
  return converted_text
75
 
 
76
  def sel_lng(lng, mic=None, file=None):
77
  if mic is not None:
78
  audio = mic