#!/usr/bin/env python
# coding: utf-8
"""Gradio demo: denoise an audio clip and transcribe it with a wav2vec2-BERT ASR model.

Pipeline: load at 16 kHz -> high-pass filter -> Wiener filter -> wavelet
soft-threshold denoising -> ASR -> project-local text post-processing.
"""

# In[1]:

import gradio as gr
import librosa
import numpy as np
import pywt
import nbimporter  # kept from the original import list
from scipy.signal import butter, lfilter, wiener
from scipy.io.wavfile import write
from transformers import pipeline

from text2int import text_to_int
from isNumber import is_number
from Text2List import text_to_list
from convert2list import convert_to_list
from processDoubles import process_doubles
from replaceWords import replace_words

# Sample rate the audio is resampled to before filtering and recognition.
TARGET_SAMPLE_RATE = 16000

asr_model = pipeline(
    "automatic-speech-recognition",
    model="cdactvm/w2v-bert-tamil_new",
)


# Function to apply a high-pass filter
def high_pass_filter(audio: np.ndarray, sr: int, cutoff: float = 300) -> np.ndarray:
    """Attenuate content below ``cutoff`` Hz with a first-order Butterworth filter.

    Args:
        audio: 1-D array of samples.
        sr: Sample rate of ``audio`` in Hz.
        cutoff: Cut-off frequency in Hz.

    Returns:
        The filtered signal, same length as ``audio``.
    """
    nyquist = 0.5 * sr
    # butter() expects the cut-off normalised to the Nyquist frequency.
    normal_cutoff = cutoff / nyquist
    b, a = butter(1, normal_cutoff, btype='high', analog=False)
    return lfilter(b, a, audio)


# Function to apply wavelet denoising
def wavelet_denoise(audio: np.ndarray, wavelet: str = 'db1', level: int = 1) -> np.ndarray:
    """Denoise ``audio`` by soft-thresholding its wavelet detail coefficients.

    Args:
        audio: 1-D array of samples.
        wavelet: Name of the wavelet passed to PyWavelets.
        level: Which detail band, counted from the finest (1 = finest), is
            used to estimate the noise level. It does not limit the depth of
            the decomposition.

    Returns:
        The reconstructed signal, trimmed to the length of ``audio``.
    """
    n_samples = len(audio)
    if n_samples == 0:
        # log(0) below would turn the threshold into NaN; nothing to denoise.
        return audio

    coeffs = pywt.wavedec(audio, wavelet, mode='per')
    # Noise estimate from the median absolute detail coefficient.
    # NOTE(review): the usual MAD normalisation constant is 0.6745; 0.5 is
    # kept as found in the original code - confirm it is intentional.
    sigma = np.median(np.abs(coeffs[-level])) / 0.5
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))
    # Keep the approximation band (index 0) and shrink every detail band.
    coeffs[1:] = [
        pywt.threshold(band, value=uthresh, mode='soft') for band in coeffs[1:]
    ]
    restored = pywt.waverec(coeffs, wavelet, mode='per')
    # Periodization-mode reconstruction can be one sample longer than an
    # odd-length input; trim so the output length matches the input.
    return restored[:n_samples]


# Function to apply a Wiener filter for noise reduction
def apply_wiener_filter(audio: np.ndarray) -> np.ndarray:
    """Apply SciPy's Wiener filter with its default window and noise estimate.

    Non-finite values, which can appear where the local variance is zero
    (e.g. digital silence), are replaced with 0 so they do not reach the
    later stages.
    """
    return np.nan_to_num(wiener(audio), nan=0.0, posinf=0.0, neginf=0.0)


# Function to handle speech recognition
def recognize_speech(audio_file):
    """Denoise the audio at ``audio_file`` and return the post-processed transcript.

    Args:
        audio_file: Path to the recording, as supplied by the Gradio audio
            component (``type="filepath"``).

    Returns:
        The value produced by ``text_to_int`` for the cleaned transcript.

    Raises:
        gr.Error: If no audio was supplied.
    """
    if not audio_file:
        raise gr.Error("No audio provided. Please record or upload an audio clip.")

    audio, sr = librosa.load(audio_file, sr=TARGET_SAMPLE_RATE)
    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    # The filters promote the signal to float64; hand float32 samples to the model.
    denoised_audio = np.asarray(wavelet_denoise(audio), dtype=np.float32)

    result = asr_model(denoised_audio)
    text_value = result['text']
    # NOTE(review): the original call was replace("", ""), which is a no-op;
    # the search string appears to have been lost. Presumably it was the
    # "<s>" token - confirm against the model output.
    cleaned_text = text_value.replace("<s>", "")
    print(cleaned_text)

    converted_to_list = convert_to_list(cleaned_text, text_to_list())
    print(converted_to_list)
    processed_doubles = process_doubles(converted_to_list)
    print(processed_doubles)
    replaced_words = replace_words(processed_doubles)
    print(replaced_words)
    converted_text = text_to_int(replaced_words)
    print(converted_text)
    return converted_text


# Gradio Interface
# The title and description name Tamil because the loaded checkpoint is
# "cdactvm/w2v-bert-tamil_new" (the original text said "Hindi").
demo = gr.Interface(
    fn=recognize_speech,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Speech Recognition with Advanced Noise Reduction & Tamil ASR",
    description="Upload an audio file, and the system will use high-pass filtering, Wiener filtering, and wavelet-based denoising, then a Tamil ASR model will transcribe the clean audio.",
)
demo.launch()


# In[ ]: