# Spaces: Sleeping
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import gradio as gr | |
import librosa | |
import numpy as np | |
import pywt | |
import nbimporter | |
from scipy.signal import butter, lfilter, wiener | |
from scipy.io.wavfile import write | |
from transformers import pipeline | |
from text2int import text_to_int | |
from isNumber import is_number | |
from Text2List import text_to_list | |
from convert2list import convert_to_list | |
from processDoubles import process_doubles | |
from replaceWords import replace_words | |
# Build the speech-recognition pipeline once at module load so that every
# call to recognize_speech() reuses the same model instance.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-tamil_new",
)
# Function to apply a high-pass filter | |
def high_pass_filter(audio, sr, cutoff=300):
    """Attenuate content below ``cutoff`` Hz with a first-order Butterworth filter.

    Args:
        audio: Array-like of audio samples.
        sr: Sampling rate of ``audio`` in Hz.
        cutoff: Corner frequency of the high-pass filter in Hz.

    Returns:
        The filtered signal produced by ``scipy.signal.lfilter``.
    """
    # butter() expects the corner frequency as a fraction of Nyquist (sr / 2).
    half_rate = 0.5 * sr
    numerator, denominator = butter(
        1, cutoff / half_rate, btype='high', analog=False
    )
    return lfilter(numerator, denominator, audio)
# Function to apply wavelet denoising | |
def wavelet_denoise(audio, wavelet='db1', level=1):
    """Denoise a signal by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec`` (periodized boundary mode,
    default decomposition depth), every detail band is soft-thresholded with
    the universal threshold ``sigma * sqrt(2 * ln(N))``, and the signal is
    rebuilt with ``pywt.waverec``.

    Args:
        audio: Array-like of audio samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Which detail band, counted from the finest (1 = finest), is
            used to estimate the noise level. It does not change the depth
            of the decomposition.

    Returns:
        The denoised signal, with the same number of samples as ``audio``.
    """
    samples = np.asarray(audio)
    n_samples = len(samples)
    if n_samples == 0:
        # Nothing to denoise; also avoids log(0) in the threshold below.
        return samples

    coeffs = pywt.wavedec(samples, wavelet, mode='per')

    # Robust noise estimate: median absolute deviation of the chosen detail
    # band, scaled by 0.6745 (the MAD of a unit-variance Gaussian) so that
    # sigma estimates the noise standard deviation.
    sigma = np.median(np.abs(coeffs[-level])) / 0.6745
    uthresh = sigma * np.sqrt(2 * np.log(n_samples))

    # Leave the approximation band (coeffs[0]) untouched; shrink all details.
    coeffs[1:] = [
        pywt.threshold(band, value=uthresh, mode='soft') for band in coeffs[1:]
    ]

    restored = pywt.waverec(coeffs, wavelet, mode='per')
    # Periodized reconstruction can be one sample longer than an odd-length
    # input, so trim back to the original length.
    return restored[:n_samples]
# Function to apply a Wiener filter for noise reduction | |
def apply_wiener_filter(audio):
    """Reduce noise with SciPy's Wiener filter (default window size).

    Args:
        audio: Array-like of audio samples.

    Returns:
        The filtered signal. Any sample for which the filter yields a
        non-finite value is replaced by the corresponding input sample.
    """
    samples = np.asarray(audio)

    # wiener() divides by the local variance. For a constant stretch (for
    # example digital silence) that variance is zero, and when the estimated
    # noise power is zero too the result is 0/0 = NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        filtered = wiener(samples)

    finite = np.isfinite(filtered)
    if not finite.all():
        # Where the local variance is zero the signal is locally constant,
        # so the input sample is already the right answer.
        filtered = np.where(finite, filtered, samples)
    return filtered
# Function to handle speech recognition | |
def recognize_speech(audio_file):
    """Denoise a recording, transcribe it, and post-process the transcript.

    Pipeline: load at 16 kHz -> high-pass filter -> Wiener filter -> wavelet
    denoising -> ASR model -> text helpers (``convert_to_list``,
    ``process_doubles``, ``replace_words``, ``text_to_int``).

    Args:
        audio_file: Path to the audio file to transcribe. May be ``None`` or
            empty when the UI is submitted without any audio.

    Returns:
        The value returned by ``text_to_int`` for the processed transcript,
        or an empty string when no audio was provided.
    """
    if not audio_file:
        # Gradio passes None when nothing was recorded or uploaded;
        # librosa.load would raise on it.
        return ""

    # Resample to 16 kHz on load.
    # NOTE(review): presumably the rate the ASR model expects - confirm.
    audio, sr = librosa.load(audio_file, sr=16000)

    audio = high_pass_filter(audio, sr)
    audio = apply_wiener_filter(audio)
    denoised_audio = wavelet_denoise(audio)

    # Pass the waveform together with its sampling rate so the pipeline does
    # not have to assume one. The filters above produce float64; cast to
    # float32 before handing the waveform to the model.
    result = asr_model({
        "raw": np.asarray(denoised_audio, dtype=np.float32),
        "sampling_rate": sr,
    })
    text_value = result['text']

    # Strip the "<s>" marker that can appear in the raw transcript.
    cleaned_text = text_value.replace("<s>", "")
    print(cleaned_text)

    # Each intermediate result is printed so the stages can be traced in
    # the application log.
    converted_to_list = convert_to_list(cleaned_text, text_to_list())
    print(converted_to_list)
    processed_doubles = process_doubles(converted_to_list)
    print(processed_doubles)
    replaced_words = replace_words(processed_doubles)
    print(replaced_words)
    converted_text = text_to_int(replaced_words)
    print(converted_text)
    return converted_text
# Gradio Interface | |
# Build the web UI: one audio input (microphone or file upload, delivered to
# recognize_speech as a file path) and a plain-text output.
# The title and description name Tamil to match the loaded model
# ("cdactvm/w2v-bert-tamil_new").
demo = gr.Interface(
    fn=recognize_speech,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Speech Recognition with Advanced Noise Reduction & Tamil ASR",
    description=(
        "Record or upload an audio file, and the system will use high-pass "
        "filtering, Wiener filtering, and wavelet-based denoising, then a "
        "Tamil ASR model will transcribe the clean audio."
    ),
)
demo.launch()
# In[ ]: | |