Spaces:
Sleeping
Sleeping
import warnings | |
warnings.filterwarnings("ignore") | |
import librosa # Library for loading and processing audio files. | |
import numpy as np # Library for numerical computations, used for signal processing. | |
import gradio as gr # Library for creating a web-based user interface for inference. | |
from transformers import pipeline # Import pipeline for automatic speech recognition (ASR). | |
# Importing custom utility functions for text processing. | |
from text2int import text_to_int # Converts text numbers (e.g., "one") into integers (e.g., 1). | |
from Text2List import text_to_list # Converts a text string into a list of words. | |
from convert2list import convert_to_list # Converts processed text into a structured list. | |
from processDoubles import process_doubles # Handles repeated words or numbers in speech recognition output. | |
from replaceWords import replace_words # Replaces specific words in the recognized text with alternatives. | |
from highPassFilter import high_pass_filter # Filters noise by passing high-frequency components (attenuating low-frequency content).
from waveletDenoise import wavelet_denoise # used for signal Denoising. | |
from applyWienerFilter import apply_wiener_filter # for Signal Denoising. | |
# Build the speech-recognition pipeline once at import time so that every
# request reuses the same loaded model instead of reloading it per call.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-punjabi",
)
def recognize_speech(audio_file):
    """Transcribe an audio file and post-process the recognized text.

    The audio is loaded with librosa at a 16 kHz sample rate, run through
    the high-pass filter, the Wiener filter and the wavelet denoiser (in
    that order), and then handed to the module-level ``asr_model``
    pipeline. "[PAD]" markers are stripped from the transcript before it
    goes through the text post-processing helpers.

    Args:
        audio_file: Audio source forwarded unchanged to ``librosa.load``
            (the UI supplies a file path).

    Returns:
        Whatever ``text_to_int`` returns for the post-processed transcript.
    """
    samples, sample_rate = librosa.load(audio_file, sr=16000)

    # Denoising chain: high-pass -> Wiener -> wavelet. The order is kept
    # exactly as is because each stage consumes the previous stage's output.
    samples = high_pass_filter(samples, sample_rate)
    samples = wavelet_denoise(apply_wiener_filter(samples))

    # The pipeline result is indexed by "text"; drop padding markers from it.
    transcript = asr_model(samples)["text"].replace("[PAD]", "")

    # Text post-processing: list conversion first, then the remaining
    # helpers applied one after another to the running result.
    outcome = convert_to_list(transcript, text_to_list())
    for stage in (process_doubles, replace_words, text_to_int):
        outcome = stage(outcome)
    return outcome
def sel_lng(lng, mic=None, file=None):
    """Dispatch an audio input to the recognizer for the selected model.

    Args:
        lng: Model identifier chosen in the UI. Only "model_1" is handled.
        mic: Audio from the microphone; takes precedence when not None.
        file: Uploaded audio; used only when ``mic`` is None.

    Returns:
        The result of ``recognize_speech`` for a supported model, or a
        human-readable message string when no audio was supplied or the
        model selection is not supported.
    """
    # Prefer the microphone recording, fall back to the uploaded file.
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"

    if lng == "model_1":
        return recognize_speech(audio)

    # Any other selection (including None) used to fall off the end of the
    # function and return None, leaving the output box silently empty.
    # Report the problem in the same message-string style as above.
    return f"Unsupported model selection: {lng!r}"
# Assemble the Gradio UI. The two input widgets are handed to sel_lng in
# order, so they fill its ``lng`` and ``mic`` parameters; ``file`` keeps its
# default of None because a single Audio widget covers both microphone
# recording and file upload.
_model_choice = gr.Dropdown(["model_1"], label="Select Model")
_audio_source = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=sel_lng,
    inputs=[_model_choice, _audio_source],
    outputs=["textbox"],
    title="Automatic Speech Recognition",
)

# Start the web server for the interface.
demo.launch()