Spaces:
Sleeping
Sleeping
File size: 2,536 Bytes
aeac508 4e0c03c fb9d859 4e0c03c fb9d859 4e0c03c fb9d859 4e0c03c fb9d859 4e0c03c aeac508 bb285c0 b9a73f5 aeac508 bb285c0 aeac508 eaab174 d6f3644 aeac508 bb285c0 aeac508 bb285c0 aeac508 bb285c0 aeac508 bb285c0 4e0c03c bb285c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import warnings
warnings.filterwarnings("ignore")
import librosa # Library for loading and processing audio files.
import numpy as np # Library for numerical computations, used for signal processing.
import gradio as gr # Library for creating a web-based user interface for inference.
from transformers import pipeline # Import pipeline for automatic speech recognition (ASR).
# Importing custom utility functions for text processing.
from text2int import text_to_int # Converts text numbers (e.g., "one") into integers (e.g., 1).
from Text2List import text_to_list # Converts a text string into a list of words.
from convert2list import convert_to_list # Converts processed text into a structured list.
from processDoubles import process_doubles # Handles repeated words or numbers in speech recognition output.
from replaceWords import replace_words # Replaces specific words in the recognized text with alternatives.
from highPassFilter import high_pass_filter # High-pass filter: attenuates low-frequency noise while letting higher frequencies pass.
from waveletDenoise import wavelet_denoise # used for signal Denoising.
from applyWienerFilter import apply_wiener_filter # for Signal Denoising.
# Build the speech-recognition pipeline once at import time; recognize_speech
# reads this module-level object on every call.
asr_model = pipeline(
    task="automatic-speech-recognition",
    model="cdactvm/w2v-bert-punjabi",
)
# Function to handle speech recognition
def recognize_speech(audio_file):
    """Transcribe an audio file and post-process the recognized text.

    The audio is loaded with librosa at 16 kHz, run through the high-pass,
    Wiener and wavelet denoising helpers (in that order), and handed to the
    module-level ``asr_model`` pipeline. ``[PAD]`` markers are stripped from
    the transcript, which is then passed through ``convert_to_list``,
    ``process_doubles``, ``replace_words`` and ``text_to_int``.

    Args:
        audio_file: Audio source forwarded unchanged to ``librosa.load``
            (presumably a file path, as supplied by the Gradio audio input).

    Returns:
        The value produced by ``text_to_int`` for the post-processed
        transcript.
    """
    samples, sample_rate = librosa.load(audio_file, sr=16000)

    # Noise-reduction stages; only the high-pass step takes the sample rate.
    samples = high_pass_filter(samples, sample_rate)
    samples = wavelet_denoise(apply_wiener_filter(samples))

    # The pipeline returns a mapping; the transcript lives under "text".
    transcript = asr_model(samples)["text"].replace("[PAD]", "")

    # Text clean-up chain, applied in the same order as the helpers are listed.
    tokens = convert_to_list(transcript, text_to_list())
    tokens = replace_words(process_doubles(tokens))
    return text_to_int(tokens)
def sel_lng(lng, mic=None, file=None):
    """Pick the audio source and run the selected recognition model.

    Args:
        lng: Model identifier chosen in the UI; only ``"model_1"`` is handled.
        mic: Microphone recording; takes precedence over ``file`` when given.
        file: Uploaded audio, used only when ``mic`` is ``None``.

    Returns:
        The result of ``recognize_speech`` for the chosen audio, or a
        human-readable message when no audio was supplied or the model
        identifier is not recognised.
    """
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"

    if lng == "model_1":
        return recognize_speech(audio)

    # Without this, an unrecognised (or missing) selection would fall through
    # and return None, leaving the output box blank with no explanation.
    return f"Unknown model selected: {lng!r}"
# Assemble the Gradio UI: a model selector plus a single audio input
# (microphone or upload, handed to sel_lng as a file path), with the
# result rendered in a textbox. The app is launched immediately.
model_choice = gr.Dropdown(["model_1"], label="Select Model")
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

demo = gr.Interface(
    fn=sel_lng,
    inputs=[model_choice, audio_input],
    outputs=["textbox"],
    title="Automatic Speech Recognition",
)
demo.launch()
|