import streamlit as st
import speech_recognition as sr
from pocketsphinx import pocketsphinx, Jsgf, FsgModel
import requests
import os

st.title("Speech-to-Text Recognition")

# Accept a WAV recording from the user
audio = st.file_uploader(label="Upload your audio file here in .wav format", type=["wav"])
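# The upload arrives as an in-memory, file-like object; sr.AudioFile (used inside
# model() below) accepts file-like objects as well as file paths, so the upload can
# be handed to the recognizer without first saving it to disk.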

# Path where the word-level timing information will be written
# (the ./subfolder directory must already exist)
text_filename = "./subfolder/text_file"

# PocketSphinx model files: statistical language model, acoustic model directory,
# and pronunciation dictionary (the dictionary filename keeps the spelling used on disk)
language_model = './language-model.lm.bin'
acoustic_model = './acoustic-model'
pronunciation_dict = './pronounciation-dictionary.dict'
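# If you do not have these files locally, the speech_recognition package bundles a
# US English model you could point the paths at instead (a sketch, assuming a
# standard pip install of speech_recognition):
#
#   sphinx_data = os.path.join(os.path.dirname(sr.__file__), "pocketsphinx-data", "en-US")
#   acoustic_model = os.path.join(sphinx_data, "acoustic-model")
#   language_model = os.path.join(sphinx_data, "language-model.lm.bin")
#   pronunciation_dict = os.path.join(sphinx_data, "pronounciation-dictionary.dict")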

# Cache the transcription so the same upload is not decoded again on every rerun
@st.cache
def model(audio, text_filename):
    # PocketSphinx reports word segments in frames, at 100 frames per second
    framerate = 100

    # Build a decoder from the acoustic model, language model and pronunciation dictionary
    config = pocketsphinx.Config()
    config.set_string('-hmm', acoustic_model)
    config.set_string('-lm', language_model)
    config.set_string('-dict', pronunciation_dict)
    decoder = pocketsphinx.Decoder(config)

    # Replacement for speech_recognition's built-in Sphinx recognizer that reuses
    # the decoder configured above and returns it alongside the transcript
    def recognize_sphinx(audio, show_all=True):
        decoder.start_utt()
        decoder.process_raw(audio.get_raw_data(), False, True)  # feed the whole utterance at once
        decoder.end_utt()
        hypothesis = decoder.hyp()
        return decoder, hypothesis.hypstr

    r = sr.Recognizer()
    # Swap the library's default Sphinx recognizer for the closure above so that
    # r.recognize_sphinx() uses the preconfigured decoder
    r.recognize_sphinx = recognize_sphinx

    # Read the uploaded file into an AudioData object and transcribe it
    with sr.AudioFile(audio) as source:
        audio = r.record(source)
        sample_rate = audio.sample_rate
        decoder, recognized_text = r.recognize_sphinx(audio, show_all=True)

    # Write word-level timestamps to disk; each line holds
    # (word, start time in seconds, end time in seconds)
    with open(text_filename, 'w') as text_file:
        for seg in decoder.seg():
            # Segment boundaries are in decoder frames, so divide by the 100 fps
            # frame rate (not the audio sample rate) to convert them to seconds
            segment_info = (seg.word, seg.start_frame / framerate, seg.end_frame / framerate)
            text_file.write(str(segment_info) + "\n")

    return recognized_text

if audio is not None:
    with st.spinner("Transcribing your audio..."):
        recognized_text = model(audio, text_filename)
        st.write(recognized_text)
        st.balloons()
else:
    st.write("Please upload an audio file in .wav format.")
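# To try the app locally (assuming this script is saved as app.py and that streamlit,
# SpeechRecognition and pocketsphinx are installed), run:
#
#   streamlit run app.py
#
# Streamlit serves the page at http://localhost:8501 by default.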