speech_recognition_tutorial

Sleeping

File size: 2,963 Bytes

import streamlit as st
from utils.levels import complete_level, render_page, initialize_level
from utils.login import initialize_login, get_login
import requests
import os
from audio_recorder_streamlit import audio_recorder


def check_sentence_similarity(source_sentence, sentence, timeout=30):
    """Score how similar ``sentence`` is to ``source_sentence``.

    Posts both sentences to the hosted
    ``sentence-transformers/all-MiniLM-L6-v2`` endpoint of the Hugging Face
    Inference API and returns the first entry of the JSON list it answers
    with (one entry is expected because exactly one candidate is sent).

    Args:
        source_sentence: Reference sentence to compare against.
        sentence: Candidate sentence to score.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first element of the JSON list returned by the API.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON or is not a
            non-empty list (e.g. an error object from the API).
    """
    # SECURITY: an API token is committed in this file. It is kept only as a
    # fallback so existing deployments keep working; set HF_API_TOKEN in the
    # environment and rotate/remove the literal.
    token = os.environ.get(
        "HF_API_TOKEN", "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"
    )
    response = requests.post(
        "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
        headers={"Authorization": f"Bearer {token}"},
        json={
            "inputs": {"source_sentence": source_sentence, "sentences": [sentence]},
        },
        # Without a timeout a stalled connection blocks the page forever.
        timeout=timeout,
    )
    # Surface HTTP errors here instead of failing later on an error payload.
    response.raise_for_status()
    scores = response.json()
    if not isinstance(scores, list) or not scores:
        raise ValueError(f"Unexpected similarity response: {scores!r}")
    return scores[0]


def get_audio_transcription(filename, timeout=120):
    """Transcribe an audio file with the hosted ``openai/whisper-medium`` model.

    Reads the whole file and posts its raw bytes to the Hugging Face
    Inference API.

    Args:
        filename: Path of the audio file to transcribe.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON body of the response. The caller in this file reads
        its ``"text"`` key.

    Raises:
        OSError: If the file cannot be opened or read.
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # SECURITY: an API token is committed in this file. It is kept only as a
    # fallback so existing deployments keep working; set HF_API_TOKEN in the
    # environment and rotate/remove the literal.
    token = os.environ.get(
        "HF_API_TOKEN", "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"
    )
    response = requests.post(
        "https://api-inference.huggingface.co/models/openai/whisper-medium",
        headers={"Authorization": f"Bearer {token}"},
        data=data,
        # Without a timeout a stalled connection blocks the page forever.
        timeout=timeout,
    )
    # Fail loudly on HTTP errors rather than handing an error payload to the
    # caller, which would then crash on the missing "text" key.
    response.raise_for_status()
    return response.json()


# Level number of this tutorial page; passed to complete_level() and
# render_page() further down in this file.
LEVEL = 4

# Project helpers called at module level, before the page is rendered.
# NOTE(review): presumably these prepare login and level state — confirm in
# utils.login / utils.levels.
initialize_login()
initialize_level()


def _save_session_audio(payload):
    """Write ``payload`` to the logged-in user's session audio file.

    Args:
        payload: Bytes-like audio content to store.

    Returns:
        Path of the written file, ``.sessions/<username>/audio.flac``.
    """
    session_dir = os.path.join(".sessions", get_login()["username"])
    # Create the directory on demand so the first upload/recording cannot
    # fail with FileNotFoundError.
    os.makedirs(session_dir, exist_ok=True)
    # The ".flac" name is kept for compatibility even though the content may
    # be wav or mp3; nothing in this file depends on the extension.
    target = os.path.join(session_dir, "audio.flac")
    with open(target, "wb") as f:
        f.write(payload)
    return target


def _grade_transcript(audio_file, transcript):
    """Transcribe ``audio_file``, compare it with ``transcript``, show the result.

    Service failures are reported with ``st.error`` instead of crashing the
    page with a traceback.
    """
    with st.spinner("Checking..."):
        try:
            output = get_audio_transcription(audio_file)
        except (requests.RequestException, ValueError) as err:
            st.error(f"Could not transcribe the audio, please try again: {err}")
            return
        # An error payload from the service has no "text" key.
        if not isinstance(output, dict) or "text" not in output:
            st.error(f"Unexpected transcription response: {output}")
            return
        try:
            similarity = check_sentence_similarity(output["text"], transcript)
        except (requests.RequestException, ValueError, KeyError, IndexError) as err:
            st.error(f"Could not compare the sentences, please try again: {err}")
            return

    if similarity > 0.75:
        st.success(
            f"Correct! You are {similarity * 100:.1f}% similar to the original sentence."
        )
    else:
        st.error("You are not similar enough to the original sentence.")
        st.info(f"Original sentence: {output['text']}")


def step_page():
    """Render the interactive step of the speech recognition tutorial.

    Lets the user pick an audio source (upload, microphone recording, or one
    of the bundled clips), plays it back, and asks them to type what they
    heard. The typed text is compared with the model transcription, and a
    similarity above 0.75 counts as correct. A "Complete" button marks the
    level as done via ``complete_level(LEVEL)``.
    """
    st.header("Try it Out")

    input_type = st.selectbox(
        "Input Type", ["Upload", "Record", "Sample", "Pilot 1", "Pilot 2"], index=2
    )
    bundled_clips = {
        "Sample": "assets/sample1.flac",
        "Pilot 1": "assets/pilot1.flac",
        "Pilot 2": "assets/pilot2.flac",
    }
    audio_file = None
    audio_format = "audio/flac"
    if input_type == "Upload":
        uploaded_file = st.file_uploader("Upload a file", type=["wav", "mp3", "flac"])
        if uploaded_file:
            audio_file = _save_session_audio(uploaded_file.getbuffer())
            # Play back with the real MIME type of the upload (wav/mp3/flac)
            # instead of always claiming FLAC.
            audio_format = uploaded_file.type or audio_format
    elif input_type == "Record":
        audio_bytes = audio_recorder()
        if audio_bytes:
            audio_file = _save_session_audio(audio_bytes)
            # NOTE(review): audio_recorder_streamlit documents WAV output —
            # confirm against the installed version.
            audio_format = "audio/wav"
    else:
        audio_file = bundled_clips.get(input_type)

    if audio_file:
        st.audio(audio_file, format=audio_format)

        transcript = st.text_input("What did you hear?")
        if st.button("Check") and transcript:
            _grade_transcript(audio_file, transcript)

    st.info("Click on the button below to continue!")

    if st.button("Complete"):
        complete_level(LEVEL)


# Hand the page body and its level number to the project's render_page helper.
# NOTE(review): presumably render_page decides when step_page is invoked —
# confirm in utils.levels.
render_page(step_page, LEVEL)