Spaces:

peproject
/

pronounciationevaluation

Runtime error

App Files Files Community

bel32123 committed on Nov 1, 2023

Commit

13375b8

1 Parent(s): 6acf275

Add prompt generation feature and articulation videos lookup

Browse files

Files changed (1) hide show

app.py +100 -5

app.py CHANGED Viewed

@@ -3,8 +3,14 @@ from speechbrain.pretrained import GraphemeToPhoneme
 import os
 import torchaudio
 from wav2vecasr.MispronounciationDetector import MispronounciationDetector
-from wav2vecasr.PhonemeASRModel import Wav2Vec2PhonemeASRModel, Wav2Vec2OptimisedPhonemeASRModel, MultitaskPhonemeASRModel
-import torch
 @st.cache_resource
 def load_model():
@@ -34,6 +40,46 @@ def get_audio(saved_sound_filename):
     audio = audio.view(audio.shape[1])
     return audio
 def mispronounciation_detection_section():
     st.write('# Prediction')
     st.write('1. Upload a recording of you saying the text in .wav format')
@@ -52,11 +98,13 @@ def mispronounciation_detection_section():
             # load model
             mispronunciation_detector = load_model()
-            # start prediction
             st.write('# Detection Results')
             with st.spinner('Predicting...'):
                 raw_info = mispronunciation_detector.detect(audio, text, phoneme_error_threshold=0.25)
                 st.write('#### Phoneme Level Analysis')
                 st.write(f"Phoneme Error Rate: {round(raw_info['per'],2)}")
                 st.markdown(
@@ -76,9 +124,13 @@ def mispronounciation_detection_section():
                 )
                 st.divider()
                 md = []
                 for word, has_error in zip(raw_info["words"], raw_info["word_errors"]):
                     if has_error:
                         md.append(f"**{word}**")
                     else:
                         md.append(word)
@@ -86,19 +138,62 @@ def mispronounciation_detection_section():
                 st.write('#### Word Level Analysis')
                 st.write(f"Word Error Rate: {round(raw_info['wer'], 2)} and the following words in bold have errors:")
                 st.markdown(" ".join(md))
         else:
             st.error('The audio or text has not been properly input', icon="🚨")
     return
 if __name__ == '__main__':
     st.write('___')
     # create a sidebar
     st.sidebar.title('Pronounciation Evaluation')
-    select = st.sidebar.selectbox('', ['Main Page', 'Mispronounciation Detection'], key='1', label_visibility='collapsed')
     st.sidebar.write(select)
     if select=='Mispronounciation Detection':
         mispronounciation_detection_section()
-    # else: stay on the home page
     else:
         st.write('# Pronounciation Evaluation')
         st.write('This app is designed to detect mispronounciation of English words for English learners from Asian countries like Korean, Mandarin and Vietnameses.')

 import os
 import torchaudio
 from wav2vecasr.MispronounciationDetector import MispronounciationDetector
+from wav2vecasr.PhonemeASRModel import MultitaskPhonemeASRModel
+import json
+import os
+import random
+import openai
+openai.api_key = os.getenv("OPENAI_KEY")
 @st.cache_resource
 def load_model():
     audio = audio.view(audio.shape[1])
     return audio
@st.cache_data
def get_prompts():
    """Load the list of practice prompts shipped with the app.

    Reads ``wav2vecasr/data/prompts.json`` (resolved against the current
    working directory) and returns the value stored under its top-level
    ``"prompts"`` key.

    Raises:
        OSError: if the JSON file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if the document has no ``"prompts"`` key.
    """
    prompts_path = os.path.join(os.getcwd(), "wav2vecasr", "data", "prompts.json")
    # Use a context manager so the file handle is closed even if parsing
    # fails; JSON is read as UTF-8 regardless of the platform default.
    with open(prompts_path, encoding="utf-8") as f:
        data = json.load(f)
    return data["prompts"]
@st.cache_data
def get_articulation_videos():
    """Load the phoneme-to-video lookup table shipped with the app.

    Reads ``wav2vecasr/data/videos.json`` (resolved against the current
    working directory) and returns the parsed JSON document unchanged.

    Raises:
        OSError: if the JSON file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # note -- not all arpabets could be mapped to a video with visualisation on articulation
    path = os.path.join(os.getcwd(), "wav2vecasr", "data", "videos.json")
    # Use a context manager so the file handle is closed even if parsing
    # fails; JSON is read as UTF-8 regardless of the platform default.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
def get_prompts_from_l2_arctic(prompts, current_prompt, num_to_get):
    """Pick distinct random prompts from ``prompts``, excluding the current one.

    Args:
        prompts: pool of prompts to choose from.
        current_prompt: prompt that must not be selected again.
        num_to_get: how many prompts are wanted.

    Returns:
        A list of at most ``num_to_get`` distinct prompts in random order.
        Fewer are returned when the pool does not contain enough distinct
        prompts other than ``current_prompt``; a non-positive ``num_to_get``
        yields an empty list.
    """
    # Build the set of eligible prompts once. Rejection sampling in a
    # ``while`` loop never terminates when the pool is too small, and
    # ``random.choice`` raises on an empty pool.
    # List membership (not a set) keeps this working for unhashable prompts.
    candidates = []
    for prompt in prompts:
        if prompt != current_prompt and prompt not in candidates:
            candidates.append(prompt)

    count = max(0, min(num_to_get, len(candidates)))
    return random.sample(candidates, count)
def get_prompt_from_openai(words_with_error_list):
    """Ask the OpenAI chat API for a short practice sentence using given words.

    Args:
        words_with_error_list: words to include in the generated sentence;
            they are joined with ``", "`` before being sent.

    Returns:
        The content of the first choice in the chat completion response, or
        an empty string if anything goes wrong (the call is best-effort, so
        callers can fall back to other prompt sources).
    """
    try:
        words_with_errors = ", ".join(words_with_error_list)
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are writing practice reading prompts for learners of English to practice pronunciation. These prompts should be short, easy to understand and useful."},
                {"role": "user", "content": f"Write a short sentence of less than 10 words and include the following words in the sentence: {words_with_errors} No numbers."}
            ]
        )
        return response['choices'][0]['message']['content']
    except Exception:
        # Deliberately best-effort, but catch Exception rather than using a
        # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return ""
 def mispronounciation_detection_section():
     st.write('# Prediction')
     st.write('1. Upload a recording of you saying the text in .wav format')
             # load model
             mispronunciation_detector = load_model()
             st.write('# Detection Results')
             with st.spinner('Predicting...'):
+                # detect
                 raw_info = mispronunciation_detector.detect(audio, text, phoneme_error_threshold=0.25)
+                # display prediction results for phonemes
                 st.write('#### Phoneme Level Analysis')
                 st.write(f"Phoneme Error Rate: {round(raw_info['per'],2)}")
                 st.markdown(
                 )
                 st.divider()
+                # display word errors
                 md = []
+                words_with_errors = []
                 for word, has_error in zip(raw_info["words"], raw_info["word_errors"]):
                     if has_error:
+                        words_with_errors.append(word)
                         md.append(f"**{word}**")
                     else:
                         md.append(word)
                 st.write('#### Word Level Analysis')
                 st.write(f"Word Error Rate: {round(raw_info['wer'], 2)} and the following words in bold have errors:")
                 st.markdown(" ".join(md))
+                st.divider()
+                # display more prompts to practice -- 1 from ChatGPT -- based on user's mistakes, 2 from L2 Arctic
+                st.write('#### What is next?')
+                st.write('Here are some more prompts for you to practice:')
+                selected_prompts = []
+                unique_words_with_errors = list(set(words_with_errors))
+                prompt_for_mistakes_made = get_prompt_from_openai(unique_words_with_errors)
+                if prompt_for_mistakes_made:
+                    selected_prompts.append(prompt_for_mistakes_made)
+                prompts = get_prompts()
+                l2_arctic_prompts = get_prompts_from_l2_arctic(prompts, text, 3-len(selected_prompts))
+                selected_prompts.extend(l2_arctic_prompts)
+                for prompt in selected_prompts:
+                    st.code(f'''{prompt}''', language="python")
         else:
             st.error('The audio or text has not been properly input', icon="🚨")
     return
def video_section():
    """Render the page that looks up an articulation video for a phoneme.

    Shows a text input for the phoneme and a 'Look up' button. When the
    button is pressed, the phoneme is looked up in the mapping returned by
    ``get_articulation_videos()`` and the linked video is displayed; the
    user is told when the input is empty, the phoneme is unknown, or no
    video link is available for it.
    """
    st.write('# Get helpful videos on phoneme articulation')
    problem_phoneme = st.text_input(
        "Enter the phoneme you had problems with 👇"
    )
    arpabet_to_video_map = get_articulation_videos()

    if not st.button('Look up'):
        return

    # Surrounding whitespace would otherwise make a valid phoneme miss the lookup.
    phoneme = problem_phoneme.strip()
    if not phoneme:
        # This page has no audio/text input, so ask for the phoneme specifically.
        st.error('Please enter a phoneme to look up', icon="🚨")
        return

    if phoneme not in arpabet_to_video_map:
        # Without this branch an unknown phoneme produced no feedback at all.
        st.warning(f"Sorry, we don't have an entry for the phoneme '{phoneme}'.")
        return

    video_link = arpabet_to_video_map[phoneme]["link"]
    if video_link:
        st.video(video_link)
    else:
        st.write("Sorry, we couldn't find a good enough video yet :(  we are working on it!")
 if __name__ == '__main__':
     st.write('___')
     # create a sidebar
     st.sidebar.title('Pronounciation Evaluation')
+    select = st.sidebar.selectbox('', ['Main Page', 'Mispronounciation Detection', 'Helpful Videos for Problem Phonemes'], key='1', label_visibility='collapsed')
     st.sidebar.write(select)
     if select=='Mispronounciation Detection':
         mispronounciation_detection_section()
+    elif select=="Helpful Videos for Problem Phonemes":
+        video_section()
     else:
         st.write('# Pronounciation Evaluation')
         st.write('This app is designed to detect mispronounciation of English words for English learners from Asian countries like Korean, Mandarin and Vietnameses.')