Update app.py
Browse files
app.py
CHANGED
|
@@ -53,20 +53,11 @@ def f1(langname,lang_aligner):
|
|
| 53 |
elif langname =="Faroese":
|
| 54 |
ds = datas.ds_f
|
| 55 |
|
| 56 |
-
|
| 57 |
-
#fig = plt.figure(figsize=(10,4))
|
| 58 |
-
#plt.axline((0,0),slope=1,color="darkgray")
|
| 59 |
-
#plt.xlabel("Vowel length (ms)")
|
| 60 |
-
#plt.ylabel("Consonant length (ms)")
|
| 61 |
-
|
| 62 |
-
|
| 63 |
maxdat=len(ds)
|
| 64 |
|
| 65 |
ds = ds.select([random.randint(maxdat-1)])
|
| 66 |
-
#print([th for th in ds.sample()])
|
| 67 |
sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
|
| 68 |
transcript = ds['normalized_text'][0]
|
| 69 |
-
#print('PLACE A:',lang_aligner)
|
| 70 |
return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
|
| 71 |
|
| 72 |
|
|
@@ -74,7 +65,19 @@ bl = gr.Blocks()
|
|
| 74 |
|
| 75 |
with bl:
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
|
| 80 |
|
|
@@ -84,8 +87,8 @@ with bl:
|
|
| 84 |
|
| 85 |
|
| 86 |
with gr.Row():
|
| 87 |
-
btn1 = gr.Button(value="
|
| 88 |
-
btn1.style(full_width=False
|
| 89 |
audio1 = gr.Audio(interactive=False)
|
| 90 |
|
| 91 |
pl1 = gr.Plot()
|
|
@@ -101,21 +104,20 @@ with bl:
|
|
| 101 |
gr.Markdown(
|
| 102 |
"""
|
| 103 |
# ABOUT
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
|
| 107 |
-
|
| 108 |
-
After you select a language, a few example sentences from the corpus are displayed.
|
| 109 |
-
|
| 110 |
-
Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.
|
| 111 |
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
-
|
|
|
|
| 115 |
|
| 116 |
[ABOUT CTC ALIGNMENT - TODO]
|
| 117 |
|
| 118 |
-
|
|
|
|
|
|
|
| 119 |
"""
|
| 120 |
)
|
| 121 |
|
|
|
|
| 53 |
elif langname =="Faroese":
|
| 54 |
ds = datas.ds_f
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
maxdat=len(ds)
|
| 57 |
|
| 58 |
ds = ds.select([random.randint(maxdat-1)])
|
|
|
|
| 59 |
sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
|
| 60 |
transcript = ds['normalized_text'][0]
|
|
|
|
| 61 |
return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
|
| 62 |
|
| 63 |
|
|
|
|
| 65 |
|
| 66 |
with bl:
|
| 67 |
|
| 68 |
+
with gr.Row():
|
| 69 |
+
gr.Markdown(
|
| 70 |
+
"""
|
| 71 |
+
# Demo under construction
|
| 72 |
+
## 1. Choose a language to load
|
| 73 |
+
## 2. See a small sample of the selected corpus
|
| 74 |
+
## 3. Click the button below to view time-aligned prosody information for a random example (from the whole corpus, not necessarily the shown sample)
|
| 75 |
+
|
| 76 |
+
Pitch is shown in dark blue and loudness is the light orange line.
|
| 77 |
+
The pitch estimation, and the time-alignment of words to audio, are completely automated and there will be some inaccuracy.
|
| 78 |
+
More information below.
|
| 79 |
+
""" )
|
| 80 |
+
lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Language")#, info="Loading the dataset takes some time")
|
| 81 |
|
| 82 |
align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
|
| 83 |
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
with gr.Row():
|
| 90 |
+
btn1 = gr.Button(value="CLICK HERE")
|
| 91 |
+
btn1.style(full_width=False)
|
| 92 |
audio1 = gr.Audio(interactive=False)
|
| 93 |
|
| 94 |
pl1 = gr.Plot()
|
|
|
|
| 104 |
gr.Markdown(
|
| 105 |
"""
|
| 106 |
# ABOUT
|
| 107 |
+
|
| 108 |
+
The Icelandic corpus is [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr), and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
### Pitch tracking (F0 estimation)
|
| 111 |
+
Estimated pitch is shown in blue on the graphs, as tracked by [REAPER](https://github.com/google/REAPER).
|
| 112 |
|
| 113 |
+
### Intensity
|
| 114 |
+
The orange line is root mean squared energy, which reflects loudness and is also a good indication of syllable placement, as it should line up with vowels and similar sounds.
|
| 115 |
|
| 116 |
[ABOUT CTC ALIGNMENT - TODO]
|
| 117 |
|
| 118 |
+
This is a work-in-progress basic demo for automatic prosodic annotation in Faroese and Icelandic.
|
| 119 |
+
Contact [email protected] / https://github.com/catiR/ when things break, or with ideas/suggestions about how to apply this.
|
| 120 |
+
The source code is available under the Files tab at the top of the Space.
|
| 121 |
"""
|
| 122 |
)
|
| 123 |
|