catiR committed
Commit 1e483fc
Parent(s): 009ee74
data + demo

Changed files:
- .gitignore +2 -0
- Data/133_Annotated_Vowel_Lengths.pdf +0 -0
- Data/Length_in_spoken_icelandic.json +0 -0
- Data/Length_in_spoken_icelandic.tsv +0 -0
- README.md +20 -1
- app.py +204 -0
- requirements.txt +2 -0
- vowel_length.py +207 -0
.gitignore
CHANGED
@@ -169,3 +169,5 @@ cython_debug/
 
 # PyPI configuration file
 .pypirc
+
+**/.DS_Store
Data/133_Annotated_Vowel_Lengths.pdf
ADDED
Binary file (166 kB).
Data/Length_in_spoken_icelandic.json
ADDED
The diff for this file is too large to render.
Data/Length_in_spoken_icelandic.tsv
ADDED
The diff for this file is too large to render.
README.md
CHANGED
@@ -1 +1,20 @@
---
title: Length contrasts in spoken Icelandic
emoji: 📊
colorFrom: gray
colorTo: green
sdk: gradio
sdk_version: 5.15.0
app_file: app.py
pinned: false
---

## Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences for L1 and L2 Speakers: A Resource for Pronunciation Training

#### NoDaLiDa/Baltic-HLT 2025, Tallinn, Estonia
Authors: Caitlin Laura Richter, Kolbrún Friðriksdóttir, Kormákur Logi Bergsson, Erik Anders Maher, Ragnheiður María Benediktsdóttir, Jon Gudnason

### Get [the paper](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/133_Annotated_Vowel_Lengths.pdf) and annotations from the Data directory,
### or [see the demo](https://huggingface.co/spaces/clr/length-contrast-data-isl)
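For programmatic access, a minimal loading sketch (not part of this commit), assuming the repository's Data/ layout: the JSON is a list of per-token annotation records, and the TSV presumably carries the same annotations in tabular form.

```python
# Sketch only: load the released annotations from the Data/ directory.
import json
import pandas as pd

with open('Data/Length_in_spoken_icelandic.json') as handle:
    tokens = json.load(handle)   # list of per-token annotation records

table = pd.read_csv('Data/Length_in_spoken_icelandic.tsv', sep='\t')
print(len(tokens), table.shape)
```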
app.py
ADDED
@@ -0,0 +1,204 @@
import gradio as gr
import vowel_length as vln


annotation_json = 'Data/Length_in_spoken_icelandic.json'

menus, vdata = vln.setup(annotation_json)

grouplist = [g for g, ws in menus]
worddict = {g: ws for g, ws in menus}


# populate the word menu for the chosen word group
def get_group_words(group):
    if group == '[NONE]':
        choices = ['[NONE]']
    else:
        choices = ['[ALL]'] + [n for n, v in worddict[group]]
    return gr.Dropdown(choices=choices, value=choices[0], interactive=True)


# lock the speaker-group radio when a word only occurs for one speaker group
def check_word_langs(word, cur_lang):
    if ' [L' not in word:
        return gr.Radio(value=cur_lang, interactive=True)
    elif ' [L1]' in word:
        return gr.Radio(value='L1', interactive=False)
    else:
        return gr.Radio(value='L2', interactive=False)


# filter tokens by word group/word, speaker language, and annotation source
def subset_words_spks(g, w, l, s, wsets, db):
    if w == '[ALL]':
        swords = [v for n, v in wsets[g]]
        labl = g
    else:
        labl = w.split(' ')[0]
        swords = [labl]

    if l == 'All':
        slang = ['L1', 'L2']
        labl += f'\n L1+L2, '
    else:
        slang = [l]
        labl += f'\n {l}, '

    labl += f'{s}'

    db1 = db.copy()
    db1 = db1.loc[(db1['speaker_lang'].isin(slang)) & (db1['word'].isin(swords))]
    db1.reset_index()

    if s.lower() == 'mfa':
        src = 'mfa'
    else:
        assert s[:3].lower() == 'ann'
        src = 'gold'

    return db1, src, labl


# build the figure for one or two selections
def plott(g1, w1, l1, s1, g2, w2, l2, s2):

    dat1, src1, lab1 = subset_words_spks(g1, w1, l1, s1, worddict, vdata)

    if '[NONE]' in [g2, w2]:
        dat2, l2, src2, lab2 = None, None, None, None
    else:
        dat2, src2, lab2 = subset_words_spks(g2, w2, l2, s2, worddict, vdata)

    fig = vln.vgraph(dat1, l1, src1, lab1, dat2, l2, src2, lab2)

    return fig


bl = gr.Blocks(theme=gr.themes.Glass())

with bl:

    with gr.Tabs():

        with gr.TabItem("Vowel quantity"):

            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """
                        #### Select data (1)
                        """
                    )
                    gmenu1 = gr.Dropdown(choices=grouplist, label="Group", value='AL:')
                    wmenu1 = gr.Dropdown(label="Word", choices=['[ALL]'] + [n for n, v in worddict['AL:']])
                    lmenu1 = gr.Radio(["L1", "L2", "All"], label="Speaker group", value="L1")
                    smenu1 = gr.Dropdown(["Annotated", "MFA"], label="Source", value="Annotated")

                    gmenu1.change(get_group_words, inputs=[gmenu1], outputs=[wmenu1])
                    wmenu1.input(check_word_langs, inputs=[wmenu1, lmenu1], outputs=[lmenu1])

                with gr.Column():
                    gr.Markdown(
                        """
                        #### Select data (2)
                        """
                    )
                    gmenu2 = gr.Dropdown(choices=['[NONE]'] + grouplist, label="Group", value='[NONE]')
                    wmenu2 = gr.Dropdown(label="Word", choices=['[NONE]'])
                    lmenu2 = gr.Radio(choices=["L1", "L2", "All"], label="Speaker group", value="L1")
                    smenu2 = gr.Dropdown(["Annotated", "MFA"], label="Source", value="Annotated")

                    gmenu2.change(get_group_words, inputs=[gmenu2], outputs=[wmenu2])
                    wmenu2.input(check_word_langs, inputs=[wmenu2, lmenu2], outputs=[lmenu2])

            btn = gr.Button(value="Update Plot")
            plo = gr.Plot()
            btn.click(plott, [gmenu1, wmenu1, lmenu1, smenu1, gmenu2, wmenu2, lmenu2, smenu2], plo)

            gr.Markdown(
                """
                # Long and short Icelandic vowels
                Check the About tab for more info about the project.
                """
            )

        with gr.TabItem("About"):
            gr.Markdown(
                """
                ## Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences\
                for L1 and L2 Speakers: A Resource for Pronunciation Training
                """
            )

            gr.Markdown(
                """
                ## Demo: Viewing the data
                Use the menus to choose words, speaker group, and data source.
                Words are split into related groups, and either the whole group or a single word can be selected.
                Available speaker groups are native Icelandic speakers (L1), second-language speakers (L2), or all.
                Data source options are gold (human) annotations or the automated Montreal Forced Aligner (MFA).

                The general expectation is that, all else being equal, syllables with long stressed vowels
                followed by short consonants have a higher vowel:(vowel+consonant) duration ratio,
                while syllables with short stressed vowels followed by long consonants have a lower ratio.

                Many other factors also affect relative durations in any particular recorded token,
                and these factors vary considerably - and not necessarily in a balanced way - throughout this dataset.
                This demo is provided to begin exploring the data and to suggest hypotheses for follow-up.
                See Pind 1999, 'Speech segment durations and quantity in Icelandic'
                (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
                """
            )

            gr.Markdown(
                """
                ## Accessing the data

                Annotations can be downloaded as
                [json](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/Length_in_spoken_icelandic.json)
                or [tsv](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/Length_in_spoken_icelandic.tsv) files.
                See [the paper](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/133_Annotated_Vowel_Lengths.pdf)
                for complete information.

                Audio is available from [Clarin](https://repository.clarin.is/repository/xmlui/) (Samrómur).
                The 'collection' field plus the recording filename in the annotation metadata
                specify the original audio file, including which Samrómur collection it is found in.
                """
            )

            gr.Markdown(
                """
                ### About

                This annotated data and its demo application accompany the paper
                *Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences\
                for L1 and L2 Speakers: A Resource for Pronunciation Training*, \
                Caitlin Laura Richter, Kolbrún Friðriksdóttir, Kormákur Logi Bergsson, \
                Erik Anders Maher, Ragnheiður María Benediktsdóttir, Jon Gudnason - NoDaLiDa/Baltic-HLT 2025, Tallinn, Estonia.

                ### Contact [email protected] about bugs, feedback, or collaboration!
                """
            )


if __name__ == "__main__":
    bl.launch()
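To make the ratio described in the demo's About tab concrete, a small worked example with hypothetical durations (not taken from the dataset):

```python
# Purely illustrative durations, in seconds, for the vowel:(vowel+consonant)
# ratio that the demo plots.
v_long, c_short = 0.150, 0.070    # long stressed vowel + short consonant
v_short, c_long = 0.070, 0.150    # short stressed vowel + long consonant

print(round(v_long / (v_long + c_short), 2))   # 0.68: higher ratio
print(round(v_short / (v_short + c_long), 2))  # 0.32: lower ratio
```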
requirements.txt
ADDED
@@ -0,0 +1,2 @@
scipy
matplotlib
vowel_length.py
ADDED
@@ -0,0 +1,207 @@
import os, json
import numpy as np
from collections import defaultdict
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


# make subsets of words for convenience
def make_sets(db, shorts, longs):

    # display name + lookup value for a word, marking words
    # that only occur for one speaker group
    def _wspec(wd, l1, l2):
        if (wd in l1) and (wd in l2):
            return (wd, wd)
        elif wd in l1:
            return (f'{wd} [L1]', wd)
        elif wd in l2:
            return (f'{wd} [L2]', wd)
        else:
            return ('', '')

    def _ksrt(k):
        if ' ' in k:
            return (k[0], 1/len(k))
        else:
            return (k.replace(':', ''), k[-1])

    words = set([(t['word'], t['speaker_lang']) for t in db])
    l1 = [w for w, l in words if l == 'L1']
    l2 = [w for w, l in words if l == 'L2']
    words = set([w for w, l in words])

    wdict = defaultdict(list)
    for w in words:
        if 'agg' in w:
            wdict['AG:'].append(_wspec(w, l1, l2))
        elif 'all' in w:
            wdict['AL:'].append(_wspec(w, l1, l2))
        elif 'egg' in w:
            wdict['EG:'].append(_wspec(w, l1, l2))
        elif 'eki' in w:
            wdict['E:G'].append(_wspec(w, l1, l2))
        elif 'aki' in w:
            wdict['A:G'].append(_wspec(w, l1, l2))
        elif 'ala' in w:
            wdict['A:L'].append(_wspec(w, l1, l2))
        elif w in shorts:
            wdict['OTHER - SHORT'].append(_wspec(w, l1, l2))
        elif w in longs:
            wdict['OTHER - LONG'].append(_wspec(w, l1, l2))
        else:
            print(f'something should not have happened: {w}')

    sets = [(k, sorted(wdict[k])) for k in sorted(list(wdict.keys()), key=_ksrt)]

    return sets


# compile data for a token record
def get_tk_data(tk, shorts, longs):

    # merge a list of phone intervals into one duration for that word part
    def _merge_intervals(plist):
        if not plist:
            return np.nan
        tot_start, tot_end = plist[0]['start'], plist[-1]['end']
        tot_dur = tot_end - tot_start
        return tot_dur

    tkdat = {}
    tkdat['word'] = tk['word']
    tkdat['speaker_lang'] = tk['speaker_lang']
    tkdat['n_pre_phone'] = len(tk['gold_annotation']['prevowel'])
    tkdat['n_post_phone'] = len(tk['gold_annotation']['postvowel'])

    # code short vowels 0, long 1
    if tk['word'] in longs:
        tkdat['vlen'] = 1
    else:
        assert tk['word'] in shorts
        tkdat['vlen'] = 0

    for s in ['gold', 'mfa']:
        tkdat[f'{s}_pre_dur'] = _merge_intervals(tk[f'{s}_annotation']['prevowel'])
        tkdat[f'{s}_v_dur'] = _merge_intervals(tk[f'{s}_annotation']['vowel'])
        tkdat[f'{s}_post_dur'] = _merge_intervals(tk[f'{s}_annotation']['postvowel'])
        tkdat[f'{s}_word_dur'] = tk[f'{s}_annotation']['target_word_end'] -\
            tk[f'{s}_annotation']['target_word_start']

    return tkdat


# add vowel:(vowel+consonant) duration ratios per annotation source
def prep_dat(d):
    df = d.copy()
    for s in ['gold', 'mfa']:
        df[f'{s}_ratio'] = df[f'{s}_v_dur'] / (df[f'{s}_v_dur'] + df[f'{s}_post_dur'])
        df[f'{s}_pre_dur'] = df[f'{s}_pre_dur'].fillna(0)  # set absent onsets dur zero
    df = df.convert_dtypes()
    return df


def setup(annot_json):

    longs = set(['aki', 'ala', 'baki', 'bera', 'betri', 'blaki', 'breki',
                 'brosir', 'dala', 'dreki', 'dvala', 'fala', 'fara', 'færa',
                 'færi', 'gala', 'hausinn', 'jónas', 'katrín', 'kisa', 'koma',
                 'leki', 'leyfa', 'maki', 'muna', 'nema', 'raki', 'sama',
                 'speki', 'svala', 'sækja', 'sömu', 'taki', 'tala', 'tvisvar',
                 'vala', 'veki', 'vinur', 'ása', 'þaki'])

    shorts = set(['aggi', 'baggi', 'balla', 'beggi', 'eggi', 'farðu', 'fossinn',
                  'færði', 'galla', 'hausnum', 'herra', 'jónsson', 'kaggi', 'kalla',
                  'lalla', 'leggi', 'leyfðu', 'maggi', 'malla', 'mamma', 'missa',
                  'mömmu', 'nærri', 'palla', 'raggi', 'skeggi', 'snemma', 'sunna',
                  'tommi', 'veggi', 'vinnur', 'ásta'])

    with open(annot_json, 'r') as handle:
        db = json.load(handle)

    sets = make_sets(db, shorts, longs)

    db = [get_tk_data(tk, shorts, longs) for tk in db]
    dat = pd.DataFrame.from_records(db)
    dat = prep_dat(dat)

    return sets, dat


# scatter plot of vowel vs following-consonant durations for one or two subsets
def vgraph(dat1, l1, src1, lab1, dat2, l2, src2, lab2):

    def _gprep(df, l, s):

        # color by length + speaker group
        ccs = {"lAll": (0.0, 0.749, 1.0),
               "lL1": (0.122, 0.467, 0.706),
               "lL2": (0.282, 0.82, 0.8),
               "sAll": (0.89, 0.467, 0.761),
               "sL1": (0.863, 0.078, 0.235),
               "sL2": (0.859, 0.439, 0.576),
               "xAll": (0.988, 0.69, 0.004),
               "xL1": (0.984, 0.49, 0.027),
               "xL2": (0.969, 0.835, 0.376)}

        vdurs = np.array(df[f'{s}_v_dur'])*1000
        cdurs = np.array(df[f'{s}_post_dur'])*1000
        rto = np.mean(df[f'{s}_ratio'])

        if sum(df['vlen']) == 0:
            vl = 's'
        elif sum(df['vlen']) == df.shape[0]:
            vl = 'l'
        else:
            vl = 'x'

        cc = ccs[f'{vl}{l}']

        return vdurs, cdurs, rto, cc

    vd1, cd1, ra1, cl1 = _gprep(dat1, l1, src1)
    lab1 += f'\n Ratio: {ra1:.3f}'
    if src1 == 'gold':
        mk1 = '^'
    else:
        mk1 = '<'

    fig, ax = plt.subplots(figsize=(9, 7))
    ax.set_xlim(0.0, 350)
    ax.set_ylim(0.0, 350)

    ax.scatter(vd1, cd1, marker=mk1, label=lab1,
               c=[cl1 + (.7,)], edgecolors=[cl1])

    if lab2:
        vd2, cd2, ra2, cl2 = _gprep(dat2, l2, src2)
        lab2 += f'\n Ratio: {ra2:.3f}'
        if src2 == 'gold':
            mk2 = 'v'
        else:
            mk2 = '>'
        ax.scatter(vd2, cd2, marker=mk2, label=lab2,
                   c=[cl2 + (.05,)], edgecolors=[cl2])

    ax.set_title("Stressed vowel & following consonant(s) duration")
    ax.set_xlabel("Vowel duration (ms)")
    ax.set_ylabel("Consonant duration (ms)")
    #fig.legend(loc=8,ncols=2)
    fig.legend(loc=7)

    ax.axline((0, 0), slope=1, color="darkgray")

    fig.tight_layout()
    #fig.subplots_adjust(bottom=0.15)
    fig.subplots_adjust(right=0.75)

    #plt.xticks(ticks=[50,100,150,200,250,300],labels=[])
    #plt.yticks(ticks=[100,200,300],labels=[])

    return fig
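For reference, a minimal sketch of driving vowel_length.py outside the Gradio interface, for example to save a figure of all L1 tokens from the gold annotations; the path, subset, and label strings here are assumptions for illustration, not part of the committed code.

```python
# Sketch only: build the dataframe with setup() and plot one subset.
import vowel_length as vln

menus, vdata = vln.setup('Data/Length_in_spoken_icelandic.json')

l1_gold = vdata.loc[vdata['speaker_lang'] == 'L1']
fig = vln.vgraph(l1_gold, 'L1', 'gold', 'All words\n L1, Annotated',
                 None, None, None, None)
fig.savefig('l1_gold_durations.png')
```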