Spaces · Sleeping
catiR committed
Commit ecd5f69 · 1 Parent(s): bd7c83f
densities
Browse files:
- app.py +75 -14
- vowel_length.py +83 -14
app.py CHANGED
@@ -78,7 +78,7 @@ def plott(g1,w1,l1,s1,g2,w2,l2,s2):



-bl = gr.Blocks(theme=gr.themes.Glass())
+bl = gr.Blocks()#theme=gr.themes.Glass())

 with bl:

@@ -108,8 +108,8 @@ with bl:
 #### Select data (2)
 """
 )
-gmenu2 = gr.Dropdown(choices=['[NONE]'] + grouplist,label="Group", value='
-wmenu2 = gr.Dropdown(label="Word", choices=['[
+gmenu2 = gr.Dropdown(choices=['[NONE]'] + grouplist,label="Group", value='A:L')
+wmenu2 = gr.Dropdown(label="Word", choices=['[ALL]'] + [n for n,v in worddict['A:L']])
 lmenu2 = gr.Radio(choices=["L1", "L2","All"],label="Speaker group",value="L1")
 smenu2 = gr.Dropdown(["Annotated", "MFA"],label="Source",value="Annotated")

@@ -118,7 +118,7 @@ with bl:


 btn = gr.Button(value="Update Plot")
-plo = gr.Plot()
+plo = gr.Plot(value=plott('AL:','[ALL]',"L1","Annotated",'A:L','[ALL]',"L1","Annotated"))
 btn.click(plott, [gmenu1,wmenu1,lmenu1,smenu1,gmenu2,wmenu2,lmenu2,smenu2], plo)


@@ -140,6 +140,21 @@ with bl:
 """
 )

+
+gr.Markdown(
+"""
+### About
+
+This annotated data and its demo application accompany the paper
+*Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences\
+for L1 and L2 Speakers: A Resource for Pronunciation Training*, \
+Caitlin Laura Richter, Kolbrún Friðriksdóttir, Kormákur Logi Bergsson, \
+Erik Anders Maher, Ragnheiður María Benediktsdóttir, Jon Gudnason - NoDaLiDa/Baltic-HLT 2025, Tallinn, Estonia.
+
+
+"""
+)
+
 gr.Markdown(
 """
 ## Demo: Viewing the data
@@ -148,6 +163,9 @@ with bl:
 Available speaker groups are native Icelandic speakers (L1), second-language speakers (L2), or all.
 Data source options are gold (human) annotations or automated Montreal Forced Aligner (MFA).

+The display is a scatter plot of vowel and consonant durations,
+supplemented with density plots for each dimension separately.
+
 The general expectation is that, all else being equal, syllables with long stressed vowels
 followed by short consonants have a higher vowel:(vowel+consonant) duration ratio,
 while syllables with short stressed vowels followed by long consonants have a lower ratio.
@@ -161,7 +179,6 @@ with bl:
 )


-
 gr.Markdown(
 """
 ## Accessing the data
@@ -171,25 +188,69 @@ with bl:
 or [tsv](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/Length_in_spoken_icelandic.tsv) files.
 See [the paper](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/133_Annotated_Vowel_Lengths.pdf)
 for complete information.
+"""
+)

+gr.Markdown(
+"""
 Audio is available from [Clarin](https://repository.clarin.is/repository/xmlui/) (Samrómur).
 The 'collection' field plus recording filename in the annotations metadata
 specify the original audio file, including which Samrómur collection it is found in.
 """
 )

-
 gr.Markdown(
-
-
-
-
-
-
-
-
+"""
+Annotation records are in the following scheme:
+
+```
+[ { recording: source-file-id.wav,
+    collection: samromur-collection,
+    speaker_lang: L1/L2,
+    word: target-word,
+    word_context: {
+        normalised: normalised-carrier-sentence-text,
+        before: sentence-context-preceding-token,
+        after: sentence-context-following-token
+        },
+    gold_annotation: {
+        target_word_start: seconds,
+        target_word_end: seconds,
+        prevowel: [ {
+            phone: ipa-character,
+            start: seconds,
+            end: seconds,
+            },
+            { phone2 ... } ,
+        ],
+        vowel: [ {
+            phone: ipa-character,
+            start: seconds,
+            end: seconds,
+            },
+        ],
+        postvowel: [ {
+            phone: ipa-character,
+            start: seconds,
+            end: seconds,
+            },
+        ]
+        },
+    mfa_annotation : {
+        ... as for gold ...
+        }
+    },
+]
+```
+
+"""
+)


+
+
+gr.Markdown(
+"""
 ### Contact [email protected] about bugs, feedback, or collaboration!

 """
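For orientation, here is a minimal sketch (not part of the commit) of reading records in the annotation scheme documented above and computing the vowel:(vowel+consonant) duration ratio that the demo text describes. The file name `length_annotations.json` and the helper `span_dur` are illustrative assumptions; the field names follow the documented scheme, durations are taken from the first interval's start to the last interval's end (mirroring `_merge_intervals` in vowel_length.py), and records with empty interval lists are not handled.

```python
import json

# Assumed local path to the annotation JSON; not a file name taken from the repo.
with open('length_annotations.json', 'r') as handle:
    records = json.load(handle)

def span_dur(plist):
    # Duration from the start of the first interval to the end of the last,
    # the same span that _merge_intervals() computes in vowel_length.py.
    return plist[-1]['end'] - plist[0]['start']

for tk in records[:3]:
    gold = tk['gold_annotation']
    v = span_dur(gold['vowel'])        # stressed vowel duration (seconds)
    c = span_dur(gold['postvowel'])    # following consonant(s) duration (seconds)
    ratio = v / (v + c)                # expected higher for long-vowel syllables, lower for short
    print(tk['word'], tk['speaker_lang'], f'{ratio:.3f}')
```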
vowel_length.py CHANGED
@@ -5,7 +5,9 @@ import pandas as pd
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
-
+from scipy.stats import gaussian_kde
+#from scipy.spatial import KDTree
+#from sklearn.neighbors import NearestNeighbors

 # make subsets of words for convenience
 def make_sets(db,shorts,longs):
@@ -70,7 +72,7 @@ def get_tk_data(tk,shorts,longs):
         tot_start, tot_end = plist[0]['start'],plist[-1]['end']
         tot_dur = tot_end-tot_start
         return tot_dur
-
+
     tkdat = {}
     tkdat['word'] = tk['word']
     tkdat['speaker_lang'] = tk['speaker_lang']
@@ -89,7 +91,7 @@ def get_tk_data(tk,shorts,longs):
         tkdat[f'{s}_post_dur'] = _merge_intervals(tk[f'{s}_annotation']['postvowel'])
         tkdat[f'{s}_word_dur'] = tk[f'{s}_annotation']['target_word_end'] -\
             tk[f'{s}_annotation']['target_word_start']
-
+
     return tkdat


@@ -118,18 +120,44 @@ def setup(annot_json):
         'mömmu', 'nærri', 'palla', 'raggi', 'skeggi', 'snemma', 'sunna',
         'tommi', 'veggi','vinnur', 'ásta'])

+    # very basic remove about 5 outliers > 350ms
+    cut=0.35
+
     with open(annot_json, 'r') as handle:
         db = json.load(handle)

     sets = make_sets(db,shorts,longs)

     db = [get_tk_data(tk,shorts,longs) for tk in db]
+    db = [t for t in db if ((t['gold_v_dur'] <=cut) and (t['gold_post_dur'] <=cut))]
     dat = pd.DataFrame.from_records(db)
     dat = prep_dat(dat)

     return sets,dat


+def kldiv(s1,s2):
+    _log = lambda x: np.log2(x) if x != 0 else 0
+    _log = np.vectorize(_log)
+
+    n, m = len(s1), len(s2)
+    d = s1.shape[1]
+    assert d == 2 == s2.shape[1]
+
+    k = 1
+    while True:
+        knn1 = NearestNeighbors(n_neighbors = k+1).fit(s1)
+        nnDist1 = knn1.kneighbors(s1)[0][:, k]
+        if not nnDist1.all():
+            k += 1
+        else:
+            break
+    knn2 = NearestNeighbors(n_neighbors = k).fit(s2)
+    nnDist2 = knn2.kneighbors(s1)[0][:, k-1]
+    kl = (d/n) * sum(_log(nnDist2/nnDist1)) + _log((m/(n-1)))
+    return kl
+
+

 def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):

@@ -161,6 +189,7 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):

         return vdurs, cdurs, rto, cc

+    plt.close()

     vd1,cd1,ra1,cl1 = _gprep(dat1,l1,src1)
     lab1 += f'\n Ratio: {ra1:.3f}'
@@ -171,12 +200,16 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):


     fig, ax = plt.subplots(figsize=(9,7))
-    ax.set_xlim(0.0,350)
-    ax.set_ylim(0.0,350)
+    #ax.set_xlim(0.0, 350)
+    #ax.set_ylim(0.0, 350)

     ax.scatter(vd1,cd1,marker = mk1, label = lab1,
         c = [cl1 + (.7,)], edgecolors = [cl1] )

+    marginals = [(vd1, 'x', l1, cl1),
+                 (cd1, 'y', l1, cl1)]
+
+    #kld = None
     if lab2:
         vd2,cd2,ra2,cl2 = _gprep(dat2,l2,src2)
         lab2 += f'\n Ratio: {ra2:.3f}'
@@ -186,22 +219,58 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
         mk2 = '>'
         ax.scatter(vd2,cd2, marker = mk2, label = lab2,
             c = [cl2 + (.05,)], edgecolors = [cl2] )
+        #s1 = np.transpose(np.array([vd1,cd1]))
+        #s2 = np.transpose(np.array([vd2,cd2]))
+        #klda = kldiv(s1,s2)
+        #if klda:
+        #    kldb = kldiv(s2,s1)
+        #    kldsym = np.mean([klda,kldb])
+        #    if not np.isnan(kldsym):
+        #        ax.scatter([-300],[-300],c = 'white',label = f'\nKLDiv: {kldsym:.2f}')
+
+        marginals += [(vd2, 'x', l2, cl2),
+                      (cd2, 'y', l2, cl2)]
+
+    #fig.legend(loc=8,ncols=2)
+    leg = fig.legend(loc=7,frameon=False)
+    for t in leg.get_texts():
+        t.set_verticalalignment("center_baseline")

+    ax.axline((0,0),slope=1,color="darkgray")

-
+    marginals = [m for m in marginals if len(m[0])>9]
+    lsts = {'L1': 'solid' , 'L2': 'dashed' , 'All': 'dashdot'}
+    for values, axt, lng, lcl in marginals:
+        kde = gaussian_kde(values, bw_method='scott')
+        pts = np.linspace(np.min(values), np.max(values))
+        dens = kde.pdf(pts)
+        scf=2500
+        lst = lsts[lng]
+        #l2dat = ax.plot(pts, [350-(scf*i) for i in dens], linestyle=lst, color = lcl)
+        l2dat = ax.plot(pts, [350+(scf*i) for i in dens], linestyle=lst, color = lcl, clip_on=False)
+        if axt == 'y':
+            for l2d in l2dat:
+                xln = l2d.get_xdata()
+                yln = l2d.get_ydata()
+                l2d.set_xdata(yln)
+                l2d.set_ydata(xln)
+                fig.canvas.draw()
+                #ax.draw_artist(l2d)
+
+
+    ax.set_xlim(0.0, 350)
+    ax.set_ylim(0.0, 350)
+
+    ax.set_title("Stressed vowel & following consonant(s) duration" , fontsize=16, y=-.155)
     ax.set_xlabel("Vowel duration (ms)")
     ax.set_ylabel("Consonant duration (ms)")
-
-    fig.legend(loc=7)
-
-    ax.axline((0,0),slope=1,color="darkgray")
-
+
     fig.tight_layout()
-
-    fig.subplots_adjust(right=0.
+    fig.subplots_adjust(bottom=0.13)
+    fig.subplots_adjust(right=0.72)

     #plt.xticks(ticks=[50,100,150,200,250,300],labels=[])
     #plt.yticks(ticks=[100,200,300],labels=[])
-
+
     return fig

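The commit also adds, but does not yet enable, a k-nearest-neighbour KL-divergence estimator: `kldiv` relies on `NearestNeighbors`, whose import stays commented out, and its call sites in `vgraph` are commented out as well. A rough usage sketch, assuming scikit-learn is installed and that import has been uncommented in vowel_length.py; the synthetic duration samples below are illustrative only.

```python
import numpy as np
from vowel_length import kldiv  # works only after uncommenting the sklearn import in vowel_length.py

# Two synthetic 2-D samples standing in for (vowel, consonant) duration pairs in ms.
rng = np.random.default_rng(0)
s1 = rng.normal(loc=[150.0, 80.0], scale=25.0, size=(200, 2))
s2 = rng.normal(loc=[90.0, 120.0], scale=25.0, size=(200, 2))

# kldiv(s1, s2) estimates KL(P1 || P2) from nearest-neighbour distances;
# the commented-out code in vgraph averages kldiv(s1, s2) and kldiv(s2, s1)
# to get a symmetric value for the plot legend.
print(kldiv(s1, s2))
```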
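Since the commit message is "densities", here is a condensed, self-contained sketch of the marginal-density technique `vgraph` now uses: estimate a 1-D kernel density for each axis with `scipy.stats.gaussian_kde`, scale it, and draw it just outside the 0–350 ms plotting area with `clip_on=False`, swapping the x and y data for the consonant-duration (y-axis) marginal. The synthetic durations and the output file name are assumptions for illustration, not taken from the repo.

```python
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Synthetic stand-ins for vowel and consonant durations in ms.
rng = np.random.default_rng(1)
vd = rng.normal(160, 40, 150)
cd = rng.normal(100, 30, 150)

fig, ax = plt.subplots(figsize=(9, 7))
ax.scatter(vd, cd, alpha=0.6)

scf = 2500  # same scale factor the commit uses to make the density curves visible
for values, along_y in [(vd, False), (cd, True)]:
    kde = gaussian_kde(values, bw_method='scott')
    pts = np.linspace(values.min(), values.max())
    curve = 350 + scf * kde.pdf(pts)                  # sits just outside the 0-350 ms axes
    x, y = (curve, pts) if along_y else (pts, curve)  # swap so the y-marginal runs up the right edge
    ax.plot(x, y, clip_on=False)                      # clip_on=False lets it draw beyond the axes

ax.set_xlim(0, 350)
ax.set_ylim(0, 350)
ax.set_xlabel("Vowel duration (ms)")
ax.set_ylabel("Consonant duration (ms)")
fig.savefig('density_margins.png')
```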