catiR commited on
Commit
ecd5f69
1 Parent(s): bd7c83f
Files changed (2) hide show
  1. app.py +75 -14
  2. vowel_length.py +83 -14
app.py CHANGED
@@ -78,7 +78,7 @@ def plott(g1,w1,l1,s1,g2,w2,l2,s2):
78
 
79
 
80
 
81
- bl = gr.Blocks(theme=gr.themes.Glass())
82
 
83
  with bl:
84
 
@@ -108,8 +108,8 @@ with bl:
108
  #### Select data (2)
109
  """
110
  )
111
- gmenu2 = gr.Dropdown(choices=['[NONE]'] + grouplist,label="Group", value='[NONE]')
112
- wmenu2 = gr.Dropdown(label="Word", choices=['[NONE]'])
113
  lmenu2 = gr.Radio(choices=["L1", "L2","All"],label="Speaker group",value="L1")
114
  smenu2 = gr.Dropdown(["Annotated", "MFA"],label="Source",value="Annotated")
115
 
@@ -118,7 +118,7 @@ with bl:
118
 
119
 
120
  btn = gr.Button(value="Update Plot")
121
- plo = gr.Plot()
122
  btn.click(plott, [gmenu1,wmenu1,lmenu1,smenu1,gmenu2,wmenu2,lmenu2,smenu2], plo)
123
 
124
 
@@ -140,6 +140,21 @@ with bl:
140
  """
141
  )
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  gr.Markdown(
144
  """
145
  ## Demo: Viewing the data
@@ -148,6 +163,9 @@ with bl:
148
  Available speaker groups are native Icelandic speakers (L1), second-language speakers (L2), or all.
149
  Data source options are gold (human) annotations or automated Montreal Forced Aligner (MFA).
150
 
 
 
 
151
  The general expectation is that, all else being equal, syllables with long stressed vowels
152
  followed by short consonants have a higher vowel:(vowel+consonant) duration ratio,
153
  while syllables with short stressed vowels followed by long consonants have a lower ratio.
@@ -161,7 +179,6 @@ with bl:
161
  )
162
 
163
 
164
-
165
  gr.Markdown(
166
  """
167
  ## Accessing the data
@@ -171,25 +188,69 @@ with bl:
171
  or [tsv](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/Length_in_spoken_icelandic.tsv) files.
172
  See [the paper](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/133_Annotated_Vowel_Lengths.pdf)
173
  for complete information.
 
 
174
 
 
 
175
  Audio is available from [Clarin](https://repository.clarin.is/repository/xmlui/) (Samrómur).
176
  The 'collection' field plus recording filename in the annotations metadata
177
  specify the original audio file, including which Samrómur collection it is found in.
178
  """
179
  )
180
 
181
-
182
  gr.Markdown(
183
- """
184
- ### About
185
-
186
- This annotated data and its demo application accompany the paper
187
- *Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences\
188
- for L1 and L2 Speakers: A Resource for Pronunciation Training*, \
189
 - Caitlin Laura Richter, Kolbrún Friðriksdóttir, Kormákur Logi Bergsson, \
190
 - Erik Anders Maher, Ragnheiður María Benediktsdóttir, Jon Gudnason - NoDaLiDa/Baltic-HLT 2025, Tallinn, Estonia.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
 
 
 
 
 
193
  ### Contact [email protected] about bugs, feedback, or collaboration!
194
 
195
  """
 
78
 
79
 
80
 
81
+ bl = gr.Blocks()#theme=gr.themes.Glass())
82
 
83
  with bl:
84
 
 
108
  #### Select data (2)
109
  """
110
  )
111
+ gmenu2 = gr.Dropdown(choices=['[NONE]'] + grouplist,label="Group", value='A:L')
112
+ wmenu2 = gr.Dropdown(label="Word", choices=['[ALL]'] + [n for n,v in worddict['A:L']])
113
  lmenu2 = gr.Radio(choices=["L1", "L2","All"],label="Speaker group",value="L1")
114
  smenu2 = gr.Dropdown(["Annotated", "MFA"],label="Source",value="Annotated")
115
 
 
118
 
119
 
120
  btn = gr.Button(value="Update Plot")
121
+ plo = gr.Plot(value=plott('AL:','[ALL]',"L1","Annotated",'A:L','[ALL]',"L1","Annotated"))
122
  btn.click(plott, [gmenu1,wmenu1,lmenu1,smenu1,gmenu2,wmenu2,lmenu2,smenu2], plo)
123
 
124
 
 
140
  """
141
  )
142
 
143
+
144
+ gr.Markdown(
145
+ """
146
+ ### About
147
+
148
+ This annotated data and its demo application accompany the paper
149
+ *Assessed and Annotated Vowel Lengths in Spoken Icelandic Sentences\
150
+ for L1 and L2 Speakers: A Resource for Pronunciation Training*, \
151
+ Caitlin Laura Richter, Kolbr煤n Fri冒riksd贸ttir, Korm谩kur Logi Bergsson, \
152
+ Erik Anders Maher, Ragnhei冒ur Mar铆a Benediktsd贸ttir, Jon Gudnason - NoDaLiDa/Baltic-HLT 2025, Tallinn, Estonia.
153
+
154
+
155
+ """
156
+ )
157
+
158
  gr.Markdown(
159
  """
160
  ## Demo: Viewing the data
 
163
  Available speaker groups are native Icelandic speakers (L1), second-language speakers (L2), or all.
164
  Data source options are gold (human) annotations or automated Montreal Forced Aligner (MFA).
165
 
166
+ The display is a scatter plot of vowel and consonant durations,
167
+ supplemented with density plots for each dimension separately.
168
+
169
  The general expectation is that, all else being equal, syllables with long stressed vowels
170
  followed by short consonants have a higher vowel:(vowel+consonant) duration ratio,
171
  while syllables with short stressed vowels followed by long consonants have a lower ratio.
 
179
  )
180
 
181
 
 
182
  gr.Markdown(
183
  """
184
  ## Accessing the data
 
188
  or [tsv](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/Length_in_spoken_icelandic.tsv) files.
189
  See [the paper](https://github.com/catiR/length-contrast-data-isl/blob/main/Data/133_Annotated_Vowel_Lengths.pdf)
190
  for complete information.
191
+ """
192
+ )
193
 
194
+ gr.Markdown(
195
+ """
196
  Audio is available from [Clarin](https://repository.clarin.is/repository/xmlui/) (Samrómur).
197
  The 'collection' field plus recording filename in the annotations metadata
198
  specify the original audio file, including which Samrómur collection it is found in.
199
  """
200
  )
201
 
 
202
  gr.Markdown(
203
+ """
204
+ Annotation records are in the following scheme:
205
+
206
+ ```
207
+ [ { recording: source-file-id.wav,
208
+ collection: samromur-collection,
209
+ speaker_lang: L1/L2,
210
+ word: target-word,
211
+ word_context: {
212
+ normalised: normalised-carrier-sentence-text,
213
+ before: sentence-context-preceding-token,
214
+ after: sentence-context-following-token
215
+ },
216
+ gold_annotation: {
217
+ target_word_start: seconds,
218
+ target_word_end: seconds,
219
+ prevowel: [ {
220
+ phone: ipa-character,
221
+ start: seconds,
222
+ end: seconds,
223
+ },
224
+ { phone2 ... } ,
225
+ ],
226
+ vowel: [ {
227
+ phone: ipa-character,
228
+ start: seconds,
229
+ end: seconds,
230
+ },
231
+ ],
232
+ postvowel: [ {
233
+ phone: ipa-character,
234
+ start: seconds,
235
+ end: seconds,
236
+ },
237
+ ]
238
+ },
239
+ mfa_annotation : {
240
+ ... as for gold ...
241
+ }
242
+ },
243
+ ]
244
+ ```
245
+
246
+ """
247
+ )
248
 
249
 
250
+
251
+
252
+ gr.Markdown(
253
+ """
254
  ### Contact [email protected] about bugs, feedback, or collaboration!
255
 
256
  """
vowel_length.py CHANGED
@@ -5,7 +5,9 @@ import pandas as pd
5
  import matplotlib
6
  matplotlib.use('Agg')
7
  import matplotlib.pyplot as plt
8
-
 
 
9
 
10
  # make subsets of words for convenience
11
  def make_sets(db,shorts,longs):
@@ -70,7 +72,7 @@ def get_tk_data(tk,shorts,longs):
70
  tot_start, tot_end = plist[0]['start'],plist[-1]['end']
71
  tot_dur = tot_end-tot_start
72
  return tot_dur
73
-
74
  tkdat = {}
75
  tkdat['word'] = tk['word']
76
  tkdat['speaker_lang'] = tk['speaker_lang']
@@ -89,7 +91,7 @@ def get_tk_data(tk,shorts,longs):
89
  tkdat[f'{s}_post_dur'] = _merge_intervals(tk[f'{s}_annotation']['postvowel'])
90
  tkdat[f'{s}_word_dur'] = tk[f'{s}_annotation']['target_word_end'] -\
91
  tk[f'{s}_annotation']['target_word_start']
92
-
93
  return tkdat
94
 
95
 
@@ -118,18 +120,44 @@ def setup(annot_json):
118
  'mömmu', 'nærri', 'palla', 'raggi', 'skeggi', 'snemma', 'sunna',
119
  'tommi', 'veggi','vinnur', 'ásta'])
120
 
 
 
 
121
  with open(annot_json, 'r') as handle:
122
  db = json.load(handle)
123
 
124
  sets = make_sets(db,shorts,longs)
125
 
126
  db = [get_tk_data(tk,shorts,longs) for tk in db]
 
127
  dat = pd.DataFrame.from_records(db)
128
  dat = prep_dat(dat)
129
 
130
  return sets,dat
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
135
 
@@ -161,6 +189,7 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
161
 
162
  return vdurs, cdurs, rto, cc
163
 
 
164
 
165
  vd1,cd1,ra1,cl1 = _gprep(dat1,l1,src1)
166
  lab1 += f'\n Ratio: {ra1:.3f}'
@@ -171,12 +200,16 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
171
 
172
 
173
  fig, ax = plt.subplots(figsize=(9,7))
174
- ax.set_xlim(0.0,350)
175
- ax.set_ylim(0.0,350)
176
 
177
  ax.scatter(vd1,cd1,marker = mk1, label = lab1,
178
  c = [cl1 + (.7,)], edgecolors = [cl1] )
179
 
 
 
 
 
180
  if lab2:
181
  vd2,cd2,ra2,cl2 = _gprep(dat2,l2,src2)
182
  lab2 += f'\n Ratio: {ra2:.3f}'
@@ -186,22 +219,58 @@ def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
186
  mk2 = '>'
187
  ax.scatter(vd2,cd2, marker = mk2, label = lab2,
188
  c = [cl2 + (.05,)], edgecolors = [cl2] )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
 
190
 
191
- ax.set_title("Stressed vowel & following consonant(s) duration" )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  ax.set_xlabel("Vowel duration (ms)")
193
  ax.set_ylabel("Consonant duration (ms)")
194
- #fig.legend(loc=8,ncols=2)
195
- fig.legend(loc=7)
196
-
197
- ax.axline((0,0),slope=1,color="darkgray")
198
-
199
  fig.tight_layout()
200
- #fig.subplots_adjust(bottom=0.15)
201
- fig.subplots_adjust(right=0.75)
202
 
203
  #plt.xticks(ticks=[50,100,150,200,250,300],labels=[])
204
  #plt.yticks(ticks=[100,200,300],labels=[])
205
-
206
  return fig
207
 
 
5
  import matplotlib
6
  matplotlib.use('Agg')
7
  import matplotlib.pyplot as plt
8
+ from scipy.stats import gaussian_kde
9
+ #from scipy.spatial import KDTree
10
+ #from sklearn.neighbors import NearestNeighbors
11
 
12
  # make subsets of words for convenience
13
  def make_sets(db,shorts,longs):
 
72
  tot_start, tot_end = plist[0]['start'],plist[-1]['end']
73
  tot_dur = tot_end-tot_start
74
  return tot_dur
75
+
76
  tkdat = {}
77
  tkdat['word'] = tk['word']
78
  tkdat['speaker_lang'] = tk['speaker_lang']
 
91
  tkdat[f'{s}_post_dur'] = _merge_intervals(tk[f'{s}_annotation']['postvowel'])
92
  tkdat[f'{s}_word_dur'] = tk[f'{s}_annotation']['target_word_end'] -\
93
  tk[f'{s}_annotation']['target_word_start']
94
+
95
  return tkdat
96
 
97
 
 
120
  'mömmu', 'nærri', 'palla', 'raggi', 'skeggi', 'snemma', 'sunna',
121
  'tommi', 'veggi','vinnur', 'ásta'])
122
 
123
+ # very basic remove about 5 outliers > 350ms
124
+ cut=0.35
125
+
126
  with open(annot_json, 'r') as handle:
127
  db = json.load(handle)
128
 
129
  sets = make_sets(db,shorts,longs)
130
 
131
  db = [get_tk_data(tk,shorts,longs) for tk in db]
132
+ db = [t for t in db if ((t['gold_v_dur'] <=cut) and (t['gold_post_dur'] <=cut))]
133
  dat = pd.DataFrame.from_records(db)
134
  dat = prep_dat(dat)
135
 
136
  return sets,dat
137
 
138
 
139
+ def kldiv(s1,s2):
140
+ _log = lambda x: np.log2(x) if x != 0 else 0
141
+ _log = np.vectorize(_log)
142
+
143
+ n, m = len(s1), len(s2)
144
+ d = s1.shape[1]
145
+ assert d == 2 == s2.shape[1]
146
+
147
+ k = 1
148
+ while True:
149
+ knn1 = NearestNeighbors(n_neighbors = k+1).fit(s1)
150
+ nnDist1 = knn1.kneighbors(s1)[0][:, k]
151
+ if not nnDist1.all():
152
+ k += 1
153
+ else:
154
+ break
155
+ knn2 = NearestNeighbors(n_neighbors = k).fit(s2)
156
+ nnDist2 = knn2.kneighbors(s1)[0][:, k-1]
157
+ kl = (d/n) * sum(_log(nnDist2/nnDist1)) + _log((m/(n-1)))
158
+ return kl
159
+
160
+
161
 
162
  def vgraph(dat1,l1,src1,lab1,dat2,l2,src2,lab2):
163
 
 
189
 
190
  return vdurs, cdurs, rto, cc
191
 
192
+ plt.close()
193
 
194
  vd1,cd1,ra1,cl1 = _gprep(dat1,l1,src1)
195
  lab1 += f'\n Ratio: {ra1:.3f}'
 
200
 
201
 
202
  fig, ax = plt.subplots(figsize=(9,7))
203
+ #ax.set_xlim(0.0, 350)
204
+ #ax.set_ylim(0.0, 350)
205
 
206
  ax.scatter(vd1,cd1,marker = mk1, label = lab1,
207
  c = [cl1 + (.7,)], edgecolors = [cl1] )
208
 
209
+ marginals = [(vd1, 'x', l1, cl1),
210
+ (cd1, 'y', l1, cl1)]
211
+
212
+ #kld = None
213
  if lab2:
214
  vd2,cd2,ra2,cl2 = _gprep(dat2,l2,src2)
215
  lab2 += f'\n Ratio: {ra2:.3f}'
 
219
  mk2 = '>'
220
  ax.scatter(vd2,cd2, marker = mk2, label = lab2,
221
  c = [cl2 + (.05,)], edgecolors = [cl2] )
222
+ #s1 = np.transpose(np.array([vd1,cd1]))
223
+ #s2 = np.transpose(np.array([vd2,cd2]))
224
+ #klda = kldiv(s1,s2)
225
+ #if klda:
226
+ # kldb = kldiv(s2,s1)
227
+ # kldsym = np.mean([klda,kldb])
228
+ # if not np.isnan(kldsym):
229
+ # ax.scatter([-300],[-300],c = 'white',label = f'\nKLDiv: {kldsym:.2f}')
230
+
231
+ marginals += [(vd2, 'x', l2, cl2),
232
+ (cd2, 'y', l2, cl2)]
233
+
234
+ #fig.legend(loc=8,ncols=2)
235
+ leg = fig.legend(loc=7,frameon=False)
236
+ for t in leg.get_texts():
237
+ t.set_verticalalignment("center_baseline")
238
 
239
+ ax.axline((0,0),slope=1,color="darkgray")
240
 
241
+ marginals = [m for m in marginals if len(m[0])>9]
242
+ lsts = {'L1': 'solid' , 'L2': 'dashed' , 'All': 'dashdot'}
243
+ for values, axt, lng, lcl in marginals:
244
+ kde = gaussian_kde(values, bw_method='scott')
245
+ pts = np.linspace(np.min(values), np.max(values))
246
+ dens = kde.pdf(pts)
247
+ scf=2500
248
+ lst = lsts[lng]
249
+ #l2dat = ax.plot(pts, [350-(scf*i) for i in dens], linestyle=lst, color = lcl)
250
+ l2dat = ax.plot(pts, [350+(scf*i) for i in dens], linestyle=lst, color = lcl, clip_on=False)
251
+ if axt == 'y':
252
+ for l2d in l2dat:
253
+ xln = l2d.get_xdata()
254
+ yln = l2d.get_ydata()
255
+ l2d.set_xdata(yln)
256
+ l2d.set_ydata(xln)
257
+ fig.canvas.draw()
258
+ #ax.draw_artist(l2d)
259
+
260
+
261
+ ax.set_xlim(0.0, 350)
262
+ ax.set_ylim(0.0, 350)
263
+
264
+ ax.set_title("Stressed vowel & following consonant(s) duration" , fontsize=16, y=-.155)
265
  ax.set_xlabel("Vowel duration (ms)")
266
  ax.set_ylabel("Consonant duration (ms)")
267
+
 
 
 
 
268
  fig.tight_layout()
269
+ fig.subplots_adjust(bottom=0.13)
270
+ fig.subplots_adjust(right=0.72)
271
 
272
  #plt.xticks(ticks=[50,100,150,200,250,300],labels=[])
273
  #plt.yticks(ticks=[100,200,300],labels=[])
274
+
275
  return fig
276