csikasote commited on
Commit
b2286d8
·
verified ·
1 Parent(s): 9d330bf

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # mms-1b-bemgen-female-model
18
 
19
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.1921
22
- - Wer: 0.3442
23
 
24
  ## Model description
25
 
 
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
6
+ - automatic-speech-recognition
7
+ - bemgen
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # mms-1b-bemgen-female-model
21
 
22
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BEMGEN - BEM dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.1921
25
+ - Wer: 0.3440
26
 
27
  ## Model description
28
 
adapter.bem.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ba3bd1bac48734803e9d8774469e7d7bd277384888d42df4db7b1153dfa5b2
3
+ size 8798532
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.8776978417266186,
3
+ "eval_loss": 0.19208292663097382,
4
+ "eval_runtime": 30.107,
5
+ "eval_samples": 485,
6
+ "eval_samples_per_second": 16.109,
7
+ "eval_steps_per_second": 4.052,
8
+ "eval_wer": 0.3439911797133407,
9
+ "total_flos": 6.630103716413751e+18,
10
+ "train_loss": 0.5161716863087246,
11
+ "train_runtime": 2738.809,
12
+ "train_samples": 3890,
13
+ "train_samples_per_second": 42.61,
14
+ "train_steps_per_second": 10.658
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.8776978417266186,
3
+ "eval_loss": 0.19208292663097382,
4
+ "eval_runtime": 30.107,
5
+ "eval_samples": 485,
6
+ "eval_samples_per_second": 16.109,
7
+ "eval_steps_per_second": 4.052,
8
+ "eval_wer": 0.3439911797133407
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.8776978417266186,
3
+ "total_flos": 6.630103716413751e+18,
4
+ "train_loss": 0.5161716863087246,
5
+ "train_runtime": 2738.809,
6
+ "train_samples": 3890,
7
+ "train_samples_per_second": 42.61,
8
+ "train_steps_per_second": 10.658
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1877833753824234,
3
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-female-model/checkpoint-2500",
4
+ "epoch": 2.8776978417266186,
5
+ "eval_steps": 100,
6
+ "global_step": 2800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10277492291880781,
13
+ "grad_norm": 2.668320894241333,
14
+ "learning_rate": 0.000285,
15
+ "loss": 6.6524,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.10277492291880781,
20
+ "eval_loss": 0.6405563950538635,
21
+ "eval_runtime": 30.4426,
22
+ "eval_samples_per_second": 15.932,
23
+ "eval_steps_per_second": 4.008,
24
+ "eval_wer": 0.6923925027563396,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 0.20554984583761562,
29
+ "grad_norm": 3.9765446186065674,
30
+ "learning_rate": 0.0002990202818838088,
31
+ "loss": 0.4626,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.20554984583761562,
36
+ "eval_loss": 0.27498528361320496,
37
+ "eval_runtime": 30.5092,
38
+ "eval_samples_per_second": 15.897,
39
+ "eval_steps_per_second": 3.999,
40
+ "eval_wer": 0.4520396912899669,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 0.30832476875642345,
45
+ "grad_norm": 1.2895652055740356,
46
+ "learning_rate": 0.00029798899965623925,
47
+ "loss": 0.3384,
48
+ "step": 300
49
+ },
50
+ {
51
+ "epoch": 0.30832476875642345,
52
+ "eval_loss": 0.2555796205997467,
53
+ "eval_runtime": 30.0486,
54
+ "eval_samples_per_second": 16.141,
55
+ "eval_steps_per_second": 4.06,
56
+ "eval_wer": 0.45865490628445427,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 0.41109969167523125,
61
+ "grad_norm": 8.271388053894043,
62
+ "learning_rate": 0.0002969577174286696,
63
+ "loss": 0.3391,
64
+ "step": 400
65
+ },
66
+ {
67
+ "epoch": 0.41109969167523125,
68
+ "eval_loss": 0.2390519678592682,
69
+ "eval_runtime": 29.9294,
70
+ "eval_samples_per_second": 16.205,
71
+ "eval_steps_per_second": 4.076,
72
+ "eval_wer": 0.4119073869900772,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 0.513874614594039,
77
+ "grad_norm": 1.8543354272842407,
78
+ "learning_rate": 0.0002959264352011,
79
+ "loss": 0.2928,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.513874614594039,
84
+ "eval_loss": 0.23259302973747253,
85
+ "eval_runtime": 30.0544,
86
+ "eval_samples_per_second": 16.137,
87
+ "eval_steps_per_second": 4.059,
88
+ "eval_wer": 0.40088202866593164,
89
+ "step": 500
90
+ },
91
+ {
92
+ "epoch": 0.6166495375128469,
93
+ "grad_norm": 1.9590275287628174,
94
+ "learning_rate": 0.0002948951529735304,
95
+ "loss": 0.3126,
96
+ "step": 600
97
+ },
98
+ {
99
+ "epoch": 0.6166495375128469,
100
+ "eval_loss": 0.22746512293815613,
101
+ "eval_runtime": 30.1494,
102
+ "eval_samples_per_second": 16.087,
103
+ "eval_steps_per_second": 4.047,
104
+ "eval_wer": 0.40308710033076073,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.7194244604316546,
109
+ "grad_norm": 1.9041965007781982,
110
+ "learning_rate": 0.0002938638707459608,
111
+ "loss": 0.3305,
112
+ "step": 700
113
+ },
114
+ {
115
+ "epoch": 0.7194244604316546,
116
+ "eval_loss": 0.22082751989364624,
117
+ "eval_runtime": 29.8842,
118
+ "eval_samples_per_second": 16.229,
119
+ "eval_steps_per_second": 4.082,
120
+ "eval_wer": 0.3977949283351709,
121
+ "step": 700
122
+ },
123
+ {
124
+ "epoch": 0.8221993833504625,
125
+ "grad_norm": 1.5732944011688232,
126
+ "learning_rate": 0.00029283258851839117,
127
+ "loss": 0.3043,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 0.8221993833504625,
132
+ "eval_loss": 0.21230436861515045,
133
+ "eval_runtime": 29.8824,
134
+ "eval_samples_per_second": 16.23,
135
+ "eval_steps_per_second": 4.083,
136
+ "eval_wer": 0.38037486218302097,
137
+ "step": 800
138
+ },
139
+ {
140
+ "epoch": 0.9249743062692704,
141
+ "grad_norm": 1.669620156288147,
142
+ "learning_rate": 0.00029180130629082154,
143
+ "loss": 0.2989,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 0.9249743062692704,
148
+ "eval_loss": 0.21349409222602844,
149
+ "eval_runtime": 30.0481,
150
+ "eval_samples_per_second": 16.141,
151
+ "eval_steps_per_second": 4.06,
152
+ "eval_wer": 0.37927232635060637,
153
+ "step": 900
154
+ },
155
+ {
156
+ "epoch": 1.027749229188078,
157
+ "grad_norm": 1.0011438131332397,
158
+ "learning_rate": 0.00029077002406325197,
159
+ "loss": 0.2911,
160
+ "step": 1000
161
+ },
162
+ {
163
+ "epoch": 1.027749229188078,
164
+ "eval_loss": 0.21172800660133362,
165
+ "eval_runtime": 30.6543,
166
+ "eval_samples_per_second": 15.822,
167
+ "eval_steps_per_second": 3.98,
168
+ "eval_wer": 0.38897464167585444,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 1.1305241521068858,
173
+ "grad_norm": 0.9472767114639282,
174
+ "learning_rate": 0.00028973874183568234,
175
+ "loss": 0.2994,
176
+ "step": 1100
177
+ },
178
+ {
179
+ "epoch": 1.1305241521068858,
180
+ "eval_loss": 0.20664845407009125,
181
+ "eval_runtime": 29.9146,
182
+ "eval_samples_per_second": 16.213,
183
+ "eval_steps_per_second": 4.078,
184
+ "eval_wer": 0.38676957001102535,
185
+ "step": 1100
186
+ },
187
+ {
188
+ "epoch": 1.2332990750256938,
189
+ "grad_norm": 0.9638025760650635,
190
+ "learning_rate": 0.0002887074596081127,
191
+ "loss": 0.2849,
192
+ "step": 1200
193
+ },
194
+ {
195
+ "epoch": 1.2332990750256938,
196
+ "eval_loss": 0.21126343309879303,
197
+ "eval_runtime": 29.8301,
198
+ "eval_samples_per_second": 16.259,
199
+ "eval_steps_per_second": 4.09,
200
+ "eval_wer": 0.39316427783902974,
201
+ "step": 1200
202
+ },
203
+ {
204
+ "epoch": 1.3360739979445015,
205
+ "grad_norm": 1.023758053779602,
206
+ "learning_rate": 0.00028767617738054314,
207
+ "loss": 0.2864,
208
+ "step": 1300
209
+ },
210
+ {
211
+ "epoch": 1.3360739979445015,
212
+ "eval_loss": 0.20175151526927948,
213
+ "eval_runtime": 30.0501,
214
+ "eval_samples_per_second": 16.14,
215
+ "eval_steps_per_second": 4.06,
216
+ "eval_wer": 0.3713340683572216,
217
+ "step": 1300
218
+ },
219
+ {
220
+ "epoch": 1.4388489208633093,
221
+ "grad_norm": 1.0510107278823853,
222
+ "learning_rate": 0.0002866448951529735,
223
+ "loss": 0.2611,
224
+ "step": 1400
225
+ },
226
+ {
227
+ "epoch": 1.4388489208633093,
228
+ "eval_loss": 0.2004697620868683,
229
+ "eval_runtime": 30.4532,
230
+ "eval_samples_per_second": 15.926,
231
+ "eval_steps_per_second": 4.006,
232
+ "eval_wer": 0.4004410143329658,
233
+ "step": 1400
234
+ },
235
+ {
236
+ "epoch": 1.541623843782117,
237
+ "grad_norm": 1.148527979850769,
238
+ "learning_rate": 0.0002856136129254039,
239
+ "loss": 0.2995,
240
+ "step": 1500
241
+ },
242
+ {
243
+ "epoch": 1.541623843782117,
244
+ "eval_loss": 0.1996380090713501,
245
+ "eval_runtime": 29.9115,
246
+ "eval_samples_per_second": 16.214,
247
+ "eval_steps_per_second": 4.079,
248
+ "eval_wer": 0.372877618522602,
249
+ "step": 1500
250
+ },
251
+ {
252
+ "epoch": 1.644398766700925,
253
+ "grad_norm": 0.7263904809951782,
254
+ "learning_rate": 0.00028458233069783426,
255
+ "loss": 0.2787,
256
+ "step": 1600
257
+ },
258
+ {
259
+ "epoch": 1.644398766700925,
260
+ "eval_loss": 0.2014673948287964,
261
+ "eval_runtime": 29.8924,
262
+ "eval_samples_per_second": 16.225,
263
+ "eval_steps_per_second": 4.081,
264
+ "eval_wer": 0.3647188533627343,
265
+ "step": 1600
266
+ },
267
+ {
268
+ "epoch": 1.7471736896197327,
269
+ "grad_norm": 0.9115646481513977,
270
+ "learning_rate": 0.0002835510484702647,
271
+ "loss": 0.2444,
272
+ "step": 1700
273
+ },
274
+ {
275
+ "epoch": 1.7471736896197327,
276
+ "eval_loss": 0.19884684681892395,
277
+ "eval_runtime": 30.122,
278
+ "eval_samples_per_second": 16.101,
279
+ "eval_steps_per_second": 4.05,
280
+ "eval_wer": 0.3603087100330761,
281
+ "step": 1700
282
+ },
283
+ {
284
+ "epoch": 1.8499486125385407,
285
+ "grad_norm": 0.9806845784187317,
286
+ "learning_rate": 0.00028251976624269506,
287
+ "loss": 0.2734,
288
+ "step": 1800
289
+ },
290
+ {
291
+ "epoch": 1.8499486125385407,
292
+ "eval_loss": 0.19500485062599182,
293
+ "eval_runtime": 30.0764,
294
+ "eval_samples_per_second": 16.126,
295
+ "eval_steps_per_second": 4.056,
296
+ "eval_wer": 0.3589856670341786,
297
+ "step": 1800
298
+ },
299
+ {
300
+ "epoch": 1.9527235354573484,
301
+ "grad_norm": 0.7178159356117249,
302
+ "learning_rate": 0.00028148848401512544,
303
+ "loss": 0.2794,
304
+ "step": 1900
305
+ },
306
+ {
307
+ "epoch": 1.9527235354573484,
308
+ "eval_loss": 0.19534997642040253,
309
+ "eval_runtime": 29.8724,
310
+ "eval_samples_per_second": 16.236,
311
+ "eval_steps_per_second": 4.084,
312
+ "eval_wer": 0.3545755237045204,
313
+ "step": 1900
314
+ },
315
+ {
316
+ "epoch": 2.055498458376156,
317
+ "grad_norm": 1.4567710161209106,
318
+ "learning_rate": 0.00028045720178755586,
319
+ "loss": 0.2708,
320
+ "step": 2000
321
+ },
322
+ {
323
+ "epoch": 2.055498458376156,
324
+ "eval_loss": 0.19343802332878113,
325
+ "eval_runtime": 29.925,
326
+ "eval_samples_per_second": 16.207,
327
+ "eval_steps_per_second": 4.077,
328
+ "eval_wer": 0.36097023153252483,
329
+ "step": 2000
330
+ },
331
+ {
332
+ "epoch": 2.158273381294964,
333
+ "grad_norm": 0.5352274179458618,
334
+ "learning_rate": 0.00027942591955998624,
335
+ "loss": 0.2545,
336
+ "step": 2100
337
+ },
338
+ {
339
+ "epoch": 2.158273381294964,
340
+ "eval_loss": 0.19527685642242432,
341
+ "eval_runtime": 30.2437,
342
+ "eval_samples_per_second": 16.036,
343
+ "eval_steps_per_second": 4.034,
344
+ "eval_wer": 0.36163175303197354,
345
+ "step": 2100
346
+ },
347
+ {
348
+ "epoch": 2.2610483042137717,
349
+ "grad_norm": 0.7689425349235535,
350
+ "learning_rate": 0.0002784049501546923,
351
+ "loss": 0.2529,
352
+ "step": 2200
353
+ },
354
+ {
355
+ "epoch": 2.2610483042137717,
356
+ "eval_loss": 0.19401085376739502,
357
+ "eval_runtime": 30.2778,
358
+ "eval_samples_per_second": 16.018,
359
+ "eval_steps_per_second": 4.029,
360
+ "eval_wer": 0.35589856670341785,
361
+ "step": 2200
362
+ },
363
+ {
364
+ "epoch": 2.3638232271325794,
365
+ "grad_norm": 0.518496572971344,
366
+ "learning_rate": 0.0002773736679271227,
367
+ "loss": 0.2628,
368
+ "step": 2300
369
+ },
370
+ {
371
+ "epoch": 2.3638232271325794,
372
+ "eval_loss": 0.1926334649324417,
373
+ "eval_runtime": 30.0716,
374
+ "eval_samples_per_second": 16.128,
375
+ "eval_steps_per_second": 4.057,
376
+ "eval_wer": 0.3585446527012128,
377
+ "step": 2300
378
+ },
379
+ {
380
+ "epoch": 2.4665981500513876,
381
+ "grad_norm": 0.47154027223587036,
382
+ "learning_rate": 0.0002763423856995531,
383
+ "loss": 0.2788,
384
+ "step": 2400
385
+ },
386
+ {
387
+ "epoch": 2.4665981500513876,
388
+ "eval_loss": 0.1925119012594223,
389
+ "eval_runtime": 29.9369,
390
+ "eval_samples_per_second": 16.201,
391
+ "eval_steps_per_second": 4.075,
392
+ "eval_wer": 0.35104740904079385,
393
+ "step": 2400
394
+ },
395
+ {
396
+ "epoch": 2.5693730729701953,
397
+ "grad_norm": 0.4298442304134369,
398
+ "learning_rate": 0.0002753111034719835,
399
+ "loss": 0.2473,
400
+ "step": 2500
401
+ },
402
+ {
403
+ "epoch": 2.5693730729701953,
404
+ "eval_loss": 0.1877833753824234,
405
+ "eval_runtime": 30.3202,
406
+ "eval_samples_per_second": 15.996,
407
+ "eval_steps_per_second": 4.024,
408
+ "eval_wer": 0.34994487320837925,
409
+ "step": 2500
410
+ },
411
+ {
412
+ "epoch": 2.672147995889003,
413
+ "grad_norm": 1.0587983131408691,
414
+ "learning_rate": 0.0002742798212444139,
415
+ "loss": 0.2595,
416
+ "step": 2600
417
+ },
418
+ {
419
+ "epoch": 2.672147995889003,
420
+ "eval_loss": 0.19105447828769684,
421
+ "eval_runtime": 30.2772,
422
+ "eval_samples_per_second": 16.019,
423
+ "eval_steps_per_second": 4.029,
424
+ "eval_wer": 0.37045203969129,
425
+ "step": 2600
426
+ },
427
+ {
428
+ "epoch": 2.774922918807811,
429
+ "grad_norm": 0.7508417963981628,
430
+ "learning_rate": 0.00027324853901684426,
431
+ "loss": 0.2516,
432
+ "step": 2700
433
+ },
434
+ {
435
+ "epoch": 2.774922918807811,
436
+ "eval_loss": 0.188262939453125,
437
+ "eval_runtime": 30.0618,
438
+ "eval_samples_per_second": 16.133,
439
+ "eval_steps_per_second": 4.058,
440
+ "eval_wer": 0.3479603087100331,
441
+ "step": 2700
442
+ },
443
+ {
444
+ "epoch": 2.8776978417266186,
445
+ "grad_norm": 1.4504516124725342,
446
+ "learning_rate": 0.00027221725678927463,
447
+ "loss": 0.2445,
448
+ "step": 2800
449
+ },
450
+ {
451
+ "epoch": 2.8776978417266186,
452
+ "eval_loss": 0.19207896292209625,
453
+ "eval_runtime": 30.0543,
454
+ "eval_samples_per_second": 16.137,
455
+ "eval_steps_per_second": 4.059,
456
+ "eval_wer": 0.3442116868798236,
457
+ "step": 2800
458
+ },
459
+ {
460
+ "epoch": 2.8776978417266186,
461
+ "step": 2800,
462
+ "total_flos": 6.630103716413751e+18,
463
+ "train_loss": 0.5161716863087246,
464
+ "train_runtime": 2738.809,
465
+ "train_samples_per_second": 42.61,
466
+ "train_steps_per_second": 10.658
467
+ }
468
+ ],
469
+ "logging_steps": 100,
470
+ "max_steps": 29190,
471
+ "num_input_tokens_seen": 0,
472
+ "num_train_epochs": 30,
473
+ "save_steps": 400,
474
+ "stateful_callbacks": {
475
+ "EarlyStoppingCallback": {
476
+ "args": {
477
+ "early_stopping_patience": 3,
478
+ "early_stopping_threshold": 0.0
479
+ },
480
+ "attributes": {
481
+ "early_stopping_patience_counter": 3
482
+ }
483
+ },
484
+ "TrainerControl": {
485
+ "args": {
486
+ "should_epoch_stop": false,
487
+ "should_evaluate": false,
488
+ "should_log": false,
489
+ "should_save": true,
490
+ "should_training_stop": true
491
+ },
492
+ "attributes": {}
493
+ }
494
+ },
495
+ "total_flos": 6.630103716413751e+18,
496
+ "train_batch_size": 4,
497
+ "trial_name": null,
498
+ "trial_params": null
499
+ }