ibnbd commited on
Commit
a85938a
·
verified ·
1 Parent(s): 8bdcfb0

Upload folder using huggingface_hub

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
config.json CHANGED
@@ -43,7 +43,7 @@
43
  "scale_embedding": false,
44
  "task": "transcribe",
45
  "torch_dtype": "float32",
46
- "transformers_version": "4.53.3",
47
  "use_cache": false,
48
  "use_weighted_layer_sum": false,
49
  "vocab_size": 51866
 
43
  "scale_embedding": false,
44
  "task": "transcribe",
45
  "torch_dtype": "float32",
46
+ "transformers_version": "4.54.0",
47
  "use_cache": false,
48
  "use_weighted_layer_sum": false,
49
  "vocab_size": 51866
generation_config.json CHANGED
@@ -247,6 +247,6 @@
247
  "transcribe": 50360,
248
  "translate": 50359
249
  },
250
- "transformers_version": "4.53.3",
251
  "use_cache": false
252
  }
 
247
  "transcribe": 50360,
248
  "translate": 50359
249
  },
250
+ "transformers_version": "4.54.0",
251
  "use_cache": false
252
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a25a3c539d1fae4eb5f2793fd614b4895804b5b9ef60dcbda634350bad1f3771
3
  size 3235581408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b46c1643177ac43c7cee4227284e161af1fead0db26ca6160f4f9ac071731c
3
  size 3235581408
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db07e6b8a1f18ca3c4fc22fc7bddf31df0b16f50a0c280f4b9099acce2d5b34
3
+ size 1375357387
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b60c94b0982aceab90e3687055c9f5adce5e7bf5c684b99a918022ee83c7c4
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b5f405af12756483019edea5c184ab91054cbaa03d9d45c3675b74e3fc8557
3
+ size 1465
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 3000,
3
+ "best_metric": 0.7510406970977783,
4
+ "best_model_checkpoint": "whisper-turbo-oliver/checkpoint-3000",
5
+ "epoch": 2.3041474654377883,
6
+ "eval_steps": 1000,
7
+ "global_step": 3000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03840245775729647,
14
+ "grad_norm": 9.333383560180664,
15
+ "learning_rate": 2.4500000000000003e-06,
16
+ "loss": 2.084,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.07680491551459294,
21
+ "grad_norm": 1.5071637630462646,
22
+ "learning_rate": 4.950000000000001e-06,
23
+ "loss": 0.9651,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.1152073732718894,
28
+ "grad_norm": 1.1114453077316284,
29
+ "learning_rate": 7.45e-06,
30
+ "loss": 0.8516,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.15360983102918588,
35
+ "grad_norm": 1.903541922569275,
36
+ "learning_rate": 9.950000000000001e-06,
37
+ "loss": 0.8221,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.19201228878648233,
42
+ "grad_norm": 1.2109239101409912,
43
+ "learning_rate": 1.2450000000000001e-05,
44
+ "loss": 0.8073,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.2304147465437788,
49
+ "grad_norm": 1.016984224319458,
50
+ "learning_rate": 1.4950000000000001e-05,
51
+ "loss": 0.7959,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.26881720430107525,
56
+ "grad_norm": 1.1783643960952759,
57
+ "learning_rate": 1.745e-05,
58
+ "loss": 0.7902,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.30721966205837176,
63
+ "grad_norm": 0.6817651391029358,
64
+ "learning_rate": 1.995e-05,
65
+ "loss": 0.7854,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.3456221198156682,
70
+ "grad_norm": 0.7436923980712891,
71
+ "learning_rate": 2.245e-05,
72
+ "loss": 0.7823,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.38402457757296465,
77
+ "grad_norm": 0.6642886996269226,
78
+ "learning_rate": 2.495e-05,
79
+ "loss": 0.7775,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.42242703533026116,
84
+ "grad_norm": 0.831900954246521,
85
+ "learning_rate": 2.7450000000000003e-05,
86
+ "loss": 0.7761,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.4608294930875576,
91
+ "grad_norm": 0.5995694994926453,
92
+ "learning_rate": 2.995e-05,
93
+ "loss": 0.7752,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.49923195084485406,
98
+ "grad_norm": 0.9126259684562683,
99
+ "learning_rate": 3.245e-05,
100
+ "loss": 0.7747,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.5376344086021505,
105
+ "grad_norm": 0.6514723300933838,
106
+ "learning_rate": 3.495e-05,
107
+ "loss": 0.7724,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.576036866359447,
112
+ "grad_norm": 1.5725030899047852,
113
+ "learning_rate": 3.745e-05,
114
+ "loss": 0.7697,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.6144393241167435,
119
+ "grad_norm": 0.5431535840034485,
120
+ "learning_rate": 3.995e-05,
121
+ "loss": 0.7688,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.65284178187404,
126
+ "grad_norm": 1.0956439971923828,
127
+ "learning_rate": 4.245e-05,
128
+ "loss": 0.7692,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 0.6912442396313364,
133
+ "grad_norm": 0.5050705671310425,
134
+ "learning_rate": 4.495e-05,
135
+ "loss": 0.77,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 0.7296466973886329,
140
+ "grad_norm": 0.7762904763221741,
141
+ "learning_rate": 4.745e-05,
142
+ "loss": 0.7673,
143
+ "step": 950
144
+ },
145
+ {
146
+ "epoch": 0.7680491551459293,
147
+ "grad_norm": 1.0077502727508545,
148
+ "learning_rate": 4.995e-05,
149
+ "loss": 0.7676,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 0.7680491551459293,
154
+ "eval_cer": 0.00500196505770124,
155
+ "eval_loss": 0.7688098549842834,
156
+ "eval_runtime": 599.3052,
157
+ "eval_samples_per_second": 2.835,
158
+ "eval_steps_per_second": 0.045,
159
+ "eval_wer": 0.012253144756566513,
160
+ "step": 1000
161
+ },
162
+ {
163
+ "epoch": 0.8064516129032258,
164
+ "grad_norm": 0.40111133456230164,
165
+ "learning_rate": 4.9997949843345384e-05,
166
+ "loss": 0.7684,
167
+ "step": 1050
168
+ },
169
+ {
170
+ "epoch": 0.8448540706605223,
171
+ "grad_norm": 0.8781216144561768,
172
+ "learning_rate": 4.999163151231201e-05,
173
+ "loss": 0.7673,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 0.8832565284178188,
178
+ "grad_norm": 0.8268694281578064,
179
+ "learning_rate": 4.998104523202588e-05,
180
+ "loss": 0.765,
181
+ "step": 1150
182
+ },
183
+ {
184
+ "epoch": 0.9216589861751152,
185
+ "grad_norm": 0.518210232257843,
186
+ "learning_rate": 4.996619281036046e-05,
187
+ "loss": 0.764,
188
+ "step": 1200
189
+ },
190
+ {
191
+ "epoch": 0.9600614439324117,
192
+ "grad_norm": 0.25512072443962097,
193
+ "learning_rate": 4.9947076783740046e-05,
194
+ "loss": 0.7625,
195
+ "step": 1250
196
+ },
197
+ {
198
+ "epoch": 0.9984639016897081,
199
+ "grad_norm": 0.4513356387615204,
200
+ "learning_rate": 4.9923700416706686e-05,
201
+ "loss": 0.7609,
202
+ "step": 1300
203
+ },
204
+ {
205
+ "epoch": 1.0368663594470047,
206
+ "grad_norm": 0.2668992877006531,
207
+ "learning_rate": 4.989606770136262e-05,
208
+ "loss": 0.7583,
209
+ "step": 1350
210
+ },
211
+ {
212
+ "epoch": 1.075268817204301,
213
+ "grad_norm": 0.28391626477241516,
214
+ "learning_rate": 4.986418335668855e-05,
215
+ "loss": 0.7562,
216
+ "step": 1400
217
+ },
218
+ {
219
+ "epoch": 1.1136712749615976,
220
+ "grad_norm": 0.5757988691329956,
221
+ "learning_rate": 4.982805282773775e-05,
222
+ "loss": 0.7565,
223
+ "step": 1450
224
+ },
225
+ {
226
+ "epoch": 1.1520737327188941,
227
+ "grad_norm": 0.3012969195842743,
228
+ "learning_rate": 4.978768228470618e-05,
229
+ "loss": 0.756,
230
+ "step": 1500
231
+ },
232
+ {
233
+ "epoch": 1.1904761904761905,
234
+ "grad_norm": 0.366794228553772,
235
+ "learning_rate": 4.974307862187881e-05,
236
+ "loss": 0.7566,
237
+ "step": 1550
238
+ },
239
+ {
240
+ "epoch": 1.228878648233487,
241
+ "grad_norm": 0.4574221968650818,
242
+ "learning_rate": 4.969424945645218e-05,
243
+ "loss": 0.7567,
244
+ "step": 1600
245
+ },
246
+ {
247
+ "epoch": 1.2672811059907834,
248
+ "grad_norm": 0.507939338684082,
249
+ "learning_rate": 4.964120312723362e-05,
250
+ "loss": 0.7575,
251
+ "step": 1650
252
+ },
253
+ {
254
+ "epoch": 1.30568356374808,
255
+ "grad_norm": 0.23039250075817108,
256
+ "learning_rate": 4.958394869321719e-05,
257
+ "loss": 0.7574,
258
+ "step": 1700
259
+ },
260
+ {
261
+ "epoch": 1.3440860215053765,
262
+ "grad_norm": 0.5227251648902893,
263
+ "learning_rate": 4.952249593203659e-05,
264
+ "loss": 0.7557,
265
+ "step": 1750
266
+ },
267
+ {
268
+ "epoch": 1.3824884792626728,
269
+ "grad_norm": 0.5544179081916809,
270
+ "learning_rate": 4.945685533829544e-05,
271
+ "loss": 0.7547,
272
+ "step": 1800
273
+ },
274
+ {
275
+ "epoch": 1.4208909370199692,
276
+ "grad_norm": 0.29488006234169006,
277
+ "learning_rate": 4.938703812177501e-05,
278
+ "loss": 0.7557,
279
+ "step": 1850
280
+ },
281
+ {
282
+ "epoch": 1.4592933947772657,
283
+ "grad_norm": 0.593917191028595,
284
+ "learning_rate": 4.9313056205519894e-05,
285
+ "loss": 0.756,
286
+ "step": 1900
287
+ },
288
+ {
289
+ "epoch": 1.4976958525345623,
290
+ "grad_norm": 0.2776937782764435,
291
+ "learning_rate": 4.923492222380186e-05,
292
+ "loss": 0.7564,
293
+ "step": 1950
294
+ },
295
+ {
296
+ "epoch": 1.5360983102918588,
297
+ "grad_norm": 0.4567047357559204,
298
+ "learning_rate": 4.915264951996219e-05,
299
+ "loss": 0.7549,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 1.5360983102918588,
304
+ "eval_cer": 0.003501375540390868,
305
+ "eval_loss": 0.7553083896636963,
306
+ "eval_runtime": 597.0408,
307
+ "eval_samples_per_second": 2.846,
308
+ "eval_steps_per_second": 0.045,
309
+ "eval_wer": 0.008212214038975428,
310
+ "step": 2000
311
+ },
312
+ {
313
+ "epoch": 1.5745007680491552,
314
+ "grad_norm": 0.3459112048149109,
315
+ "learning_rate": 4.906625214413303e-05,
316
+ "loss": 0.7549,
317
+ "step": 2050
318
+ },
319
+ {
320
+ "epoch": 1.6129032258064515,
321
+ "grad_norm": 0.8464981913566589,
322
+ "learning_rate": 4.897574485083792e-05,
323
+ "loss": 0.7554,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 1.651305683563748,
328
+ "grad_norm": 0.11875268071889877,
329
+ "learning_rate": 4.888114309647211e-05,
330
+ "loss": 0.754,
331
+ "step": 2150
332
+ },
333
+ {
334
+ "epoch": 1.6897081413210446,
335
+ "grad_norm": 0.21423658728599548,
336
+ "learning_rate": 4.878246303666302e-05,
337
+ "loss": 0.7549,
338
+ "step": 2200
339
+ },
340
+ {
341
+ "epoch": 1.728110599078341,
342
+ "grad_norm": 0.3542362451553345,
343
+ "learning_rate": 4.86797215235112e-05,
344
+ "loss": 0.7544,
345
+ "step": 2250
346
+ },
347
+ {
348
+ "epoch": 1.7665130568356375,
349
+ "grad_norm": 0.6518175005912781,
350
+ "learning_rate": 4.8572936102712464e-05,
351
+ "loss": 0.755,
352
+ "step": 2300
353
+ },
354
+ {
355
+ "epoch": 1.8049155145929339,
356
+ "grad_norm": 0.37601879239082336,
357
+ "learning_rate": 4.846212501056149e-05,
358
+ "loss": 0.7546,
359
+ "step": 2350
360
+ },
361
+ {
362
+ "epoch": 1.8433179723502304,
363
+ "grad_norm": 0.8121321201324463,
364
+ "learning_rate": 4.834730717083754e-05,
365
+ "loss": 0.7559,
366
+ "step": 2400
367
+ },
368
+ {
369
+ "epoch": 1.881720430107527,
370
+ "grad_norm": 0.33677735924720764,
371
+ "learning_rate": 4.822850219157272e-05,
372
+ "loss": 0.7552,
373
+ "step": 2450
374
+ },
375
+ {
376
+ "epoch": 1.9201228878648233,
377
+ "grad_norm": 0.30549371242523193,
378
+ "learning_rate": 4.810573036170345e-05,
379
+ "loss": 0.7538,
380
+ "step": 2500
381
+ },
382
+ {
383
+ "epoch": 1.9585253456221197,
384
+ "grad_norm": 0.39071545004844666,
385
+ "learning_rate": 4.797901264760557e-05,
386
+ "loss": 0.7535,
387
+ "step": 2550
388
+ },
389
+ {
390
+ "epoch": 1.9969278033794162,
391
+ "grad_norm": 0.28000298142433167,
392
+ "learning_rate": 4.784837068951387e-05,
393
+ "loss": 0.7533,
394
+ "step": 2600
395
+ },
396
+ {
397
+ "epoch": 2.035330261136713,
398
+ "grad_norm": 0.21816645562648773,
399
+ "learning_rate": 4.7713826797826386e-05,
400
+ "loss": 0.751,
401
+ "step": 2650
402
+ },
403
+ {
404
+ "epoch": 2.0737327188940093,
405
+ "grad_norm": 0.21055564284324646,
406
+ "learning_rate": 4.7575403949294456e-05,
407
+ "loss": 0.7515,
408
+ "step": 2700
409
+ },
410
+ {
411
+ "epoch": 2.1121351766513055,
412
+ "grad_norm": 0.4243466556072235,
413
+ "learning_rate": 4.743312578309875e-05,
414
+ "loss": 0.7511,
415
+ "step": 2750
416
+ },
417
+ {
418
+ "epoch": 2.150537634408602,
419
+ "grad_norm": 0.1919689178466797,
420
+ "learning_rate": 4.7287016596812354e-05,
421
+ "loss": 0.75,
422
+ "step": 2800
423
+ },
424
+ {
425
+ "epoch": 2.1889400921658986,
426
+ "grad_norm": 0.16988199949264526,
427
+ "learning_rate": 4.713710134225132e-05,
428
+ "loss": 0.7504,
429
+ "step": 2850
430
+ },
431
+ {
432
+ "epoch": 2.227342549923195,
433
+ "grad_norm": 0.21615995466709137,
434
+ "learning_rate": 4.698340562121354e-05,
435
+ "loss": 0.75,
436
+ "step": 2900
437
+ },
438
+ {
439
+ "epoch": 2.2657450076804917,
440
+ "grad_norm": 0.15211135149002075,
441
+ "learning_rate": 4.682595568110655e-05,
442
+ "loss": 0.7499,
443
+ "step": 2950
444
+ },
445
+ {
446
+ "epoch": 2.3041474654377883,
447
+ "grad_norm": 0.17576761543750763,
448
+ "learning_rate": 4.6664778410465194e-05,
449
+ "loss": 0.7496,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 2.3041474654377883,
454
+ "eval_cer": 0.0034299188967094217,
455
+ "eval_loss": 0.7510406970977783,
456
+ "eval_runtime": 597.6926,
457
+ "eval_samples_per_second": 2.843,
458
+ "eval_steps_per_second": 0.045,
459
+ "eval_wer": 0.007951508831388907,
460
+ "step": 3000
461
+ }
462
+ ],
463
+ "logging_steps": 50,
464
+ "max_steps": 13020,
465
+ "num_input_tokens_seen": 0,
466
+ "num_train_epochs": 10,
467
+ "save_steps": 1000,
468
+ "stateful_callbacks": {
469
+ "EarlyStoppingCallback": {
470
+ "args": {
471
+ "early_stopping_patience": 10,
472
+ "early_stopping_threshold": 0.001
473
+ },
474
+ "attributes": {
475
+ "early_stopping_patience_counter": 0
476
+ }
477
+ },
478
+ "TrainerControl": {
479
+ "args": {
480
+ "should_epoch_stop": false,
481
+ "should_evaluate": false,
482
+ "should_log": false,
483
+ "should_save": true,
484
+ "should_training_stop": false
485
+ },
486
+ "attributes": {}
487
+ }
488
+ },
489
+ "total_flos": 6.54506878182359e+20,
490
+ "train_batch_size": 64,
491
+ "trial_name": null,
492
+ "trial_params": null
493
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f4dbf9ed7fa0e275be5d3b4b8168d7d5abb7913b855c67a0016c884b482c4fb
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7abc171d9a2eb02755d24fd8f922bffab8345d133f4d98839e62e96887ddf1f
3
  size 5905
training_metrics.json CHANGED
@@ -1,1060 +1,1060 @@
1
  [
2
  {
3
- "loss": 3.5837,
4
- "grad_norm": 31.927167892456055,
5
  "learning_rate": 4.800000000000001e-07,
6
  "epoch": 0.009600614439324117,
7
  "step": 25
8
  },
9
  {
10
- "loss": 2.9494,
11
- "grad_norm": 13.517321586608887,
12
  "learning_rate": 9.800000000000001e-07,
13
  "epoch": 0.019201228878648235,
14
  "step": 50
15
  },
16
  {
17
- "loss": 2.2297,
18
- "grad_norm": 11.44046401977539,
19
  "learning_rate": 1.48e-06,
20
  "epoch": 0.02880184331797235,
21
  "step": 75
22
  },
23
  {
24
- "loss": 1.7016,
25
- "grad_norm": 2.807133674621582,
26
  "learning_rate": 1.98e-06,
27
  "epoch": 0.03840245775729647,
28
  "step": 100
29
  },
30
  {
31
- "loss": 1.5778,
32
- "grad_norm": 1.8624107837677002,
33
  "learning_rate": 2.4800000000000004e-06,
34
  "epoch": 0.04800307219662058,
35
  "step": 125
36
  },
37
  {
38
- "loss": 1.5518,
39
- "grad_norm": 1.7535284757614136,
40
  "learning_rate": 2.9800000000000003e-06,
41
  "epoch": 0.0576036866359447,
42
  "step": 150
43
  },
44
  {
45
- "loss": 1.5253,
46
- "grad_norm": 1.7349804639816284,
47
  "learning_rate": 3.48e-06,
48
  "epoch": 0.06720430107526881,
49
  "step": 175
50
  },
51
  {
52
- "loss": 1.5081,
53
- "grad_norm": 1.990842580795288,
54
  "learning_rate": 3.980000000000001e-06,
55
  "epoch": 0.07680491551459294,
56
  "step": 200
57
  },
58
  {
59
- "loss": 1.5003,
60
- "grad_norm": 1.1042917966842651,
61
  "learning_rate": 4.48e-06,
62
  "epoch": 0.08640552995391705,
63
  "step": 225
64
  },
65
  {
66
- "loss": 1.4981,
67
- "grad_norm": 1.5932936668395996,
68
  "learning_rate": 4.980000000000001e-06,
69
  "epoch": 0.09600614439324116,
70
  "step": 250
71
  },
72
  {
73
- "loss": 1.4839,
74
- "grad_norm": 1.271634578704834,
75
  "learning_rate": 5.480000000000001e-06,
76
  "epoch": 0.10560675883256529,
77
  "step": 275
78
  },
79
  {
80
- "loss": 1.4802,
81
- "grad_norm": 1.1124892234802246,
82
  "learning_rate": 5.98e-06,
83
  "epoch": 0.1152073732718894,
84
  "step": 300
85
  },
86
  {
87
- "loss": 1.4779,
88
- "grad_norm": 1.7024626731872559,
89
  "learning_rate": 6.480000000000001e-06,
90
  "epoch": 0.12480798771121351,
91
  "step": 325
92
  },
93
  {
94
- "loss": 1.4741,
95
- "grad_norm": 1.404011607170105,
96
  "learning_rate": 6.98e-06,
97
  "epoch": 0.13440860215053763,
98
  "step": 350
99
  },
100
  {
101
- "loss": 1.4686,
102
- "grad_norm": 1.750034213066101,
103
  "learning_rate": 7.48e-06,
104
  "epoch": 0.14400921658986174,
105
  "step": 375
106
  },
107
  {
108
- "loss": 1.4683,
109
- "grad_norm": 0.7955018877983093,
110
  "learning_rate": 7.980000000000002e-06,
111
  "epoch": 0.15360983102918588,
112
  "step": 400
113
  },
114
  {
115
- "loss": 1.463,
116
- "grad_norm": 1.526512622833252,
117
  "learning_rate": 8.48e-06,
118
  "epoch": 0.16321044546851,
119
  "step": 425
120
  },
121
  {
122
- "loss": 1.4609,
123
- "grad_norm": 0.947767436504364,
124
  "learning_rate": 8.98e-06,
125
  "epoch": 0.1728110599078341,
126
  "step": 450
127
  },
128
  {
129
- "loss": 1.4615,
130
- "grad_norm": 1.0896203517913818,
131
  "learning_rate": 9.48e-06,
132
  "epoch": 0.18241167434715821,
133
  "step": 475
134
  },
135
  {
136
- "loss": 1.4612,
137
- "grad_norm": 0.834524929523468,
138
  "learning_rate": 9.980000000000001e-06,
139
  "epoch": 0.19201228878648233,
140
  "step": 500
141
  },
142
  {
143
- "eval_loss": 1.4578877687454224,
144
- "eval_wer": 0.019150391367125163,
145
- "eval_cer": 0.005926353711189946,
146
- "eval_runtime": 180.69,
147
- "eval_samples_per_second": 9.408,
148
- "eval_steps_per_second": 0.149,
149
  "epoch": 0.19201228878648233,
150
  "step": 500
151
  },
152
  {
153
- "loss": 1.4572,
154
- "grad_norm": 1.6100836992263794,
155
  "learning_rate": 9.990602975724355e-06,
156
  "epoch": 0.20161290322580644,
157
  "step": 525
158
  },
159
  {
160
- "loss": 1.4557,
161
- "grad_norm": 1.5937349796295166,
162
  "learning_rate": 9.980814408770558e-06,
163
  "epoch": 0.21121351766513058,
164
  "step": 550
165
  },
166
  {
167
- "loss": 1.4566,
168
- "grad_norm": 0.7914408445358276,
169
  "learning_rate": 9.971025841816759e-06,
170
  "epoch": 0.2208141321044547,
171
  "step": 575
172
  },
173
  {
174
- "loss": 1.4515,
175
- "grad_norm": 0.9205504059791565,
176
  "learning_rate": 9.96123727486296e-06,
177
  "epoch": 0.2304147465437788,
178
  "step": 600
179
  },
180
  {
181
- "loss": 1.4522,
182
- "grad_norm": 0.724983811378479,
183
  "learning_rate": 9.951448707909162e-06,
184
  "epoch": 0.24001536098310292,
185
  "step": 625
186
  },
187
  {
188
- "loss": 1.4504,
189
- "grad_norm": 0.6097744107246399,
190
  "learning_rate": 9.941660140955365e-06,
191
  "epoch": 0.24961597542242703,
192
  "step": 650
193
  },
194
  {
195
- "loss": 1.4507,
196
- "grad_norm": 0.7213621139526367,
197
  "learning_rate": 9.931871574001566e-06,
198
  "epoch": 0.25921658986175117,
199
  "step": 675
200
  },
201
  {
202
- "loss": 1.4479,
203
- "grad_norm": 0.8606914281845093,
204
  "learning_rate": 9.92208300704777e-06,
205
  "epoch": 0.26881720430107525,
206
  "step": 700
207
  },
208
  {
209
- "loss": 1.4465,
210
- "grad_norm": 0.7317363619804382,
211
  "learning_rate": 9.91229444009397e-06,
212
  "epoch": 0.2784178187403994,
213
  "step": 725
214
  },
215
  {
216
- "loss": 1.4436,
217
- "grad_norm": 0.7534998059272766,
218
  "learning_rate": 9.902505873140174e-06,
219
  "epoch": 0.2880184331797235,
220
  "step": 750
221
  },
222
  {
223
- "loss": 1.4461,
224
- "grad_norm": 0.6188538074493408,
225
  "learning_rate": 9.892717306186375e-06,
226
  "epoch": 0.2976190476190476,
227
  "step": 775
228
  },
229
  {
230
- "loss": 1.4422,
231
- "grad_norm": 0.6928197145462036,
232
  "learning_rate": 9.882928739232577e-06,
233
  "epoch": 0.30721966205837176,
234
  "step": 800
235
  },
236
  {
237
- "loss": 1.4439,
238
- "grad_norm": 0.9215940833091736,
239
  "learning_rate": 9.87314017227878e-06,
240
  "epoch": 0.31682027649769584,
241
  "step": 825
242
  },
243
  {
244
- "loss": 1.4443,
245
- "grad_norm": 0.9667465090751648,
246
  "learning_rate": 9.863351605324981e-06,
247
  "epoch": 0.32642089093702,
248
  "step": 850
249
  },
250
  {
251
- "loss": 1.4424,
252
- "grad_norm": 0.7931649088859558,
253
  "learning_rate": 9.853563038371182e-06,
254
  "epoch": 0.33602150537634407,
255
  "step": 875
256
  },
257
  {
258
- "loss": 1.4402,
259
- "grad_norm": 0.45881208777427673,
260
  "learning_rate": 9.843774471417386e-06,
261
  "epoch": 0.3456221198156682,
262
  "step": 900
263
  },
264
  {
265
- "loss": 1.4425,
266
- "grad_norm": 2.3223390579223633,
267
  "learning_rate": 9.833985904463587e-06,
268
  "epoch": 0.35522273425499235,
269
  "step": 925
270
  },
271
  {
272
- "loss": 1.4409,
273
- "grad_norm": 0.7167399525642395,
274
  "learning_rate": 9.82419733750979e-06,
275
  "epoch": 0.36482334869431643,
276
  "step": 950
277
  },
278
  {
279
- "loss": 1.4385,
280
- "grad_norm": 0.45477813482284546,
281
  "learning_rate": 9.814408770555991e-06,
282
  "epoch": 0.37442396313364057,
283
  "step": 975
284
  },
285
  {
286
- "loss": 1.4399,
287
- "grad_norm": 0.6906208992004395,
288
  "learning_rate": 9.804620203602193e-06,
289
  "epoch": 0.38402457757296465,
290
  "step": 1000
291
  },
292
  {
293
- "eval_loss": 1.4414963722229004,
294
- "eval_wer": 0.006839425488258986,
295
- "eval_cer": 0.0029700519759095784,
296
- "eval_runtime": 180.7728,
297
- "eval_samples_per_second": 9.404,
298
- "eval_steps_per_second": 0.149,
299
  "epoch": 0.38402457757296465,
300
  "step": 1000
301
  },
302
  {
303
- "loss": 1.4404,
304
- "grad_norm": 0.612010657787323,
305
  "learning_rate": 9.794831636648396e-06,
306
  "epoch": 0.3936251920122888,
307
  "step": 1025
308
  },
309
  {
310
- "loss": 1.437,
311
- "grad_norm": 0.6493867635726929,
312
  "learning_rate": 9.785043069694597e-06,
313
  "epoch": 0.4032258064516129,
314
  "step": 1050
315
  },
316
  {
317
- "loss": 1.4372,
318
- "grad_norm": 0.5084114074707031,
319
  "learning_rate": 9.7752545027408e-06,
320
  "epoch": 0.412826420890937,
321
  "step": 1075
322
  },
323
  {
324
- "loss": 1.4365,
325
- "grad_norm": 0.32950568199157715,
326
  "learning_rate": 9.765465935787002e-06,
327
  "epoch": 0.42242703533026116,
328
  "step": 1100
329
  },
330
  {
331
- "loss": 1.4378,
332
- "grad_norm": 0.5013596415519714,
333
  "learning_rate": 9.755677368833205e-06,
334
  "epoch": 0.43202764976958524,
335
  "step": 1125
336
  },
337
  {
338
- "loss": 1.4356,
339
- "grad_norm": 0.5923940539360046,
340
  "learning_rate": 9.745888801879405e-06,
341
  "epoch": 0.4416282642089094,
342
  "step": 1150
343
  },
344
  {
345
- "loss": 1.4365,
346
- "grad_norm": 0.7093729972839355,
347
  "learning_rate": 9.736100234925608e-06,
348
  "epoch": 0.45122887864823347,
349
  "step": 1175
350
  },
351
  {
352
- "loss": 1.4339,
353
- "grad_norm": 0.45401209592819214,
354
  "learning_rate": 9.726311667971809e-06,
355
  "epoch": 0.4608294930875576,
356
  "step": 1200
357
  },
358
  {
359
- "loss": 1.4365,
360
- "grad_norm": 0.8377461433410645,
361
  "learning_rate": 9.716523101018012e-06,
362
  "epoch": 0.47043010752688175,
363
  "step": 1225
364
  },
365
  {
366
- "loss": 1.4354,
367
- "grad_norm": 0.5953696370124817,
368
  "learning_rate": 9.706734534064213e-06,
369
  "epoch": 0.48003072196620583,
370
  "step": 1250
371
  },
372
  {
373
- "loss": 1.4356,
374
- "grad_norm": 0.464432030916214,
375
  "learning_rate": 9.696945967110417e-06,
376
  "epoch": 0.48963133640552997,
377
  "step": 1275
378
  },
379
  {
380
- "loss": 1.4326,
381
- "grad_norm": 0.6318503022193909,
382
  "learning_rate": 9.687157400156618e-06,
383
  "epoch": 0.49923195084485406,
384
  "step": 1300
385
  },
386
  {
387
- "loss": 1.434,
388
- "grad_norm": 1.390724778175354,
389
  "learning_rate": 9.677368833202821e-06,
390
  "epoch": 0.5088325652841782,
391
  "step": 1325
392
  },
393
  {
394
- "loss": 1.4337,
395
- "grad_norm": 0.4549782872200012,
396
  "learning_rate": 9.66758026624902e-06,
397
  "epoch": 0.5184331797235023,
398
  "step": 1350
399
  },
400
  {
401
- "loss": 1.4345,
402
- "grad_norm": 0.607494592666626,
403
  "learning_rate": 9.657791699295224e-06,
404
  "epoch": 0.5280337941628265,
405
  "step": 1375
406
  },
407
  {
408
- "loss": 1.4345,
409
- "grad_norm": 0.49749523401260376,
410
  "learning_rate": 9.648003132341425e-06,
411
  "epoch": 0.5376344086021505,
412
  "step": 1400
413
  },
414
  {
415
- "loss": 1.4324,
416
- "grad_norm": 0.5609452724456787,
417
  "learning_rate": 9.638214565387628e-06,
418
  "epoch": 0.5472350230414746,
419
  "step": 1425
420
  },
421
  {
422
- "loss": 1.4338,
423
- "grad_norm": 0.7919191718101501,
424
  "learning_rate": 9.62842599843383e-06,
425
  "epoch": 0.5568356374807988,
426
  "step": 1450
427
  },
428
  {
429
- "loss": 1.435,
430
- "grad_norm": 0.4627026617527008,
431
  "learning_rate": 9.618637431480033e-06,
432
  "epoch": 0.5664362519201229,
433
  "step": 1475
434
  },
435
  {
436
- "loss": 1.4338,
437
- "grad_norm": 0.7627348303794861,
438
  "learning_rate": 9.608848864526234e-06,
439
  "epoch": 0.576036866359447,
440
  "step": 1500
441
  },
442
  {
443
- "eval_loss": 1.4339057207107544,
444
- "eval_wer": 0.005319553157534767,
445
- "eval_cer": 0.002406292110111927,
446
- "eval_runtime": 180.3166,
447
- "eval_samples_per_second": 9.428,
448
- "eval_steps_per_second": 0.15,
449
  "epoch": 0.576036866359447,
450
  "step": 1500
451
  },
452
  {
453
- "loss": 1.432,
454
- "grad_norm": 0.4532851576805115,
455
  "learning_rate": 9.599060297572435e-06,
456
  "epoch": 0.5856374807987711,
457
  "step": 1525
458
  },
459
  {
460
- "loss": 1.4332,
461
- "grad_norm": 0.40782368183135986,
462
  "learning_rate": 9.589271730618639e-06,
463
  "epoch": 0.5952380952380952,
464
  "step": 1550
465
  },
466
  {
467
- "loss": 1.4322,
468
- "grad_norm": 5.594751358032227,
469
  "learning_rate": 9.57948316366484e-06,
470
  "epoch": 0.6048387096774194,
471
  "step": 1575
472
  },
473
  {
474
- "loss": 1.4314,
475
- "grad_norm": 0.521115243434906,
476
  "learning_rate": 9.569694596711043e-06,
477
  "epoch": 0.6144393241167435,
478
  "step": 1600
479
  },
480
  {
481
- "loss": 1.432,
482
- "grad_norm": 0.9263116717338562,
483
  "learning_rate": 9.559906029757244e-06,
484
  "epoch": 0.6240399385560675,
485
  "step": 1625
486
  },
487
  {
488
- "loss": 1.4312,
489
- "grad_norm": 0.483038991689682,
490
  "learning_rate": 9.550117462803446e-06,
491
  "epoch": 0.6336405529953917,
492
  "step": 1650
493
  },
494
  {
495
- "loss": 1.4317,
496
- "grad_norm": 0.5169259905815125,
497
  "learning_rate": 9.540328895849649e-06,
498
  "epoch": 0.6432411674347158,
499
  "step": 1675
500
  },
501
  {
502
- "loss": 1.4319,
503
- "grad_norm": 0.6301758885383606,
504
  "learning_rate": 9.53054032889585e-06,
505
  "epoch": 0.65284178187404,
506
  "step": 1700
507
  },
508
  {
509
- "loss": 1.4309,
510
- "grad_norm": 0.43223053216934204,
511
  "learning_rate": 9.520751761942052e-06,
512
  "epoch": 0.6624423963133641,
513
  "step": 1725
514
  },
515
  {
516
- "loss": 1.4302,
517
- "grad_norm": 0.42416876554489136,
518
  "learning_rate": 9.510963194988255e-06,
519
  "epoch": 0.6720430107526881,
520
  "step": 1750
521
  },
522
  {
523
- "loss": 1.4305,
524
- "grad_norm": 0.3732769787311554,
525
  "learning_rate": 9.501174628034456e-06,
526
  "epoch": 0.6816436251920123,
527
  "step": 1775
528
  },
529
  {
530
- "loss": 1.4293,
531
- "grad_norm": 0.3774888813495636,
532
  "learning_rate": 9.49138606108066e-06,
533
  "epoch": 0.6912442396313364,
534
  "step": 1800
535
  },
536
  {
537
- "loss": 1.4295,
538
- "grad_norm": 0.34194567799568176,
539
  "learning_rate": 9.48159749412686e-06,
540
  "epoch": 0.7008448540706606,
541
  "step": 1825
542
  },
543
  {
544
- "loss": 1.4285,
545
- "grad_norm": 0.40550151467323303,
546
  "learning_rate": 9.471808927173064e-06,
547
  "epoch": 0.7104454685099847,
548
  "step": 1850
549
  },
550
  {
551
- "loss": 1.4283,
552
- "grad_norm": 0.46141359210014343,
553
  "learning_rate": 9.462020360219265e-06,
554
  "epoch": 0.7200460829493087,
555
  "step": 1875
556
  },
557
  {
558
- "loss": 1.4275,
559
- "grad_norm": 0.3962096571922302,
560
  "learning_rate": 9.452231793265466e-06,
561
  "epoch": 0.7296466973886329,
562
  "step": 1900
563
  },
564
  {
565
- "loss": 1.4286,
566
- "grad_norm": 0.28840935230255127,
567
  "learning_rate": 9.442443226311668e-06,
568
  "epoch": 0.739247311827957,
569
  "step": 1925
570
  },
571
  {
572
- "loss": 1.4287,
573
- "grad_norm": 1.2259435653686523,
574
  "learning_rate": 9.432654659357871e-06,
575
  "epoch": 0.7488479262672811,
576
  "step": 1950
577
  },
578
  {
579
- "loss": 1.4301,
580
- "grad_norm": 0.49282413721084595,
581
  "learning_rate": 9.422866092404072e-06,
582
  "epoch": 0.7584485407066052,
583
  "step": 1975
584
  },
585
  {
586
- "loss": 1.4298,
587
- "grad_norm": 0.6675013303756714,
588
  "learning_rate": 9.413077525450275e-06,
589
  "epoch": 0.7680491551459293,
590
  "step": 2000
591
  },
592
  {
593
- "eval_loss": 1.4311903715133667,
594
- "eval_wer": 0.006079489322896876,
595
- "eval_cer": 0.002708797403954569,
596
- "eval_runtime": 181.4592,
597
- "eval_samples_per_second": 9.368,
598
- "eval_steps_per_second": 0.149,
599
  "epoch": 0.7680491551459293,
600
  "step": 2000
601
  },
602
  {
603
- "loss": 1.4305,
604
- "grad_norm": 0.7503907084465027,
605
  "learning_rate": 9.403288958496477e-06,
606
  "epoch": 0.7776497695852534,
607
  "step": 2025
608
  },
609
  {
610
- "loss": 1.43,
611
- "grad_norm": 0.40446972846984863,
612
  "learning_rate": 9.39350039154268e-06,
613
  "epoch": 0.7872503840245776,
614
  "step": 2050
615
  },
616
  {
617
- "loss": 1.4286,
618
- "grad_norm": 0.6334962248802185,
619
  "learning_rate": 9.383711824588881e-06,
620
  "epoch": 0.7968509984639017,
621
  "step": 2075
622
  },
623
  {
624
- "loss": 1.4281,
625
- "grad_norm": 0.4710507094860077,
626
  "learning_rate": 9.373923257635083e-06,
627
  "epoch": 0.8064516129032258,
628
  "step": 2100
629
  },
630
  {
631
- "loss": 1.4285,
632
- "grad_norm": 0.47485098242759705,
633
  "learning_rate": 9.364134690681284e-06,
634
  "epoch": 0.8160522273425499,
635
  "step": 2125
636
  },
637
  {
638
- "loss": 1.4271,
639
- "grad_norm": 0.2644312381744385,
640
  "learning_rate": 9.354346123727487e-06,
641
  "epoch": 0.825652841781874,
642
  "step": 2150
643
  },
644
  {
645
- "loss": 1.4286,
646
- "grad_norm": 0.28581735491752625,
647
  "learning_rate": 9.344557556773688e-06,
648
  "epoch": 0.8352534562211982,
649
  "step": 2175
650
  },
651
  {
652
- "loss": 1.4281,
653
- "grad_norm": 0.34762442111968994,
654
  "learning_rate": 9.334768989819891e-06,
655
  "epoch": 0.8448540706605223,
656
  "step": 2200
657
  },
658
  {
659
- "loss": 1.4272,
660
- "grad_norm": 0.4322021007537842,
661
  "learning_rate": 9.324980422866093e-06,
662
  "epoch": 0.8544546850998463,
663
  "step": 2225
664
  },
665
  {
666
- "loss": 1.4265,
667
- "grad_norm": 0.5740979909896851,
668
  "learning_rate": 9.315191855912296e-06,
669
  "epoch": 0.8640552995391705,
670
  "step": 2250
671
  },
672
  {
673
- "loss": 1.4262,
674
- "grad_norm": 0.41522952914237976,
675
  "learning_rate": 9.305403288958497e-06,
676
  "epoch": 0.8736559139784946,
677
  "step": 2275
678
  },
679
  {
680
- "loss": 1.4283,
681
- "grad_norm": 0.6065871715545654,
682
  "learning_rate": 9.295614722004699e-06,
683
  "epoch": 0.8832565284178188,
684
  "step": 2300
685
  },
686
  {
687
- "loss": 1.4266,
688
- "grad_norm": 0.7368581295013428,
689
  "learning_rate": 9.285826155050902e-06,
690
  "epoch": 0.8928571428571429,
691
  "step": 2325
692
  },
693
  {
694
- "loss": 1.4286,
695
- "grad_norm": 0.4163774251937866,
696
  "learning_rate": 9.276037588097103e-06,
697
  "epoch": 0.9024577572964669,
698
  "step": 2350
699
  },
700
  {
701
- "loss": 1.4267,
702
- "grad_norm": 0.5644450187683105,
703
  "learning_rate": 9.266249021143305e-06,
704
  "epoch": 0.9120583717357911,
705
  "step": 2375
706
  },
707
  {
708
- "loss": 1.4274,
709
- "grad_norm": 0.2744387686252594,
710
  "learning_rate": 9.256460454189508e-06,
711
  "epoch": 0.9216589861751152,
712
  "step": 2400
713
  },
714
  {
715
- "loss": 1.4271,
716
- "grad_norm": 0.42348650097846985,
717
  "learning_rate": 9.246671887235709e-06,
718
  "epoch": 0.9312596006144394,
719
  "step": 2425
720
  },
721
  {
722
- "loss": 1.4259,
723
- "grad_norm": 0.4279247522354126,
724
  "learning_rate": 9.236883320281912e-06,
725
  "epoch": 0.9408602150537635,
726
  "step": 2450
727
  },
728
  {
729
- "loss": 1.4259,
730
- "grad_norm": 0.3883613348007202,
731
  "learning_rate": 9.227094753328114e-06,
732
  "epoch": 0.9504608294930875,
733
  "step": 2475
734
  },
735
  {
736
- "loss": 1.4257,
737
- "grad_norm": 0.6730173230171204,
738
  "learning_rate": 9.217306186374315e-06,
739
  "epoch": 0.9600614439324117,
740
  "step": 2500
741
  },
742
  {
743
- "eval_loss": 1.4273574352264404,
744
- "eval_wer": 0.00471160422524508,
745
- "eval_cer": 0.0021862882600445508,
746
- "eval_runtime": 180.2256,
747
- "eval_samples_per_second": 9.433,
748
- "eval_steps_per_second": 0.15,
749
  "epoch": 0.9600614439324117,
750
  "step": 2500
751
  },
752
  {
753
- "loss": 1.4259,
754
- "grad_norm": 0.392243355512619,
755
  "learning_rate": 9.207517619420518e-06,
756
  "epoch": 0.9696620583717358,
757
  "step": 2525
758
  },
759
  {
760
- "loss": 1.4267,
761
- "grad_norm": 0.32680612802505493,
762
  "learning_rate": 9.19772905246672e-06,
763
  "epoch": 0.9792626728110599,
764
  "step": 2550
765
  },
766
  {
767
- "loss": 1.4251,
768
- "grad_norm": 0.4720834195613861,
769
  "learning_rate": 9.187940485512922e-06,
770
  "epoch": 0.988863287250384,
771
  "step": 2575
772
  },
773
  {
774
- "loss": 1.4259,
775
- "grad_norm": 0.5003166794776917,
776
  "learning_rate": 9.178151918559124e-06,
777
  "epoch": 0.9984639016897081,
778
  "step": 2600
779
  },
780
  {
781
- "loss": 1.4236,
782
- "grad_norm": 0.3816595673561096,
783
  "learning_rate": 9.168363351605327e-06,
784
  "epoch": 1.0080645161290323,
785
  "step": 2625
786
  },
787
  {
788
- "loss": 1.424,
789
- "grad_norm": 0.5128536224365234,
790
  "learning_rate": 9.158574784651528e-06,
791
  "epoch": 1.0176651305683564,
792
  "step": 2650
793
  },
794
  {
795
- "loss": 1.4237,
796
- "grad_norm": 0.46583881974220276,
797
  "learning_rate": 9.14878621769773e-06,
798
  "epoch": 1.0272657450076805,
799
  "step": 2675
800
  },
801
  {
802
- "loss": 1.4242,
803
- "grad_norm": 0.5510522127151489,
804
  "learning_rate": 9.138997650743931e-06,
805
  "epoch": 1.0368663594470047,
806
  "step": 2700
807
  },
808
  {
809
- "loss": 1.4256,
810
- "grad_norm": 1.018068790435791,
811
  "learning_rate": 9.129209083790134e-06,
812
  "epoch": 1.0464669738863288,
813
  "step": 2725
814
  },
815
  {
816
- "loss": 1.424,
817
- "grad_norm": 0.2575235664844513,
818
  "learning_rate": 9.119420516836336e-06,
819
  "epoch": 1.0560675883256527,
820
  "step": 2750
821
  },
822
  {
823
- "loss": 1.4247,
824
- "grad_norm": 0.2963893711566925,
825
  "learning_rate": 9.109631949882539e-06,
826
  "epoch": 1.0656682027649769,
827
  "step": 2775
828
  },
829
  {
830
- "loss": 1.4243,
831
- "grad_norm": 0.48700642585754395,
832
  "learning_rate": 9.09984338292874e-06,
833
  "epoch": 1.075268817204301,
834
  "step": 2800
835
  },
836
  {
837
- "loss": 1.4239,
838
- "grad_norm": 0.30403944849967957,
839
  "learning_rate": 9.090054815974943e-06,
840
  "epoch": 1.0848694316436251,
841
  "step": 2825
842
  },
843
  {
844
- "loss": 1.424,
845
- "grad_norm": 0.4571227431297302,
846
  "learning_rate": 9.080266249021143e-06,
847
  "epoch": 1.0944700460829493,
848
  "step": 2850
849
  },
850
  {
851
- "loss": 1.4244,
852
- "grad_norm": 0.3448657691478729,
853
  "learning_rate": 9.070477682067346e-06,
854
  "epoch": 1.1040706605222734,
855
  "step": 2875
856
  },
857
  {
858
- "loss": 1.4243,
859
- "grad_norm": 0.7213252186775208,
860
  "learning_rate": 9.060689115113547e-06,
861
  "epoch": 1.1136712749615976,
862
  "step": 2900
863
  },
864
  {
865
- "loss": 1.4234,
866
- "grad_norm": 0.3630754053592682,
867
  "learning_rate": 9.05090054815975e-06,
868
  "epoch": 1.1232718894009217,
869
  "step": 2925
870
  },
871
  {
872
- "loss": 1.4237,
873
- "grad_norm": 0.2607744038105011,
874
  "learning_rate": 9.041111981205952e-06,
875
  "epoch": 1.1328725038402458,
876
  "step": 2950
877
  },
878
  {
879
- "loss": 1.4255,
880
- "grad_norm": 1.6808569431304932,
881
  "learning_rate": 9.031323414252155e-06,
882
  "epoch": 1.14247311827957,
883
  "step": 2975
884
  },
885
  {
886
- "loss": 1.424,
887
- "grad_norm": 0.5143694281578064,
888
  "learning_rate": 9.021534847298356e-06,
889
  "epoch": 1.1520737327188941,
890
  "step": 3000
891
  },
892
  {
893
- "eval_loss": 1.426444411277771,
894
- "eval_wer": 0.004559616992172657,
895
- "eval_cer": 0.002296290185078239,
896
- "eval_runtime": 181.4246,
897
- "eval_samples_per_second": 9.37,
898
- "eval_steps_per_second": 0.149,
899
  "epoch": 1.1520737327188941,
900
  "step": 3000
901
  },
902
  {
903
- "loss": 1.4241,
904
- "grad_norm": 0.4142398238182068,
905
  "learning_rate": 9.01174628034456e-06,
906
  "epoch": 1.161674347158218,
907
  "step": 3025
908
  },
909
  {
910
- "loss": 1.4238,
911
- "grad_norm": 0.46893906593322754,
912
  "learning_rate": 9.00195771339076e-06,
913
  "epoch": 1.1712749615975422,
914
  "step": 3050
915
  },
916
  {
917
- "loss": 1.4233,
918
- "grad_norm": 0.2761977016925812,
919
  "learning_rate": 8.992169146436962e-06,
920
  "epoch": 1.1808755760368663,
921
  "step": 3075
922
  },
923
  {
924
- "loss": 1.4225,
925
- "grad_norm": 0.3853098154067993,
926
  "learning_rate": 8.982380579483163e-06,
927
  "epoch": 1.1904761904761905,
928
  "step": 3100
929
  },
930
  {
931
- "loss": 1.4228,
932
- "grad_norm": 0.23497086763381958,
933
  "learning_rate": 8.972592012529366e-06,
934
  "epoch": 1.2000768049155146,
935
  "step": 3125
936
  },
937
  {
938
- "loss": 1.4225,
939
- "grad_norm": 0.3737936019897461,
940
  "learning_rate": 8.962803445575568e-06,
941
  "epoch": 1.2096774193548387,
942
  "step": 3150
943
  },
944
  {
945
- "loss": 1.4241,
946
- "grad_norm": 0.7541052103042603,
947
  "learning_rate": 8.953014878621771e-06,
948
  "epoch": 1.2192780337941629,
949
  "step": 3175
950
  },
951
  {
952
- "loss": 1.4237,
953
- "grad_norm": 0.4566754102706909,
954
  "learning_rate": 8.943226311667972e-06,
955
  "epoch": 1.228878648233487,
956
  "step": 3200
957
  },
958
  {
959
- "loss": 1.423,
960
- "grad_norm": 0.31264716386795044,
961
  "learning_rate": 8.933437744714175e-06,
962
  "epoch": 1.238479262672811,
963
  "step": 3225
964
  },
965
  {
966
- "loss": 1.4226,
967
- "grad_norm": 0.38216665387153625,
968
  "learning_rate": 8.923649177760377e-06,
969
  "epoch": 1.248079877112135,
970
  "step": 3250
971
  },
972
  {
973
- "loss": 1.4222,
974
- "grad_norm": 0.2926384508609772,
975
  "learning_rate": 8.913860610806578e-06,
976
  "epoch": 1.2576804915514592,
977
  "step": 3275
978
  },
979
  {
980
- "loss": 1.4231,
981
- "grad_norm": 0.2229822278022766,
982
  "learning_rate": 8.904072043852781e-06,
983
  "epoch": 1.2672811059907834,
984
  "step": 3300
985
  },
986
  {
987
- "loss": 1.4234,
988
- "grad_norm": 0.3581763803958893,
989
  "learning_rate": 8.894283476898983e-06,
990
  "epoch": 1.2768817204301075,
991
  "step": 3325
992
  },
993
  {
994
- "loss": 1.4231,
995
- "grad_norm": 0.21746738255023956,
996
  "learning_rate": 8.884494909945186e-06,
997
  "epoch": 1.2864823348694316,
998
  "step": 3350
999
  },
1000
  {
1001
- "loss": 1.4233,
1002
- "grad_norm": 0.2930072844028473,
1003
  "learning_rate": 8.874706342991387e-06,
1004
  "epoch": 1.2960829493087558,
1005
  "step": 3375
1006
  },
1007
  {
1008
- "loss": 1.4241,
1009
- "grad_norm": 0.34126031398773193,
1010
  "learning_rate": 8.864917776037588e-06,
1011
  "epoch": 1.30568356374808,
1012
  "step": 3400
1013
  },
1014
  {
1015
- "loss": 1.4236,
1016
- "grad_norm": 0.6360740065574646,
1017
  "learning_rate": 8.85512920908379e-06,
1018
  "epoch": 1.315284178187404,
1019
  "step": 3425
1020
  },
1021
  {
1022
- "loss": 1.4225,
1023
- "grad_norm": 0.4061238467693329,
1024
  "learning_rate": 8.845340642129993e-06,
1025
  "epoch": 1.3248847926267282,
1026
  "step": 3450
1027
  },
1028
  {
1029
- "loss": 1.4227,
1030
- "grad_norm": 0.2641013264656067,
1031
  "learning_rate": 8.835552075176194e-06,
1032
  "epoch": 1.3344854070660523,
1033
  "step": 3475
1034
  },
1035
  {
1036
- "loss": 1.423,
1037
- "grad_norm": 0.49870121479034424,
1038
  "learning_rate": 8.825763508222397e-06,
1039
  "epoch": 1.3440860215053765,
1040
  "step": 3500
1041
  },
1042
  {
1043
- "eval_loss": 1.4249826669692993,
1044
- "eval_wer": 0.004559616992172657,
1045
- "eval_cer": 0.002351291147595083,
1046
- "eval_runtime": 180.6281,
1047
- "eval_samples_per_second": 9.412,
1048
- "eval_steps_per_second": 0.149,
1049
  "epoch": 1.3440860215053765,
1050
  "step": 3500
1051
  },
1052
  {
1053
- "train_runtime": 3219.4834,
1054
- "train_samples_per_second": 517.474,
1055
- "train_steps_per_second": 8.088,
1056
- "total_flos": 3.8181358092681216e+20,
1057
- "train_loss": 1.471987566266741,
1058
  "epoch": 1.3440860215053765,
1059
  "step": 3500
1060
  }
 
1
  [
2
  {
3
+ "loss": 3.3642,
4
+ "grad_norm": 27.656972885131836,
5
  "learning_rate": 4.800000000000001e-07,
6
  "epoch": 0.009600614439324117,
7
  "step": 25
8
  },
9
  {
10
+ "loss": 2.8525,
11
+ "grad_norm": 11.508564949035645,
12
  "learning_rate": 9.800000000000001e-07,
13
  "epoch": 0.019201228878648235,
14
  "step": 50
15
  },
16
  {
17
+ "loss": 2.3063,
18
+ "grad_norm": 8.78321361541748,
19
  "learning_rate": 1.48e-06,
20
  "epoch": 0.02880184331797235,
21
  "step": 75
22
  },
23
  {
24
+ "loss": 1.8411,
25
+ "grad_norm": 2.794045925140381,
26
  "learning_rate": 1.98e-06,
27
  "epoch": 0.03840245775729647,
28
  "step": 100
29
  },
30
  {
31
+ "loss": 1.6453,
32
+ "grad_norm": 2.6623477935791016,
33
  "learning_rate": 2.4800000000000004e-06,
34
  "epoch": 0.04800307219662058,
35
  "step": 125
36
  },
37
  {
38
+ "loss": 1.6035,
39
+ "grad_norm": 1.9311230182647705,
40
  "learning_rate": 2.9800000000000003e-06,
41
  "epoch": 0.0576036866359447,
42
  "step": 150
43
  },
44
  {
45
+ "loss": 1.5725,
46
+ "grad_norm": 1.5370234251022339,
47
  "learning_rate": 3.48e-06,
48
  "epoch": 0.06720430107526881,
49
  "step": 175
50
  },
51
  {
52
+ "loss": 1.5626,
53
+ "grad_norm": 2.0659360885620117,
54
  "learning_rate": 3.980000000000001e-06,
55
  "epoch": 0.07680491551459294,
56
  "step": 200
57
  },
58
  {
59
+ "loss": 1.5467,
60
+ "grad_norm": 2.592463254928589,
61
  "learning_rate": 4.48e-06,
62
  "epoch": 0.08640552995391705,
63
  "step": 225
64
  },
65
  {
66
+ "loss": 1.5359,
67
+ "grad_norm": 1.7290267944335938,
68
  "learning_rate": 4.980000000000001e-06,
69
  "epoch": 0.09600614439324116,
70
  "step": 250
71
  },
72
  {
73
+ "loss": 1.5246,
74
+ "grad_norm": 1.2997428178787231,
75
  "learning_rate": 5.480000000000001e-06,
76
  "epoch": 0.10560675883256529,
77
  "step": 275
78
  },
79
  {
80
+ "loss": 1.5175,
81
+ "grad_norm": 1.8068249225616455,
82
  "learning_rate": 5.98e-06,
83
  "epoch": 0.1152073732718894,
84
  "step": 300
85
  },
86
  {
87
+ "loss": 1.5085,
88
+ "grad_norm": 2.0698606967926025,
89
  "learning_rate": 6.480000000000001e-06,
90
  "epoch": 0.12480798771121351,
91
  "step": 325
92
  },
93
  {
94
+ "loss": 1.5069,
95
+ "grad_norm": 1.5639880895614624,
96
  "learning_rate": 6.98e-06,
97
  "epoch": 0.13440860215053763,
98
  "step": 350
99
  },
100
  {
101
+ "loss": 1.5035,
102
+ "grad_norm": 2.6863772869110107,
103
  "learning_rate": 7.48e-06,
104
  "epoch": 0.14400921658986174,
105
  "step": 375
106
  },
107
  {
108
+ "loss": 1.4968,
109
+ "grad_norm": 2.0388848781585693,
110
  "learning_rate": 7.980000000000002e-06,
111
  "epoch": 0.15360983102918588,
112
  "step": 400
113
  },
114
  {
115
+ "loss": 1.4922,
116
+ "grad_norm": 1.2673157453536987,
117
  "learning_rate": 8.48e-06,
118
  "epoch": 0.16321044546851,
119
  "step": 425
120
  },
121
  {
122
+ "loss": 1.4871,
123
+ "grad_norm": 1.3405632972717285,
124
  "learning_rate": 8.98e-06,
125
  "epoch": 0.1728110599078341,
126
  "step": 450
127
  },
128
  {
129
+ "loss": 1.4895,
130
+ "grad_norm": 1.068382978439331,
131
  "learning_rate": 9.48e-06,
132
  "epoch": 0.18241167434715821,
133
  "step": 475
134
  },
135
  {
136
+ "loss": 1.4854,
137
+ "grad_norm": 1.6716822385787964,
138
  "learning_rate": 9.980000000000001e-06,
139
  "epoch": 0.19201228878648233,
140
  "step": 500
141
  },
142
  {
143
+ "eval_loss": 1.481889247894287,
144
+ "eval_wer": 0.03017662777813987,
145
+ "eval_cer": 0.011480700751485703,
146
+ "eval_runtime": 603.2619,
147
+ "eval_samples_per_second": 2.816,
148
+ "eval_steps_per_second": 0.045,
149
  "epoch": 0.19201228878648233,
150
  "step": 500
151
  },
152
  {
153
+ "loss": 1.4798,
154
+ "grad_norm": 2.015155553817749,
155
  "learning_rate": 9.990602975724355e-06,
156
  "epoch": 0.20161290322580644,
157
  "step": 525
158
  },
159
  {
160
+ "loss": 1.4766,
161
+ "grad_norm": 1.353633999824524,
162
  "learning_rate": 9.980814408770558e-06,
163
  "epoch": 0.21121351766513058,
164
  "step": 550
165
  },
166
  {
167
+ "loss": 1.4719,
168
+ "grad_norm": 1.360000729560852,
169
  "learning_rate": 9.971025841816759e-06,
170
  "epoch": 0.2208141321044547,
171
  "step": 575
172
  },
173
  {
174
+ "loss": 1.4706,
175
+ "grad_norm": 0.8802452087402344,
176
  "learning_rate": 9.96123727486296e-06,
177
  "epoch": 0.2304147465437788,
178
  "step": 600
179
  },
180
  {
181
+ "loss": 1.4712,
182
+ "grad_norm": 1.2904715538024902,
183
  "learning_rate": 9.951448707909162e-06,
184
  "epoch": 0.24001536098310292,
185
  "step": 625
186
  },
187
  {
188
+ "loss": 1.4692,
189
+ "grad_norm": 1.1254265308380127,
190
  "learning_rate": 9.941660140955365e-06,
191
  "epoch": 0.24961597542242703,
192
  "step": 650
193
  },
194
  {
195
+ "loss": 1.4663,
196
+ "grad_norm": 1.4376908540725708,
197
  "learning_rate": 9.931871574001566e-06,
198
  "epoch": 0.25921658986175117,
199
  "step": 675
200
  },
201
  {
202
+ "loss": 1.4647,
203
+ "grad_norm": 0.7454094886779785,
204
  "learning_rate": 9.92208300704777e-06,
205
  "epoch": 0.26881720430107525,
206
  "step": 700
207
  },
208
  {
209
+ "loss": 1.4621,
210
+ "grad_norm": 1.176084280014038,
211
  "learning_rate": 9.91229444009397e-06,
212
  "epoch": 0.2784178187403994,
213
  "step": 725
214
  },
215
  {
216
+ "loss": 1.4617,
217
+ "grad_norm": 1.0089287757873535,
218
  "learning_rate": 9.902505873140174e-06,
219
  "epoch": 0.2880184331797235,
220
  "step": 750
221
  },
222
  {
223
+ "loss": 1.4586,
224
+ "grad_norm": 1.1119129657745361,
225
  "learning_rate": 9.892717306186375e-06,
226
  "epoch": 0.2976190476190476,
227
  "step": 775
228
  },
229
  {
230
+ "loss": 1.4633,
231
+ "grad_norm": 0.7650630474090576,
232
  "learning_rate": 9.882928739232577e-06,
233
  "epoch": 0.30721966205837176,
234
  "step": 800
235
  },
236
  {
237
+ "loss": 1.4617,
238
+ "grad_norm": 0.9932444095611572,
239
  "learning_rate": 9.87314017227878e-06,
240
  "epoch": 0.31682027649769584,
241
  "step": 825
242
  },
243
  {
244
+ "loss": 1.4573,
245
+ "grad_norm": 0.85480135679245,
246
  "learning_rate": 9.863351605324981e-06,
247
  "epoch": 0.32642089093702,
248
  "step": 850
249
  },
250
  {
251
+ "loss": 1.4536,
252
+ "grad_norm": 1.326219081878662,
253
  "learning_rate": 9.853563038371182e-06,
254
  "epoch": 0.33602150537634407,
255
  "step": 875
256
  },
257
  {
258
+ "loss": 1.4558,
259
+ "grad_norm": 0.8132762908935547,
260
  "learning_rate": 9.843774471417386e-06,
261
  "epoch": 0.3456221198156682,
262
  "step": 900
263
  },
264
  {
265
+ "loss": 1.4533,
266
+ "grad_norm": 1.0124117136001587,
267
  "learning_rate": 9.833985904463587e-06,
268
  "epoch": 0.35522273425499235,
269
  "step": 925
270
  },
271
  {
272
+ "loss": 1.4516,
273
+ "grad_norm": 0.9896714091300964,
274
  "learning_rate": 9.82419733750979e-06,
275
  "epoch": 0.36482334869431643,
276
  "step": 950
277
  },
278
  {
279
+ "loss": 1.4514,
280
+ "grad_norm": 1.136326789855957,
281
  "learning_rate": 9.814408770555991e-06,
282
  "epoch": 0.37442396313364057,
283
  "step": 975
284
  },
285
  {
286
+ "loss": 1.4518,
287
+ "grad_norm": 0.647108793258667,
288
  "learning_rate": 9.804620203602193e-06,
289
  "epoch": 0.38402457757296465,
290
  "step": 1000
291
  },
292
  {
293
+ "eval_loss": 1.4519319534301758,
294
+ "eval_wer": 0.017727954115883463,
295
+ "eval_cer": 0.006859837793418843,
296
+ "eval_runtime": 602.9922,
297
+ "eval_samples_per_second": 2.818,
298
+ "eval_steps_per_second": 0.045,
299
  "epoch": 0.38402457757296465,
300
  "step": 1000
301
  },
302
  {
303
+ "loss": 1.4497,
304
+ "grad_norm": 1.0084048509597778,
305
  "learning_rate": 9.794831636648396e-06,
306
  "epoch": 0.3936251920122888,
307
  "step": 1025
308
  },
309
  {
310
+ "loss": 1.4504,
311
+ "grad_norm": 0.9754135012626648,
312
  "learning_rate": 9.785043069694597e-06,
313
  "epoch": 0.4032258064516129,
314
  "step": 1050
315
  },
316
  {
317
+ "loss": 1.4507,
318
+ "grad_norm": 0.6452074646949768,
319
  "learning_rate": 9.7752545027408e-06,
320
  "epoch": 0.412826420890937,
321
  "step": 1075
322
  },
323
  {
324
+ "loss": 1.4488,
325
+ "grad_norm": 0.8794124126434326,
326
  "learning_rate": 9.765465935787002e-06,
327
  "epoch": 0.42242703533026116,
328
  "step": 1100
329
  },
330
  {
331
+ "loss": 1.4472,
332
+ "grad_norm": 0.8075922727584839,
333
  "learning_rate": 9.755677368833205e-06,
334
  "epoch": 0.43202764976958524,
335
  "step": 1125
336
  },
337
  {
338
+ "loss": 1.4488,
339
+ "grad_norm": 0.9166923761367798,
340
  "learning_rate": 9.745888801879405e-06,
341
  "epoch": 0.4416282642089094,
342
  "step": 1150
343
  },
344
  {
345
+ "loss": 1.4479,
346
+ "grad_norm": 0.8185281753540039,
347
  "learning_rate": 9.736100234925608e-06,
348
  "epoch": 0.45122887864823347,
349
  "step": 1175
350
  },
351
  {
352
+ "loss": 1.4468,
353
+ "grad_norm": 0.8979498147964478,
354
  "learning_rate": 9.726311667971809e-06,
355
  "epoch": 0.4608294930875576,
356
  "step": 1200
357
  },
358
  {
359
+ "loss": 1.4457,
360
+ "grad_norm": 0.8547759056091309,
361
  "learning_rate": 9.716523101018012e-06,
362
  "epoch": 0.47043010752688175,
363
  "step": 1225
364
  },
365
  {
366
+ "loss": 1.4483,
367
+ "grad_norm": 0.816839873790741,
368
  "learning_rate": 9.706734534064213e-06,
369
  "epoch": 0.48003072196620583,
370
  "step": 1250
371
  },
372
  {
373
+ "loss": 1.4457,
374
+ "grad_norm": 1.0802863836288452,
375
  "learning_rate": 9.696945967110417e-06,
376
  "epoch": 0.48963133640552997,
377
  "step": 1275
378
  },
379
  {
380
+ "loss": 1.4455,
381
+ "grad_norm": 0.9443736672401428,
382
  "learning_rate": 9.687157400156618e-06,
383
  "epoch": 0.49923195084485406,
384
  "step": 1300
385
  },
386
  {
387
+ "loss": 1.4455,
388
+ "grad_norm": 0.9043710827827454,
389
  "learning_rate": 9.677368833202821e-06,
390
  "epoch": 0.5088325652841782,
391
  "step": 1325
392
  },
393
  {
394
+ "loss": 1.4434,
395
+ "grad_norm": 0.3099987208843231,
396
  "learning_rate": 9.66758026624902e-06,
397
  "epoch": 0.5184331797235023,
398
  "step": 1350
399
  },
400
  {
401
+ "loss": 1.4432,
402
+ "grad_norm": 1.402285099029541,
403
  "learning_rate": 9.657791699295224e-06,
404
  "epoch": 0.5280337941628265,
405
  "step": 1375
406
  },
407
  {
408
+ "loss": 1.4446,
409
+ "grad_norm": 0.7573044896125793,
410
  "learning_rate": 9.648003132341425e-06,
411
  "epoch": 0.5376344086021505,
412
  "step": 1400
413
  },
414
  {
415
+ "loss": 1.4414,
416
+ "grad_norm": 1.2929362058639526,
417
  "learning_rate": 9.638214565387628e-06,
418
  "epoch": 0.5472350230414746,
419
  "step": 1425
420
  },
421
  {
422
+ "loss": 1.4392,
423
+ "grad_norm": 0.8310167193412781,
424
  "learning_rate": 9.62842599843383e-06,
425
  "epoch": 0.5568356374807988,
426
  "step": 1450
427
  },
428
  {
429
+ "loss": 1.4426,
430
+ "grad_norm": 0.710106611251831,
431
  "learning_rate": 9.618637431480033e-06,
432
  "epoch": 0.5664362519201229,
433
  "step": 1475
434
  },
435
  {
436
+ "loss": 1.4434,
437
+ "grad_norm": 1.0039565563201904,
438
  "learning_rate": 9.608848864526234e-06,
439
  "epoch": 0.576036866359447,
440
  "step": 1500
441
  },
442
  {
443
+ "eval_loss": 1.4412317276000977,
444
+ "eval_wer": 0.014925373134328358,
445
+ "eval_cer": 0.00557361820715281,
446
+ "eval_runtime": 604.8563,
447
+ "eval_samples_per_second": 2.809,
448
+ "eval_steps_per_second": 0.045,
449
  "epoch": 0.576036866359447,
450
  "step": 1500
451
  },
452
  {
453
+ "loss": 1.4416,
454
+ "grad_norm": 0.5386250019073486,
455
  "learning_rate": 9.599060297572435e-06,
456
  "epoch": 0.5856374807987711,
457
  "step": 1525
458
  },
459
  {
460
+ "loss": 1.4405,
461
+ "grad_norm": 0.6138635277748108,
462
  "learning_rate": 9.589271730618639e-06,
463
  "epoch": 0.5952380952380952,
464
  "step": 1550
465
  },
466
  {
467
+ "loss": 1.4401,
468
+ "grad_norm": 0.6281514167785645,
469
  "learning_rate": 9.57948316366484e-06,
470
  "epoch": 0.6048387096774194,
471
  "step": 1575
472
  },
473
  {
474
+ "loss": 1.4399,
475
+ "grad_norm": 0.7416335344314575,
476
  "learning_rate": 9.569694596711043e-06,
477
  "epoch": 0.6144393241167435,
478
  "step": 1600
479
  },
480
  {
481
+ "loss": 1.4389,
482
+ "grad_norm": 0.6534410119056702,
483
  "learning_rate": 9.559906029757244e-06,
484
  "epoch": 0.6240399385560675,
485
  "step": 1625
486
  },
487
  {
488
+ "loss": 1.4421,
489
+ "grad_norm": 0.7885088920593262,
490
  "learning_rate": 9.550117462803446e-06,
491
  "epoch": 0.6336405529953917,
492
  "step": 1650
493
  },
494
  {
495
+ "loss": 1.4392,
496
+ "grad_norm": 0.5605193972587585,
497
  "learning_rate": 9.540328895849649e-06,
498
  "epoch": 0.6432411674347158,
499
  "step": 1675
500
  },
501
  {
502
+ "loss": 1.4374,
503
+ "grad_norm": 0.5420004725456238,
504
  "learning_rate": 9.53054032889585e-06,
505
  "epoch": 0.65284178187404,
506
  "step": 1700
507
  },
508
  {
509
+ "loss": 1.4398,
510
+ "grad_norm": 0.5770916938781738,
511
  "learning_rate": 9.520751761942052e-06,
512
  "epoch": 0.6624423963133641,
513
  "step": 1725
514
  },
515
  {
516
+ "loss": 1.4394,
517
+ "grad_norm": 0.6007382869720459,
518
  "learning_rate": 9.510963194988255e-06,
519
  "epoch": 0.6720430107526881,
520
  "step": 1750
521
  },
522
  {
523
+ "loss": 1.4386,
524
+ "grad_norm": 0.6682799458503723,
525
  "learning_rate": 9.501174628034456e-06,
526
  "epoch": 0.6816436251920123,
527
  "step": 1775
528
  },
529
  {
530
+ "loss": 1.4387,
531
+ "grad_norm": 0.5475255846977234,
532
  "learning_rate": 9.49138606108066e-06,
533
  "epoch": 0.6912442396313364,
534
  "step": 1800
535
  },
536
  {
537
+ "loss": 1.4379,
538
+ "grad_norm": 0.8577839136123657,
539
  "learning_rate": 9.48159749412686e-06,
540
  "epoch": 0.7008448540706606,
541
  "step": 1825
542
  },
543
  {
544
+ "loss": 1.4383,
545
+ "grad_norm": 0.5306459069252014,
546
  "learning_rate": 9.471808927173064e-06,
547
  "epoch": 0.7104454685099847,
548
  "step": 1850
549
  },
550
  {
551
+ "loss": 1.438,
552
+ "grad_norm": 0.7568140625953674,
553
  "learning_rate": 9.462020360219265e-06,
554
  "epoch": 0.7200460829493087,
555
  "step": 1875
556
  },
557
  {
558
+ "loss": 1.4376,
559
+ "grad_norm": 0.561225414276123,
560
  "learning_rate": 9.452231793265466e-06,
561
  "epoch": 0.7296466973886329,
562
  "step": 1900
563
  },
564
  {
565
+ "loss": 1.4367,
566
+ "grad_norm": 0.8214055299758911,
567
  "learning_rate": 9.442443226311668e-06,
568
  "epoch": 0.739247311827957,
569
  "step": 1925
570
  },
571
  {
572
+ "loss": 1.4355,
573
+ "grad_norm": 0.40683963894844055,
574
  "learning_rate": 9.432654659357871e-06,
575
  "epoch": 0.7488479262672811,
576
  "step": 1950
577
  },
578
  {
579
+ "loss": 1.4364,
580
+ "grad_norm": 0.7378409504890442,
581
  "learning_rate": 9.422866092404072e-06,
582
  "epoch": 0.7584485407066052,
583
  "step": 1975
584
  },
585
  {
586
+ "loss": 1.4358,
587
+ "grad_norm": 0.4747765064239502,
588
  "learning_rate": 9.413077525450275e-06,
589
  "epoch": 0.7680491551459293,
590
  "step": 2000
591
  },
592
  {
593
+ "eval_loss": 1.4366859197616577,
594
+ "eval_wer": 0.012122792152773251,
595
+ "eval_cer": 0.004870961210951922,
596
+ "eval_runtime": 605.1852,
597
+ "eval_samples_per_second": 2.807,
598
+ "eval_steps_per_second": 0.045,
599
  "epoch": 0.7680491551459293,
600
  "step": 2000
601
  },
602
  {
603
+ "loss": 1.4361,
604
+ "grad_norm": 0.9131174087524414,
605
  "learning_rate": 9.403288958496477e-06,
606
  "epoch": 0.7776497695852534,
607
  "step": 2025
608
  },
609
  {
610
+ "loss": 1.4359,
611
+ "grad_norm": 0.6853222846984863,
612
  "learning_rate": 9.39350039154268e-06,
613
  "epoch": 0.7872503840245776,
614
  "step": 2050
615
  },
616
  {
617
+ "loss": 1.4356,
618
+ "grad_norm": 0.43234285712242126,
619
  "learning_rate": 9.383711824588881e-06,
620
  "epoch": 0.7968509984639017,
621
  "step": 2075
622
  },
623
  {
624
+ "loss": 1.4363,
625
+ "grad_norm": 0.3734281361103058,
626
  "learning_rate": 9.373923257635083e-06,
627
  "epoch": 0.8064516129032258,
628
  "step": 2100
629
  },
630
  {
631
+ "loss": 1.4363,
632
+ "grad_norm": 0.689392626285553,
633
  "learning_rate": 9.364134690681284e-06,
634
  "epoch": 0.8160522273425499,
635
  "step": 2125
636
  },
637
  {
638
+ "loss": 1.4359,
639
+ "grad_norm": 0.5757440328598022,
640
  "learning_rate": 9.354346123727487e-06,
641
  "epoch": 0.825652841781874,
642
  "step": 2150
643
  },
644
  {
645
+ "loss": 1.4345,
646
+ "grad_norm": 0.7384234666824341,
647
  "learning_rate": 9.344557556773688e-06,
648
  "epoch": 0.8352534562211982,
649
  "step": 2175
650
  },
651
  {
652
+ "loss": 1.4356,
653
+ "grad_norm": 0.9221552014350891,
654
  "learning_rate": 9.334768989819891e-06,
655
  "epoch": 0.8448540706605223,
656
  "step": 2200
657
  },
658
  {
659
+ "loss": 1.4359,
660
+ "grad_norm": 0.8016390800476074,
661
  "learning_rate": 9.324980422866093e-06,
662
  "epoch": 0.8544546850998463,
663
  "step": 2225
664
  },
665
  {
666
+ "loss": 1.4339,
667
+ "grad_norm": 0.4008951783180237,
668
  "learning_rate": 9.315191855912296e-06,
669
  "epoch": 0.8640552995391705,
670
  "step": 2250
671
  },
672
  {
673
+ "loss": 1.4341,
674
+ "grad_norm": 1.4997563362121582,
675
  "learning_rate": 9.305403288958497e-06,
676
  "epoch": 0.8736559139784946,
677
  "step": 2275
678
  },
679
  {
680
+ "loss": 1.4335,
681
+ "grad_norm": 0.7560231685638428,
682
  "learning_rate": 9.295614722004699e-06,
683
  "epoch": 0.8832565284178188,
684
  "step": 2300
685
  },
686
  {
687
+ "loss": 1.4342,
688
+ "grad_norm": 1.0922359228134155,
689
  "learning_rate": 9.285826155050902e-06,
690
  "epoch": 0.8928571428571429,
691
  "step": 2325
692
  },
693
  {
694
+ "loss": 1.4328,
695
+ "grad_norm": 0.3814384639263153,
696
  "learning_rate": 9.276037588097103e-06,
697
  "epoch": 0.9024577572964669,
698
  "step": 2350
699
  },
700
  {
701
+ "loss": 1.4336,
702
+ "grad_norm": 0.4015847444534302,
703
  "learning_rate": 9.266249021143305e-06,
704
  "epoch": 0.9120583717357911,
705
  "step": 2375
706
  },
707
  {
708
+ "loss": 1.4328,
709
+ "grad_norm": 0.6820506453514099,
710
  "learning_rate": 9.256460454189508e-06,
711
  "epoch": 0.9216589861751152,
712
  "step": 2400
713
  },
714
  {
715
+ "loss": 1.433,
716
+ "grad_norm": 0.7453433871269226,
717
  "learning_rate": 9.246671887235709e-06,
718
  "epoch": 0.9312596006144394,
719
  "step": 2425
720
  },
721
  {
722
+ "loss": 1.4335,
723
+ "grad_norm": 0.4594730734825134,
724
  "learning_rate": 9.236883320281912e-06,
725
  "epoch": 0.9408602150537635,
726
  "step": 2450
727
  },
728
  {
729
+ "loss": 1.432,
730
+ "grad_norm": 0.7341485023498535,
731
  "learning_rate": 9.227094753328114e-06,
732
  "epoch": 0.9504608294930875,
733
  "step": 2475
734
  },
735
  {
736
+ "loss": 1.4333,
737
+ "grad_norm": 0.41172826290130615,
738
  "learning_rate": 9.217306186374315e-06,
739
  "epoch": 0.9600614439324117,
740
  "step": 2500
741
  },
742
  {
743
+ "eval_loss": 1.4331741333007812,
744
+ "eval_wer": 0.012774555171739555,
745
+ "eval_cer": 0.005121059463836983,
746
+ "eval_runtime": 606.0707,
747
+ "eval_samples_per_second": 2.803,
748
+ "eval_steps_per_second": 0.045,
749
  "epoch": 0.9600614439324117,
750
  "step": 2500
751
  },
752
  {
753
+ "loss": 1.4319,
754
+ "grad_norm": 0.35165274143218994,
755
  "learning_rate": 9.207517619420518e-06,
756
  "epoch": 0.9696620583717358,
757
  "step": 2525
758
  },
759
  {
760
+ "loss": 1.4316,
761
+ "grad_norm": 0.5128066539764404,
762
  "learning_rate": 9.19772905246672e-06,
763
  "epoch": 0.9792626728110599,
764
  "step": 2550
765
  },
766
  {
767
+ "loss": 1.4323,
768
+ "grad_norm": 0.4359879195690155,
769
  "learning_rate": 9.187940485512922e-06,
770
  "epoch": 0.988863287250384,
771
  "step": 2575
772
  },
773
  {
774
+ "loss": 1.4326,
775
+ "grad_norm": 0.6791874766349792,
776
  "learning_rate": 9.178151918559124e-06,
777
  "epoch": 0.9984639016897081,
778
  "step": 2600
779
  },
780
  {
781
+ "loss": 1.4317,
782
+ "grad_norm": 0.5291798114776611,
783
  "learning_rate": 9.168363351605327e-06,
784
  "epoch": 1.0080645161290323,
785
  "step": 2625
786
  },
787
  {
788
+ "loss": 1.4306,
789
+ "grad_norm": 0.5678306221961975,
790
  "learning_rate": 9.158574784651528e-06,
791
  "epoch": 1.0176651305683564,
792
  "step": 2650
793
  },
794
  {
795
+ "loss": 1.4289,
796
+ "grad_norm": 0.31047800183296204,
797
  "learning_rate": 9.14878621769773e-06,
798
  "epoch": 1.0272657450076805,
799
  "step": 2675
800
  },
801
  {
802
+ "loss": 1.4279,
803
+ "grad_norm": 0.37818169593811035,
804
  "learning_rate": 9.138997650743931e-06,
805
  "epoch": 1.0368663594470047,
806
  "step": 2700
807
  },
808
  {
809
+ "loss": 1.4283,
810
+ "grad_norm": 0.33200085163116455,
811
  "learning_rate": 9.129209083790134e-06,
812
  "epoch": 1.0464669738863288,
813
  "step": 2725
814
  },
815
  {
816
+ "loss": 1.4286,
817
+ "grad_norm": 0.36598828434944153,
818
  "learning_rate": 9.119420516836336e-06,
819
  "epoch": 1.0560675883256527,
820
  "step": 2750
821
  },
822
  {
823
+ "loss": 1.4285,
824
+ "grad_norm": 0.3898240923881531,
825
  "learning_rate": 9.109631949882539e-06,
826
  "epoch": 1.0656682027649769,
827
  "step": 2775
828
  },
829
  {
830
+ "loss": 1.4283,
831
+ "grad_norm": 0.3763328790664673,
832
  "learning_rate": 9.09984338292874e-06,
833
  "epoch": 1.075268817204301,
834
  "step": 2800
835
  },
836
  {
837
+ "loss": 1.427,
838
+ "grad_norm": 0.5297687649726868,
839
  "learning_rate": 9.090054815974943e-06,
840
  "epoch": 1.0848694316436251,
841
  "step": 2825
842
  },
843
  {
844
+ "loss": 1.4291,
845
+ "grad_norm": 0.5956099629402161,
846
  "learning_rate": 9.080266249021143e-06,
847
  "epoch": 1.0944700460829493,
848
  "step": 2850
849
  },
850
  {
851
+ "loss": 1.4277,
852
+ "grad_norm": 0.3517364263534546,
853
  "learning_rate": 9.070477682067346e-06,
854
  "epoch": 1.1040706605222734,
855
  "step": 2875
856
  },
857
  {
858
+ "loss": 1.428,
859
+ "grad_norm": 0.47178784012794495,
860
  "learning_rate": 9.060689115113547e-06,
861
  "epoch": 1.1136712749615976,
862
  "step": 2900
863
  },
864
  {
865
+ "loss": 1.428,
866
+ "grad_norm": 0.4502784013748169,
867
  "learning_rate": 9.05090054815975e-06,
868
  "epoch": 1.1232718894009217,
869
  "step": 2925
870
  },
871
  {
872
+ "loss": 1.428,
873
+ "grad_norm": 0.32381555438041687,
874
  "learning_rate": 9.041111981205952e-06,
875
  "epoch": 1.1328725038402458,
876
  "step": 2950
877
  },
878
  {
879
+ "loss": 1.4278,
880
+ "grad_norm": 0.4513380527496338,
881
  "learning_rate": 9.031323414252155e-06,
882
  "epoch": 1.14247311827957,
883
  "step": 2975
884
  },
885
  {
886
+ "loss": 1.4284,
887
+ "grad_norm": 0.2871541976928711,
888
  "learning_rate": 9.021534847298356e-06,
889
  "epoch": 1.1520737327188941,
890
  "step": 3000
891
  },
892
  {
893
+ "eval_loss": 1.430253505706787,
894
+ "eval_wer": 0.012383497360359774,
895
+ "eval_cer": 0.004775685686043326,
896
+ "eval_runtime": 605.0894,
897
+ "eval_samples_per_second": 2.808,
898
+ "eval_steps_per_second": 0.045,
899
  "epoch": 1.1520737327188941,
900
  "step": 3000
901
  },
902
  {
903
+ "loss": 1.4282,
904
+ "grad_norm": 0.3893296718597412,
905
  "learning_rate": 9.01174628034456e-06,
906
  "epoch": 1.161674347158218,
907
  "step": 3025
908
  },
909
  {
910
+ "loss": 1.4281,
911
+ "grad_norm": 0.3037892282009125,
912
  "learning_rate": 9.00195771339076e-06,
913
  "epoch": 1.1712749615975422,
914
  "step": 3050
915
  },
916
  {
917
+ "loss": 1.4277,
918
+ "grad_norm": 0.5750548243522644,
919
  "learning_rate": 8.992169146436962e-06,
920
  "epoch": 1.1808755760368663,
921
  "step": 3075
922
  },
923
  {
924
+ "loss": 1.428,
925
+ "grad_norm": 0.6014561653137207,
926
  "learning_rate": 8.982380579483163e-06,
927
  "epoch": 1.1904761904761905,
928
  "step": 3100
929
  },
930
  {
931
+ "loss": 1.4278,
932
+ "grad_norm": 0.42518579959869385,
933
  "learning_rate": 8.972592012529366e-06,
934
  "epoch": 1.2000768049155146,
935
  "step": 3125
936
  },
937
  {
938
+ "loss": 1.4268,
939
+ "grad_norm": 0.3365946412086487,
940
  "learning_rate": 8.962803445575568e-06,
941
  "epoch": 1.2096774193548387,
942
  "step": 3150
943
  },
944
  {
945
+ "loss": 1.4279,
946
+ "grad_norm": 0.7084966897964478,
947
  "learning_rate": 8.953014878621771e-06,
948
  "epoch": 1.2192780337941629,
949
  "step": 3175
950
  },
951
  {
952
+ "loss": 1.4273,
953
+ "grad_norm": 0.4307415783405304,
954
  "learning_rate": 8.943226311667972e-06,
955
  "epoch": 1.228878648233487,
956
  "step": 3200
957
  },
958
  {
959
+ "loss": 1.4279,
960
+ "grad_norm": 0.40840864181518555,
961
  "learning_rate": 8.933437744714175e-06,
962
  "epoch": 1.238479262672811,
963
  "step": 3225
964
  },
965
  {
966
+ "loss": 1.4275,
967
+ "grad_norm": 0.34653913974761963,
968
  "learning_rate": 8.923649177760377e-06,
969
  "epoch": 1.248079877112135,
970
  "step": 3250
971
  },
972
  {
973
+ "loss": 1.427,
974
+ "grad_norm": 0.5619207620620728,
975
  "learning_rate": 8.913860610806578e-06,
976
  "epoch": 1.2576804915514592,
977
  "step": 3275
978
  },
979
  {
980
+ "loss": 1.4278,
981
+ "grad_norm": 0.9659692049026489,
982
  "learning_rate": 8.904072043852781e-06,
983
  "epoch": 1.2672811059907834,
984
  "step": 3300
985
  },
986
  {
987
+ "loss": 1.4271,
988
+ "grad_norm": 0.5404800176620483,
989
  "learning_rate": 8.894283476898983e-06,
990
  "epoch": 1.2768817204301075,
991
  "step": 3325
992
  },
993
  {
994
+ "loss": 1.4277,
995
+ "grad_norm": 0.5914152264595032,
996
  "learning_rate": 8.884494909945186e-06,
997
  "epoch": 1.2864823348694316,
998
  "step": 3350
999
  },
1000
  {
1001
+ "loss": 1.4291,
1002
+ "grad_norm": 0.6599079966545105,
1003
  "learning_rate": 8.874706342991387e-06,
1004
  "epoch": 1.2960829493087558,
1005
  "step": 3375
1006
  },
1007
  {
1008
+ "loss": 1.4268,
1009
+ "grad_norm": 0.3193410336971283,
1010
  "learning_rate": 8.864917776037588e-06,
1011
  "epoch": 1.30568356374808,
1012
  "step": 3400
1013
  },
1014
  {
1015
+ "loss": 1.4267,
1016
+ "grad_norm": 0.4547780454158783,
1017
  "learning_rate": 8.85512920908379e-06,
1018
  "epoch": 1.315284178187404,
1019
  "step": 3425
1020
  },
1021
  {
1022
+ "loss": 1.4263,
1023
+ "grad_norm": 0.3442824184894562,
1024
  "learning_rate": 8.845340642129993e-06,
1025
  "epoch": 1.3248847926267282,
1026
  "step": 3450
1027
  },
1028
  {
1029
+ "loss": 1.4266,
1030
+ "grad_norm": 0.41667816042900085,
1031
  "learning_rate": 8.835552075176194e-06,
1032
  "epoch": 1.3344854070660523,
1033
  "step": 3475
1034
  },
1035
  {
1036
+ "loss": 1.4278,
1037
+ "grad_norm": 0.35117366909980774,
1038
  "learning_rate": 8.825763508222397e-06,
1039
  "epoch": 1.3440860215053765,
1040
  "step": 3500
1041
  },
1042
  {
1043
+ "eval_loss": 1.428139328956604,
1044
+ "eval_wer": 0.01016750309587434,
1045
+ "eval_cer": 0.004275489180273203,
1046
+ "eval_runtime": 604.2764,
1047
+ "eval_samples_per_second": 2.812,
1048
+ "eval_steps_per_second": 0.045,
1049
  "epoch": 1.3440860215053765,
1050
  "step": 3500
1051
  },
1052
  {
1053
+ "train_runtime": 17120.6339,
1054
+ "train_samples_per_second": 97.309,
1055
+ "train_steps_per_second": 1.521,
1056
+ "total_flos": 3.818118759756595e+20,
1057
+ "train_loss": 1.482932498386928,
1058
  "epoch": 1.3440860215053765,
1059
  "step": 3500
1060
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff