cloudwalkerw committed on
Commit
38ffa63
·
1 Parent(s): 58032f9

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -2
  2. all_results.json +9 -9
  3. eval_results.json +5 -5
  4. train_results.json +5 -5
  5. trainer_state.json +82 -547
README.md CHANGED
@@ -1,6 +1,7 @@
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
 
4
  - generated_from_trainer
5
  metrics:
6
  - accuracy
@@ -16,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.7129
20
- - Accuracy: 0.1026
21
 
22
  ## Model description
23
 
 
1
  ---
2
  base_model: microsoft/wavlm-base
3
  tags:
4
+ - audio-classification
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
 
17
 
18
  This model is a fine-tuned version of [microsoft/wavlm-base](https://huggingface.co/microsoft/wavlm-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6534
21
+ - Accuracy: 0.8974
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.98,
3
  "eval_accuracy": 0.8974400257607471,
4
- "eval_loss": 0.3295176029205322,
5
- "eval_runtime": 499.4668,
6
- "eval_samples_per_second": 49.741,
7
- "eval_steps_per_second": 24.871,
8
- "train_loss": 0.3321091743430706,
9
- "train_runtime": 22869.426,
10
- "train_samples_per_second": 11.098,
11
- "train_steps_per_second": 0.173
12
  }
 
1
  {
2
+ "epoch": 9.91,
3
  "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.6533961296081543,
5
+ "eval_runtime": 509.5539,
6
+ "eval_samples_per_second": 48.756,
7
+ "eval_steps_per_second": 24.378,
8
+ "train_loss": 0.4679390847659576,
9
+ "train_runtime": 4727.8199,
10
+ "train_samples_per_second": 10.914,
11
+ "train_steps_per_second": 0.169
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.98,
3
  "eval_accuracy": 0.8974400257607471,
4
- "eval_loss": 0.3295176029205322,
5
- "eval_runtime": 499.4668,
6
- "eval_samples_per_second": 49.741,
7
- "eval_steps_per_second": 24.871
8
  }
 
1
  {
2
+ "epoch": 9.91,
3
  "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.6533961296081543,
5
+ "eval_runtime": 509.5539,
6
+ "eval_samples_per_second": 48.756,
7
+ "eval_steps_per_second": 24.378
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 9.98,
3
- "train_loss": 0.3321091743430706,
4
- "train_runtime": 22869.426,
5
- "train_samples_per_second": 11.098,
6
- "train_steps_per_second": 0.173
7
  }
 
1
  {
2
+ "epoch": 9.91,
3
+ "train_loss": 0.4679390847659576,
4
+ "train_runtime": 4727.8199,
5
+ "train_samples_per_second": 10.914,
6
+ "train_steps_per_second": 0.169
7
  }
trainer_state.json CHANGED
@@ -1,613 +1,148 @@
1
  {
2
  "best_metric": 0.8974400257607471,
3
- "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_3/checkpoint-100",
4
- "epoch": 9.98109640831758,
5
  "eval_steps": 100,
6
- "global_step": 3960,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.25,
13
- "learning_rate": 7.575757575757576e-05,
14
- "loss": 0.4161,
15
  "step": 100
16
  },
17
  {
18
- "epoch": 0.25,
19
- "eval_accuracy": 0.8974400257607471,
20
- "eval_loss": 0.3295176029205322,
21
- "eval_runtime": 497.7495,
22
- "eval_samples_per_second": 49.913,
23
- "eval_steps_per_second": 24.956,
24
  "step": 100
25
  },
26
  {
27
- "epoch": 0.5,
28
- "learning_rate": 0.00015151515151515152,
29
- "loss": 0.3196,
30
  "step": 200
31
  },
32
  {
33
- "epoch": 0.5,
34
- "eval_accuracy": 0.8974400257607471,
35
- "eval_loss": 0.3312481641769409,
36
- "eval_runtime": 499.0539,
37
- "eval_samples_per_second": 49.782,
38
- "eval_steps_per_second": 24.891,
39
  "step": 200
40
  },
41
  {
42
- "epoch": 0.76,
43
- "learning_rate": 0.00022727272727272725,
44
- "loss": 0.3391,
45
  "step": 300
46
  },
47
  {
48
- "epoch": 0.76,
49
- "eval_accuracy": 0.8974400257607471,
50
- "eval_loss": 0.3353486657142639,
51
- "eval_runtime": 499.0444,
52
- "eval_samples_per_second": 49.783,
53
- "eval_steps_per_second": 24.892,
54
  "step": 300
55
  },
56
  {
57
- "epoch": 1.01,
58
- "learning_rate": 0.00029966329966329963,
59
- "loss": 0.3285,
60
  "step": 400
61
  },
62
  {
63
- "epoch": 1.01,
64
- "eval_accuracy": 0.8974400257607471,
65
- "eval_loss": 0.3321685791015625,
66
- "eval_runtime": 499.1271,
67
- "eval_samples_per_second": 49.775,
68
- "eval_steps_per_second": 24.887,
69
  "step": 400
70
  },
71
  {
72
- "epoch": 1.26,
73
- "learning_rate": 0.00029124579124579125,
74
- "loss": 0.3354,
75
  "step": 500
76
  },
77
  {
78
- "epoch": 1.26,
79
  "eval_accuracy": 0.8974400257607471,
80
- "eval_loss": 0.3366284668445587,
81
- "eval_runtime": 499.5555,
82
- "eval_samples_per_second": 49.732,
83
- "eval_steps_per_second": 24.866,
84
  "step": 500
85
  },
86
  {
87
- "epoch": 1.51,
88
- "learning_rate": 0.0002828282828282828,
89
- "loss": 0.3344,
90
  "step": 600
91
  },
92
  {
93
- "epoch": 1.51,
94
- "eval_accuracy": 0.8974400257607471,
95
- "eval_loss": 0.3315127491950989,
96
- "eval_runtime": 499.6088,
97
- "eval_samples_per_second": 49.727,
98
- "eval_steps_per_second": 24.863,
99
  "step": 600
100
  },
101
  {
102
- "epoch": 1.76,
103
- "learning_rate": 0.0002744107744107744,
104
- "loss": 0.3343,
105
  "step": 700
106
  },
107
  {
108
- "epoch": 1.76,
109
  "eval_accuracy": 0.8974400257607471,
110
- "eval_loss": 0.33083751797676086,
111
- "eval_runtime": 499.431,
112
- "eval_samples_per_second": 49.745,
113
- "eval_steps_per_second": 24.872,
114
  "step": 700
115
  },
116
  {
117
- "epoch": 2.02,
118
- "learning_rate": 0.00026599326599326595,
119
- "loss": 0.325,
120
  "step": 800
121
  },
122
  {
123
- "epoch": 2.02,
124
- "eval_accuracy": 0.8974400257607471,
125
- "eval_loss": 0.33818838000297546,
126
- "eval_runtime": 499.7312,
127
- "eval_samples_per_second": 49.715,
128
- "eval_steps_per_second": 24.857,
129
  "step": 800
130
  },
131
  {
132
- "epoch": 2.27,
133
- "learning_rate": 0.00025757575757575756,
134
- "loss": 0.34,
135
- "step": 900
136
- },
137
- {
138
- "epoch": 2.27,
139
- "eval_accuracy": 0.8974400257607471,
140
- "eval_loss": 0.3314325511455536,
141
- "eval_runtime": 499.9566,
142
- "eval_samples_per_second": 49.692,
143
- "eval_steps_per_second": 24.846,
144
- "step": 900
145
- },
146
- {
147
- "epoch": 2.52,
148
- "learning_rate": 0.00024915824915824913,
149
- "loss": 0.3333,
150
- "step": 1000
151
- },
152
- {
153
- "epoch": 2.52,
154
- "eval_accuracy": 0.8974400257607471,
155
- "eval_loss": 0.33876436948776245,
156
- "eval_runtime": 499.9566,
157
- "eval_samples_per_second": 49.692,
158
- "eval_steps_per_second": 24.846,
159
- "step": 1000
160
- },
161
- {
162
- "epoch": 2.77,
163
- "learning_rate": 0.00024074074074074072,
164
- "loss": 0.318,
165
- "step": 1100
166
- },
167
- {
168
- "epoch": 2.77,
169
- "eval_accuracy": 0.8974400257607471,
170
- "eval_loss": 0.33707037568092346,
171
- "eval_runtime": 499.5137,
172
- "eval_samples_per_second": 49.736,
173
- "eval_steps_per_second": 24.868,
174
- "step": 1100
175
- },
176
- {
177
- "epoch": 3.02,
178
- "learning_rate": 0.0002323232323232323,
179
- "loss": 0.3281,
180
- "step": 1200
181
- },
182
- {
183
- "epoch": 3.02,
184
- "eval_accuracy": 0.8974400257607471,
185
- "eval_loss": 0.33621686697006226,
186
- "eval_runtime": 499.3401,
187
- "eval_samples_per_second": 49.754,
188
- "eval_steps_per_second": 24.877,
189
- "step": 1200
190
- },
191
- {
192
- "epoch": 3.28,
193
- "learning_rate": 0.0002239057239057239,
194
- "loss": 0.3293,
195
- "step": 1300
196
- },
197
- {
198
- "epoch": 3.28,
199
- "eval_accuracy": 0.8974400257607471,
200
- "eval_loss": 0.33068734407424927,
201
- "eval_runtime": 499.4564,
202
- "eval_samples_per_second": 49.742,
203
- "eval_steps_per_second": 24.871,
204
- "step": 1300
205
- },
206
- {
207
- "epoch": 3.53,
208
- "learning_rate": 0.00021548821548821544,
209
- "loss": 0.3175,
210
- "step": 1400
211
- },
212
- {
213
- "epoch": 3.53,
214
- "eval_accuracy": 0.8974400257607471,
215
- "eval_loss": 0.33569779992103577,
216
- "eval_runtime": 499.6949,
217
- "eval_samples_per_second": 49.718,
218
- "eval_steps_per_second": 24.859,
219
- "step": 1400
220
- },
221
- {
222
- "epoch": 3.78,
223
- "learning_rate": 0.00020707070707070703,
224
- "loss": 0.3415,
225
- "step": 1500
226
- },
227
- {
228
- "epoch": 3.78,
229
- "eval_accuracy": 0.8974400257607471,
230
- "eval_loss": 0.3321123421192169,
231
- "eval_runtime": 499.567,
232
- "eval_samples_per_second": 49.731,
233
- "eval_steps_per_second": 24.866,
234
- "step": 1500
235
- },
236
- {
237
- "epoch": 4.03,
238
- "learning_rate": 0.00019865319865319862,
239
- "loss": 0.341,
240
- "step": 1600
241
- },
242
- {
243
- "epoch": 4.03,
244
- "eval_accuracy": 0.8974400257607471,
245
- "eval_loss": 0.33071625232696533,
246
- "eval_runtime": 499.8939,
247
- "eval_samples_per_second": 49.699,
248
- "eval_steps_per_second": 24.849,
249
- "step": 1600
250
- },
251
- {
252
- "epoch": 4.28,
253
- "learning_rate": 0.00019023569023569022,
254
- "loss": 0.3285,
255
- "step": 1700
256
- },
257
- {
258
- "epoch": 4.28,
259
- "eval_accuracy": 0.8974400257607471,
260
- "eval_loss": 0.3307797610759735,
261
- "eval_runtime": 500.1708,
262
- "eval_samples_per_second": 49.671,
263
- "eval_steps_per_second": 24.836,
264
- "step": 1700
265
- },
266
- {
267
- "epoch": 4.54,
268
- "learning_rate": 0.0001818181818181818,
269
- "loss": 0.3337,
270
- "step": 1800
271
- },
272
- {
273
- "epoch": 4.54,
274
- "eval_accuracy": 0.8974400257607471,
275
- "eval_loss": 0.330828994512558,
276
- "eval_runtime": 499.7725,
277
- "eval_samples_per_second": 49.711,
278
- "eval_steps_per_second": 24.855,
279
- "step": 1800
280
- },
281
- {
282
- "epoch": 4.79,
283
- "learning_rate": 0.0001734006734006734,
284
- "loss": 0.3276,
285
- "step": 1900
286
- },
287
- {
288
- "epoch": 4.79,
289
- "eval_accuracy": 0.8974400257607471,
290
- "eval_loss": 0.33070385456085205,
291
- "eval_runtime": 499.7607,
292
- "eval_samples_per_second": 49.712,
293
- "eval_steps_per_second": 24.856,
294
- "step": 1900
295
- },
296
- {
297
- "epoch": 5.04,
298
- "learning_rate": 0.000164983164983165,
299
- "loss": 0.3248,
300
- "step": 2000
301
- },
302
- {
303
- "epoch": 5.04,
304
- "eval_accuracy": 0.8974400257607471,
305
- "eval_loss": 0.33109623193740845,
306
- "eval_runtime": 500.0337,
307
- "eval_samples_per_second": 49.685,
308
- "eval_steps_per_second": 24.842,
309
- "step": 2000
310
- },
311
- {
312
- "epoch": 5.29,
313
- "learning_rate": 0.00015656565656565653,
314
- "loss": 0.3371,
315
- "step": 2100
316
- },
317
- {
318
- "epoch": 5.29,
319
- "eval_accuracy": 0.8974400257607471,
320
- "eval_loss": 0.3317299485206604,
321
- "eval_runtime": 499.7678,
322
- "eval_samples_per_second": 49.711,
323
- "eval_steps_per_second": 24.856,
324
- "step": 2100
325
- },
326
- {
327
- "epoch": 5.55,
328
- "learning_rate": 0.00014814814814814812,
329
- "loss": 0.3261,
330
- "step": 2200
331
- },
332
- {
333
- "epoch": 5.55,
334
- "eval_accuracy": 0.8974400257607471,
335
- "eval_loss": 0.33148789405822754,
336
- "eval_runtime": 499.916,
337
- "eval_samples_per_second": 49.696,
338
- "eval_steps_per_second": 24.848,
339
- "step": 2200
340
- },
341
- {
342
- "epoch": 5.8,
343
- "learning_rate": 0.0001397306397306397,
344
- "loss": 0.3277,
345
- "step": 2300
346
- },
347
- {
348
- "epoch": 5.8,
349
- "eval_accuracy": 0.8974400257607471,
350
- "eval_loss": 0.33226969838142395,
351
- "eval_runtime": 500.0285,
352
- "eval_samples_per_second": 49.685,
353
- "eval_steps_per_second": 24.843,
354
- "step": 2300
355
- },
356
- {
357
- "epoch": 6.05,
358
- "learning_rate": 0.0001313131313131313,
359
- "loss": 0.3297,
360
- "step": 2400
361
- },
362
- {
363
- "epoch": 6.05,
364
- "eval_accuracy": 0.8974400257607471,
365
- "eval_loss": 0.33209508657455444,
366
- "eval_runtime": 499.9688,
367
- "eval_samples_per_second": 49.691,
368
- "eval_steps_per_second": 24.846,
369
- "step": 2400
370
- },
371
- {
372
- "epoch": 6.3,
373
- "learning_rate": 0.0001228956228956229,
374
- "loss": 0.3397,
375
- "step": 2500
376
- },
377
- {
378
- "epoch": 6.3,
379
- "eval_accuracy": 0.8974400257607471,
380
- "eval_loss": 0.3315936028957367,
381
- "eval_runtime": 500.0781,
382
- "eval_samples_per_second": 49.68,
383
- "eval_steps_per_second": 24.84,
384
- "step": 2500
385
- },
386
- {
387
- "epoch": 6.55,
388
- "learning_rate": 0.00011447811447811446,
389
- "loss": 0.3313,
390
- "step": 2600
391
- },
392
- {
393
- "epoch": 6.55,
394
- "eval_accuracy": 0.8974400257607471,
395
- "eval_loss": 0.3375791013240814,
396
- "eval_runtime": 500.3761,
397
- "eval_samples_per_second": 49.651,
398
- "eval_steps_per_second": 24.825,
399
- "step": 2600
400
- },
401
- {
402
- "epoch": 6.81,
403
- "learning_rate": 0.00010606060606060605,
404
- "loss": 0.3297,
405
- "step": 2700
406
- },
407
- {
408
- "epoch": 6.81,
409
- "eval_accuracy": 0.8974400257607471,
410
- "eval_loss": 0.3325986862182617,
411
- "eval_runtime": 499.9451,
412
- "eval_samples_per_second": 49.693,
413
- "eval_steps_per_second": 24.847,
414
- "step": 2700
415
- },
416
- {
417
- "epoch": 7.06,
418
- "learning_rate": 9.764309764309764e-05,
419
- "loss": 0.3148,
420
- "step": 2800
421
- },
422
- {
423
- "epoch": 7.06,
424
- "eval_accuracy": 0.8974400257607471,
425
- "eval_loss": 0.3326033651828766,
426
- "eval_runtime": 500.102,
427
- "eval_samples_per_second": 49.678,
428
- "eval_steps_per_second": 24.839,
429
- "step": 2800
430
- },
431
- {
432
- "epoch": 7.31,
433
- "learning_rate": 8.92255892255892e-05,
434
- "loss": 0.33,
435
- "step": 2900
436
- },
437
- {
438
- "epoch": 7.31,
439
- "eval_accuracy": 0.8974400257607471,
440
- "eval_loss": 0.33067989349365234,
441
- "eval_runtime": 499.6928,
442
- "eval_samples_per_second": 49.719,
443
- "eval_steps_per_second": 24.859,
444
- "step": 2900
445
- },
446
- {
447
- "epoch": 7.56,
448
- "learning_rate": 8.08080808080808e-05,
449
- "loss": 0.3373,
450
- "step": 3000
451
- },
452
- {
453
- "epoch": 7.56,
454
- "eval_accuracy": 0.8974400257607471,
455
- "eval_loss": 0.3357817232608795,
456
- "eval_runtime": 499.6901,
457
- "eval_samples_per_second": 49.719,
458
- "eval_steps_per_second": 24.859,
459
- "step": 3000
460
- },
461
- {
462
- "epoch": 7.81,
463
- "learning_rate": 7.239057239057239e-05,
464
- "loss": 0.3229,
465
- "step": 3100
466
- },
467
- {
468
- "epoch": 7.81,
469
- "eval_accuracy": 0.8974400257607471,
470
- "eval_loss": 0.331524521112442,
471
- "eval_runtime": 499.59,
472
- "eval_samples_per_second": 49.729,
473
- "eval_steps_per_second": 24.864,
474
- "step": 3100
475
- },
476
- {
477
- "epoch": 8.07,
478
- "learning_rate": 6.397306397306397e-05,
479
- "loss": 0.3311,
480
- "step": 3200
481
- },
482
- {
483
- "epoch": 8.07,
484
- "eval_accuracy": 0.8974400257607471,
485
- "eval_loss": 0.3329682946205139,
486
- "eval_runtime": 499.5794,
487
- "eval_samples_per_second": 49.73,
488
- "eval_steps_per_second": 24.865,
489
- "step": 3200
490
- },
491
- {
492
- "epoch": 8.32,
493
- "learning_rate": 5.5555555555555545e-05,
494
- "loss": 0.32,
495
- "step": 3300
496
- },
497
- {
498
- "epoch": 8.32,
499
- "eval_accuracy": 0.8974400257607471,
500
- "eval_loss": 0.33294886350631714,
501
- "eval_runtime": 500.087,
502
- "eval_samples_per_second": 49.679,
503
- "eval_steps_per_second": 24.84,
504
- "step": 3300
505
- },
506
- {
507
- "epoch": 8.57,
508
- "learning_rate": 4.7138047138047136e-05,
509
- "loss": 0.3303,
510
- "step": 3400
511
- },
512
- {
513
- "epoch": 8.57,
514
- "eval_accuracy": 0.8974400257607471,
515
- "eval_loss": 0.33330321311950684,
516
- "eval_runtime": 499.642,
517
- "eval_samples_per_second": 49.724,
518
- "eval_steps_per_second": 24.862,
519
- "step": 3400
520
- },
521
- {
522
- "epoch": 8.82,
523
- "learning_rate": 3.8720538720538714e-05,
524
- "loss": 0.3268,
525
- "step": 3500
526
- },
527
- {
528
- "epoch": 8.82,
529
- "eval_accuracy": 0.8974400257607471,
530
- "eval_loss": 0.332504540681839,
531
- "eval_runtime": 499.7153,
532
- "eval_samples_per_second": 49.716,
533
- "eval_steps_per_second": 24.858,
534
- "step": 3500
535
- },
536
- {
537
- "epoch": 9.07,
538
- "learning_rate": 3.03030303030303e-05,
539
- "loss": 0.3362,
540
- "step": 3600
541
- },
542
- {
543
- "epoch": 9.07,
544
- "eval_accuracy": 0.8974400257607471,
545
- "eval_loss": 0.33143192529678345,
546
- "eval_runtime": 499.7396,
547
- "eval_samples_per_second": 49.714,
548
- "eval_steps_per_second": 24.857,
549
- "step": 3600
550
- },
551
- {
552
- "epoch": 9.33,
553
- "learning_rate": 2.1885521885521884e-05,
554
- "loss": 0.3391,
555
- "step": 3700
556
- },
557
- {
558
- "epoch": 9.33,
559
- "eval_accuracy": 0.8974400257607471,
560
- "eval_loss": 0.33087798953056335,
561
- "eval_runtime": 499.6009,
562
- "eval_samples_per_second": 49.728,
563
- "eval_steps_per_second": 24.864,
564
- "step": 3700
565
- },
566
- {
567
- "epoch": 9.58,
568
- "learning_rate": 1.3468013468013465e-05,
569
- "loss": 0.3233,
570
- "step": 3800
571
- },
572
- {
573
- "epoch": 9.58,
574
- "eval_accuracy": 0.8974400257607471,
575
- "eval_loss": 0.33190712332725525,
576
- "eval_runtime": 499.6968,
577
- "eval_samples_per_second": 49.718,
578
- "eval_steps_per_second": 24.859,
579
- "step": 3800
580
- },
581
- {
582
- "epoch": 9.83,
583
- "learning_rate": 5.05050505050505e-06,
584
- "loss": 0.3196,
585
- "step": 3900
586
- },
587
- {
588
- "epoch": 9.83,
589
- "eval_accuracy": 0.8974400257607471,
590
- "eval_loss": 0.33247998356819153,
591
- "eval_runtime": 499.604,
592
- "eval_samples_per_second": 49.727,
593
- "eval_steps_per_second": 24.864,
594
- "step": 3900
595
- },
596
- {
597
- "epoch": 9.98,
598
- "step": 3960,
599
- "total_flos": 1.506420070651024e+19,
600
- "train_loss": 0.3321091743430706,
601
- "train_runtime": 22869.426,
602
- "train_samples_per_second": 11.098,
603
- "train_steps_per_second": 0.173
604
  }
605
  ],
606
  "logging_steps": 100,
607
- "max_steps": 3960,
608
  "num_train_epochs": 10,
609
  "save_steps": 100,
610
- "total_flos": 1.506420070651024e+19,
611
  "trial_name": null,
612
  "trial_params": null
613
  }
 
1
  {
2
  "best_metric": 0.8974400257607471,
3
+ "best_model_checkpoint": "/home/cloudwalker/ASVmodel/wavlm-base_3/checkpoint-500",
4
+ "epoch": 9.907120743034056,
5
  "eval_steps": 100,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.24,
13
+ "learning_rate": 0.00029166666666666664,
14
+ "loss": 0.2236,
15
  "step": 100
16
  },
17
  {
18
+ "epoch": 1.24,
19
+ "eval_accuracy": 0.4466672033488971,
20
+ "eval_loss": 12.849496841430664,
21
+ "eval_runtime": 506.1701,
22
+ "eval_samples_per_second": 49.082,
23
+ "eval_steps_per_second": 24.541,
24
  "step": 100
25
  },
26
  {
27
+ "epoch": 2.48,
28
+ "learning_rate": 0.00025,
29
+ "loss": 0.0514,
30
  "step": 200
31
  },
32
  {
33
+ "epoch": 2.48,
34
+ "eval_accuracy": 0.26771051360489456,
35
+ "eval_loss": 16.30784797668457,
36
+ "eval_runtime": 508.9091,
37
+ "eval_samples_per_second": 48.818,
38
+ "eval_steps_per_second": 24.409,
39
  "step": 200
40
  },
41
  {
42
+ "epoch": 3.72,
43
+ "learning_rate": 0.00020833333333333332,
44
+ "loss": 0.0,
45
  "step": 300
46
  },
47
  {
48
+ "epoch": 3.72,
49
+ "eval_accuracy": 0.25970053131540816,
50
+ "eval_loss": 17.56509017944336,
51
+ "eval_runtime": 508.9562,
52
+ "eval_samples_per_second": 48.814,
53
+ "eval_steps_per_second": 24.407,
54
  "step": 300
55
  },
56
  {
57
+ "epoch": 4.95,
58
+ "learning_rate": 0.00016666666666666666,
59
+ "loss": 0.3252,
60
  "step": 400
61
  },
62
  {
63
+ "epoch": 4.95,
64
+ "eval_accuracy": 0.1912332957655772,
65
+ "eval_loss": 15.03822135925293,
66
+ "eval_runtime": 508.9315,
67
+ "eval_samples_per_second": 48.816,
68
+ "eval_steps_per_second": 24.408,
69
  "step": 400
70
  },
71
  {
72
+ "epoch": 6.19,
73
+ "learning_rate": 0.000125,
74
+ "loss": 1.0577,
75
  "step": 500
76
  },
77
  {
78
+ "epoch": 6.19,
79
  "eval_accuracy": 0.8974400257607471,
80
+ "eval_loss": 0.6533961296081543,
81
+ "eval_runtime": 509.063,
82
+ "eval_samples_per_second": 48.803,
83
+ "eval_steps_per_second": 24.402,
84
  "step": 500
85
  },
86
  {
87
+ "epoch": 7.43,
88
+ "learning_rate": 8.333333333333333e-05,
89
+ "loss": 0.6973,
90
  "step": 600
91
  },
92
  {
93
+ "epoch": 7.43,
94
+ "eval_accuracy": 0.10255997423925294,
95
+ "eval_loss": 0.7351841926574707,
96
+ "eval_runtime": 509.1844,
97
+ "eval_samples_per_second": 48.792,
98
+ "eval_steps_per_second": 24.396,
99
  "step": 600
100
  },
101
  {
102
+ "epoch": 8.67,
103
+ "learning_rate": 4.1666666666666665e-05,
104
+ "loss": 0.6939,
105
  "step": 700
106
  },
107
  {
108
+ "epoch": 8.67,
109
  "eval_accuracy": 0.8974400257607471,
110
+ "eval_loss": 0.6210092902183533,
111
+ "eval_runtime": 509.3594,
112
+ "eval_samples_per_second": 48.775,
113
+ "eval_steps_per_second": 24.387,
114
  "step": 700
115
  },
116
  {
117
+ "epoch": 9.91,
118
+ "learning_rate": 0.0,
119
+ "loss": 0.6944,
120
  "step": 800
121
  },
122
  {
123
+ "epoch": 9.91,
124
+ "eval_accuracy": 0.10255997423925294,
125
+ "eval_loss": 0.7128772735595703,
126
+ "eval_runtime": 509.4575,
127
+ "eval_samples_per_second": 48.766,
128
+ "eval_steps_per_second": 24.383,
129
  "step": 800
130
  },
131
  {
132
+ "epoch": 9.91,
133
+ "step": 800,
134
+ "total_flos": 2.7192972653929354e+18,
135
+ "train_loss": 0.4679390847659576,
136
+ "train_runtime": 4727.8199,
137
+ "train_samples_per_second": 10.914,
138
+ "train_steps_per_second": 0.169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "logging_steps": 100,
142
+ "max_steps": 800,
143
  "num_train_epochs": 10,
144
  "save_steps": 100,
145
+ "total_flos": 2.7192972653929354e+18,
146
  "trial_name": null,
147
  "trial_params": null
148
  }