Melo1512 commited on
Commit
89cfdec
·
verified ·
1 Parent(s): 5c03b22

End of training

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.9456521739130435
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.3383
37
- - Accuracy: 0.9457
38
 
39
  ## Model description
40
 
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.9565217391304348
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.1797
37
+ - Accuracy: 0.9565
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_accuracy": 0.9840425531914894,
4
- "eval_loss": 0.0718478411436081,
5
- "eval_runtime": 0.629,
6
- "eval_samples_per_second": 298.878,
7
- "eval_steps_per_second": 9.539,
8
- "total_flos": 8.758829206639411e+17,
9
- "train_loss": 0.09168570356236563,
10
- "train_runtime": 380.8373,
11
- "train_samples_per_second": 117.531,
12
- "train_steps_per_second": 0.945
13
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_accuracy": 0.9565217391304348,
4
+ "eval_loss": 0.17970529198646545,
5
+ "eval_runtime": 0.3194,
6
+ "eval_samples_per_second": 288.014,
7
+ "eval_steps_per_second": 9.392,
8
+ "total_flos": 4.3676735454019584e+17,
9
+ "train_loss": 0.10428427308797836,
10
+ "train_runtime": 199.9604,
11
+ "train_samples_per_second": 111.622,
12
+ "train_steps_per_second": 0.9
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_accuracy": 0.9840425531914894,
4
- "eval_loss": 0.0718478411436081,
5
- "eval_runtime": 0.629,
6
- "eval_samples_per_second": 298.878,
7
- "eval_steps_per_second": 9.539
8
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_accuracy": 0.9565217391304348,
4
+ "eval_loss": 0.17970529198646545,
5
+ "eval_runtime": 0.3194,
6
+ "eval_samples_per_second": 288.014,
7
+ "eval_steps_per_second": 9.392
8
  }
runs/Dec11_15-57-15_ae1aa77fe319/events.out.tfevents.1733932871.ae1aa77fe319.236.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dbb19274d18831ff2aabd8629937545425c2d78d370cf1fb3ff4dc0580e2c41
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 60.0,
3
- "total_flos": 8.758829206639411e+17,
4
- "train_loss": 0.09168570356236563,
5
- "train_runtime": 380.8373,
6
- "train_samples_per_second": 117.531,
7
- "train_steps_per_second": 0.945
8
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "total_flos": 4.3676735454019584e+17,
4
+ "train_loss": 0.10428427308797836,
5
+ "train_runtime": 199.9604,
6
+ "train_samples_per_second": 111.622,
7
+ "train_steps_per_second": 0.9
8
  }
trainer_state.json CHANGED
@@ -1,817 +1,691 @@
1
  {
2
- "best_metric": 0.9840425531914894,
3
- "best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-72",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
- "global_step": 360,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7340425531914894,
14
- "eval_loss": 0.5712631344795227,
15
- "eval_runtime": 0.5622,
16
- "eval_samples_per_second": 334.415,
17
- "eval_steps_per_second": 10.673,
18
- "step": 6
19
- },
20
- {
21
- "epoch": 1.6666666666666665,
22
- "grad_norm": 10.44019603729248,
23
- "learning_rate": 1.388888888888889e-05,
24
- "loss": 0.6051,
25
- "step": 10
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.7659574468085106,
30
- "eval_loss": 0.4693465232849121,
31
- "eval_runtime": 0.5811,
32
- "eval_samples_per_second": 323.506,
33
- "eval_steps_per_second": 10.325,
34
- "step": 12
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.9414893617021277,
39
- "eval_loss": 0.1643817126750946,
40
- "eval_runtime": 0.6,
41
- "eval_samples_per_second": 313.329,
42
- "eval_steps_per_second": 10.0,
43
- "step": 18
44
  },
45
  {
46
  "epoch": 3.3333333333333335,
47
- "grad_norm": 8.123635292053223,
48
  "learning_rate": 2.777777777777778e-05,
49
- "loss": 0.2544,
50
- "step": 20
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9574468085106383,
55
- "eval_loss": 0.08451231569051743,
56
- "eval_runtime": 0.5515,
57
- "eval_samples_per_second": 340.904,
58
- "eval_steps_per_second": 10.88,
59
- "step": 24
60
- },
61
- {
62
- "epoch": 5.0,
63
- "grad_norm": 37.864131927490234,
64
- "learning_rate": 4.166666666666667e-05,
65
- "loss": 0.1896,
66
- "step": 30
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.973404255319149,
71
- "eval_loss": 0.09721191227436066,
72
- "eval_runtime": 0.5728,
73
- "eval_samples_per_second": 328.217,
74
- "eval_steps_per_second": 10.475,
75
- "step": 30
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9680851063829787,
80
- "eval_loss": 0.1011401042342186,
81
- "eval_runtime": 0.5505,
82
- "eval_samples_per_second": 341.503,
83
- "eval_steps_per_second": 10.899,
84
- "step": 36
85
  },
86
  {
87
  "epoch": 6.666666666666667,
88
- "grad_norm": 11.042017936706543,
89
  "learning_rate": 4.938271604938271e-05,
90
- "loss": 0.2534,
91
- "step": 40
92
  },
93
  {
94
  "epoch": 7.0,
95
- "eval_accuracy": 0.9787234042553191,
96
- "eval_loss": 0.08934129774570465,
97
- "eval_runtime": 0.5635,
98
- "eval_samples_per_second": 333.602,
99
- "eval_steps_per_second": 10.647,
100
- "step": 42
101
  },
102
  {
103
  "epoch": 8.0,
104
- "eval_accuracy": 0.9627659574468085,
105
- "eval_loss": 0.08766720443964005,
106
- "eval_runtime": 0.5541,
107
- "eval_samples_per_second": 339.311,
108
- "eval_steps_per_second": 10.829,
109
- "step": 48
110
- },
111
- {
112
- "epoch": 8.333333333333334,
113
- "grad_norm": 8.848210334777832,
114
- "learning_rate": 4.783950617283951e-05,
115
- "loss": 0.1535,
116
- "step": 50
117
  },
118
  {
119
  "epoch": 9.0,
120
- "eval_accuracy": 0.973404255319149,
121
- "eval_loss": 0.10303648561239243,
122
- "eval_runtime": 0.5537,
123
- "eval_samples_per_second": 339.542,
124
- "eval_steps_per_second": 10.836,
125
- "step": 54
126
  },
127
  {
128
  "epoch": 10.0,
129
- "grad_norm": 8.038219451904297,
130
  "learning_rate": 4.62962962962963e-05,
131
- "loss": 0.1277,
132
- "step": 60
133
  },
134
  {
135
  "epoch": 10.0,
136
- "eval_accuracy": 0.973404255319149,
137
- "eval_loss": 0.07765703648328781,
138
- "eval_runtime": 0.5522,
139
- "eval_samples_per_second": 340.451,
140
- "eval_steps_per_second": 10.865,
141
- "step": 60
142
  },
143
  {
144
  "epoch": 11.0,
145
- "eval_accuracy": 0.9680851063829787,
146
- "eval_loss": 0.08228272944688797,
147
- "eval_runtime": 0.5522,
148
- "eval_samples_per_second": 340.441,
149
- "eval_steps_per_second": 10.865,
150
- "step": 66
151
- },
152
- {
153
- "epoch": 11.666666666666666,
154
- "grad_norm": 11.488448143005371,
155
- "learning_rate": 4.4753086419753084e-05,
156
- "loss": 0.1147,
157
- "step": 70
158
  },
159
  {
160
  "epoch": 12.0,
161
- "eval_accuracy": 0.9840425531914894,
162
- "eval_loss": 0.0718478411436081,
163
- "eval_runtime": 0.5684,
164
- "eval_samples_per_second": 330.757,
165
- "eval_steps_per_second": 10.556,
166
- "step": 72
167
  },
168
  {
169
  "epoch": 13.0,
170
- "eval_accuracy": 0.9521276595744681,
171
- "eval_loss": 0.09054908156394958,
172
- "eval_runtime": 0.5801,
173
- "eval_samples_per_second": 324.086,
174
- "eval_steps_per_second": 10.343,
175
- "step": 78
176
  },
177
  {
178
  "epoch": 13.333333333333334,
179
- "grad_norm": 10.379966735839844,
180
  "learning_rate": 4.3209876543209875e-05,
181
- "loss": 0.112,
182
- "step": 80
183
  },
184
  {
185
  "epoch": 14.0,
186
- "eval_accuracy": 0.9202127659574468,
187
- "eval_loss": 0.21216550469398499,
188
- "eval_runtime": 0.5499,
189
- "eval_samples_per_second": 341.878,
190
- "eval_steps_per_second": 10.911,
191
- "step": 84
192
- },
193
- {
194
- "epoch": 15.0,
195
- "grad_norm": 6.255307674407959,
196
- "learning_rate": 4.166666666666667e-05,
197
- "loss": 0.1115,
198
- "step": 90
199
  },
200
  {
201
  "epoch": 15.0,
202
- "eval_accuracy": 0.9414893617021277,
203
- "eval_loss": 0.1408630907535553,
204
- "eval_runtime": 0.6004,
205
- "eval_samples_per_second": 313.12,
206
- "eval_steps_per_second": 9.993,
207
- "step": 90
208
  },
209
  {
210
  "epoch": 16.0,
211
- "eval_accuracy": 0.973404255319149,
212
- "eval_loss": 0.08175182342529297,
213
- "eval_runtime": 0.5516,
214
- "eval_samples_per_second": 340.81,
215
- "eval_steps_per_second": 10.877,
216
- "step": 96
217
  },
218
  {
219
  "epoch": 16.666666666666668,
220
- "grad_norm": 32.63232421875,
221
  "learning_rate": 4.012345679012346e-05,
222
- "loss": 0.107,
223
- "step": 100
224
  },
225
  {
226
  "epoch": 17.0,
227
- "eval_accuracy": 0.973404255319149,
228
- "eval_loss": 0.059448737651109695,
229
- "eval_runtime": 0.5763,
230
- "eval_samples_per_second": 326.236,
231
- "eval_steps_per_second": 10.412,
232
- "step": 102
233
  },
234
  {
235
  "epoch": 18.0,
236
- "eval_accuracy": 0.9308510638297872,
237
- "eval_loss": 0.16710200905799866,
238
- "eval_runtime": 0.5588,
239
- "eval_samples_per_second": 336.405,
240
- "eval_steps_per_second": 10.736,
241
- "step": 108
242
- },
243
- {
244
- "epoch": 18.333333333333332,
245
- "grad_norm": 3.7432363033294678,
246
- "learning_rate": 3.8580246913580246e-05,
247
- "loss": 0.0941,
248
- "step": 110
249
  },
250
  {
251
  "epoch": 19.0,
252
- "eval_accuracy": 0.9308510638297872,
253
- "eval_loss": 0.140838161110878,
254
- "eval_runtime": 0.5919,
255
- "eval_samples_per_second": 317.613,
256
- "eval_steps_per_second": 10.137,
257
- "step": 114
258
  },
259
  {
260
  "epoch": 20.0,
261
- "grad_norm": 7.966294288635254,
262
  "learning_rate": 3.7037037037037037e-05,
263
- "loss": 0.0629,
264
- "step": 120
265
  },
266
  {
267
  "epoch": 20.0,
268
- "eval_accuracy": 0.9414893617021277,
269
- "eval_loss": 0.13265569508075714,
270
- "eval_runtime": 0.5698,
271
- "eval_samples_per_second": 329.925,
272
- "eval_steps_per_second": 10.53,
273
- "step": 120
274
  },
275
  {
276
  "epoch": 21.0,
277
- "eval_accuracy": 0.9680851063829787,
278
- "eval_loss": 0.08957220613956451,
279
- "eval_runtime": 0.5645,
280
- "eval_samples_per_second": 333.023,
281
- "eval_steps_per_second": 10.628,
282
- "step": 126
283
- },
284
- {
285
- "epoch": 21.666666666666668,
286
- "grad_norm": 6.942417144775391,
287
- "learning_rate": 3.5493827160493834e-05,
288
- "loss": 0.081,
289
- "step": 130
290
  },
291
  {
292
  "epoch": 22.0,
293
- "eval_accuracy": 0.9574468085106383,
294
- "eval_loss": 0.09119919687509537,
295
- "eval_runtime": 0.5916,
296
- "eval_samples_per_second": 317.756,
297
- "eval_steps_per_second": 10.141,
298
- "step": 132
299
  },
300
  {
301
  "epoch": 23.0,
302
- "eval_accuracy": 0.9521276595744681,
303
- "eval_loss": 0.10360775887966156,
304
- "eval_runtime": 0.5817,
305
- "eval_samples_per_second": 323.216,
306
- "eval_steps_per_second": 10.315,
307
- "step": 138
308
  },
309
  {
310
  "epoch": 23.333333333333332,
311
- "grad_norm": 3.9243404865264893,
312
  "learning_rate": 3.395061728395062e-05,
313
- "loss": 0.0706,
314
- "step": 140
315
  },
316
  {
317
  "epoch": 24.0,
318
- "eval_accuracy": 0.9521276595744681,
319
- "eval_loss": 0.07820819318294525,
320
- "eval_runtime": 0.5573,
321
- "eval_samples_per_second": 337.319,
322
- "eval_steps_per_second": 10.766,
323
- "step": 144
324
- },
325
- {
326
- "epoch": 25.0,
327
- "grad_norm": 5.995626449584961,
328
- "learning_rate": 3.240740740740741e-05,
329
- "loss": 0.0728,
330
- "step": 150
331
  },
332
  {
333
  "epoch": 25.0,
334
- "eval_accuracy": 0.9627659574468085,
335
- "eval_loss": 0.06730703264474869,
336
- "eval_runtime": 0.564,
337
- "eval_samples_per_second": 333.307,
338
- "eval_steps_per_second": 10.637,
339
- "step": 150
340
  },
341
  {
342
  "epoch": 26.0,
343
- "eval_accuracy": 0.9627659574468085,
344
- "eval_loss": 0.13579747080802917,
345
- "eval_runtime": 0.5839,
346
- "eval_samples_per_second": 321.994,
347
- "eval_steps_per_second": 10.276,
348
- "step": 156
349
  },
350
  {
351
  "epoch": 26.666666666666668,
352
- "grad_norm": 3.335559606552124,
353
  "learning_rate": 3.08641975308642e-05,
354
- "loss": 0.0535,
355
- "step": 160
356
  },
357
  {
358
  "epoch": 27.0,
359
- "eval_accuracy": 0.9574468085106383,
360
- "eval_loss": 0.09251847118139267,
361
- "eval_runtime": 0.5476,
362
- "eval_samples_per_second": 343.33,
363
- "eval_steps_per_second": 10.957,
364
- "step": 162
365
  },
366
  {
367
  "epoch": 28.0,
368
- "eval_accuracy": 0.973404255319149,
369
- "eval_loss": 0.09499593824148178,
370
- "eval_runtime": 0.557,
371
- "eval_samples_per_second": 337.516,
372
- "eval_steps_per_second": 10.772,
373
- "step": 168
374
- },
375
- {
376
- "epoch": 28.333333333333332,
377
- "grad_norm": 6.770501136779785,
378
- "learning_rate": 2.9320987654320992e-05,
379
- "loss": 0.058,
380
- "step": 170
381
  },
382
  {
383
  "epoch": 29.0,
384
- "eval_accuracy": 0.9574468085106383,
385
- "eval_loss": 0.09998849779367447,
386
- "eval_runtime": 0.5597,
387
- "eval_samples_per_second": 335.871,
388
- "eval_steps_per_second": 10.719,
389
- "step": 174
390
  },
391
  {
392
  "epoch": 30.0,
393
- "grad_norm": 19.26597023010254,
394
  "learning_rate": 2.777777777777778e-05,
395
- "loss": 0.0662,
396
- "step": 180
397
  },
398
  {
399
  "epoch": 30.0,
400
- "eval_accuracy": 0.9414893617021277,
401
- "eval_loss": 0.251209557056427,
402
- "eval_runtime": 0.5694,
403
- "eval_samples_per_second": 330.181,
404
- "eval_steps_per_second": 10.538,
405
- "step": 180
406
  },
407
  {
408
  "epoch": 31.0,
409
- "eval_accuracy": 0.9680851063829787,
410
- "eval_loss": 0.06487108021974564,
411
- "eval_runtime": 0.5919,
412
- "eval_samples_per_second": 317.599,
413
- "eval_steps_per_second": 10.136,
414
- "step": 186
415
- },
416
- {
417
- "epoch": 31.666666666666668,
418
- "grad_norm": 1.7159186601638794,
419
- "learning_rate": 2.623456790123457e-05,
420
- "loss": 0.0564,
421
- "step": 190
422
  },
423
  {
424
  "epoch": 32.0,
425
- "eval_accuracy": 0.9521276595744681,
426
- "eval_loss": 0.13227558135986328,
427
- "eval_runtime": 0.5679,
428
- "eval_samples_per_second": 331.05,
429
- "eval_steps_per_second": 10.565,
430
- "step": 192
431
  },
432
  {
433
  "epoch": 33.0,
434
- "eval_accuracy": 0.9680851063829787,
435
- "eval_loss": 0.06702585518360138,
436
- "eval_runtime": 0.591,
437
- "eval_samples_per_second": 318.119,
438
- "eval_steps_per_second": 10.153,
439
- "step": 198
440
  },
441
  {
442
  "epoch": 33.333333333333336,
443
- "grad_norm": 0.7996916770935059,
444
  "learning_rate": 2.4691358024691357e-05,
445
- "loss": 0.0591,
446
- "step": 200
447
  },
448
  {
449
  "epoch": 34.0,
450
- "eval_accuracy": 0.9627659574468085,
451
- "eval_loss": 0.11914665251970291,
452
- "eval_runtime": 0.5866,
453
- "eval_samples_per_second": 320.511,
454
- "eval_steps_per_second": 10.229,
455
- "step": 204
456
- },
457
- {
458
- "epoch": 35.0,
459
- "grad_norm": 3.493698835372925,
460
- "learning_rate": 2.314814814814815e-05,
461
- "loss": 0.0353,
462
- "step": 210
463
  },
464
  {
465
  "epoch": 35.0,
466
- "eval_accuracy": 0.9680851063829787,
467
- "eval_loss": 0.14858229458332062,
468
- "eval_runtime": 0.5761,
469
- "eval_samples_per_second": 326.357,
470
- "eval_steps_per_second": 10.416,
471
- "step": 210
472
  },
473
  {
474
  "epoch": 36.0,
475
- "eval_accuracy": 0.973404255319149,
476
- "eval_loss": 0.08810416609048843,
477
- "eval_runtime": 0.5699,
478
- "eval_samples_per_second": 329.896,
479
- "eval_steps_per_second": 10.529,
480
- "step": 216
481
  },
482
  {
483
  "epoch": 36.666666666666664,
484
- "grad_norm": 12.2665376663208,
485
  "learning_rate": 2.1604938271604937e-05,
486
- "loss": 0.0523,
487
- "step": 220
488
  },
489
  {
490
  "epoch": 37.0,
491
- "eval_accuracy": 0.9680851063829787,
492
- "eval_loss": 0.05061895400285721,
493
- "eval_runtime": 0.5644,
494
- "eval_samples_per_second": 333.114,
495
- "eval_steps_per_second": 10.631,
496
- "step": 222
497
  },
498
  {
499
  "epoch": 38.0,
500
- "eval_accuracy": 0.9627659574468085,
501
- "eval_loss": 0.10941923409700394,
502
- "eval_runtime": 0.5579,
503
- "eval_samples_per_second": 336.995,
504
- "eval_steps_per_second": 10.755,
505
- "step": 228
506
- },
507
- {
508
- "epoch": 38.333333333333336,
509
- "grad_norm": 2.317680597305298,
510
- "learning_rate": 2.006172839506173e-05,
511
- "loss": 0.0471,
512
- "step": 230
513
  },
514
  {
515
  "epoch": 39.0,
516
- "eval_accuracy": 0.973404255319149,
517
- "eval_loss": 0.08581092208623886,
518
- "eval_runtime": 0.5654,
519
- "eval_samples_per_second": 332.53,
520
- "eval_steps_per_second": 10.613,
521
- "step": 234
522
  },
523
  {
524
  "epoch": 40.0,
525
- "grad_norm": 11.043506622314453,
526
  "learning_rate": 1.8518518518518518e-05,
527
- "loss": 0.0671,
528
- "step": 240
529
  },
530
  {
531
  "epoch": 40.0,
532
- "eval_accuracy": 0.9574468085106383,
533
- "eval_loss": 0.17757754027843475,
534
- "eval_runtime": 0.5949,
535
- "eval_samples_per_second": 316.026,
536
- "eval_steps_per_second": 10.086,
537
- "step": 240
538
  },
539
  {
540
  "epoch": 41.0,
541
- "eval_accuracy": 0.973404255319149,
542
- "eval_loss": 0.09706045687198639,
543
- "eval_runtime": 0.5628,
544
- "eval_samples_per_second": 334.044,
545
- "eval_steps_per_second": 10.661,
546
- "step": 246
547
- },
548
- {
549
- "epoch": 41.666666666666664,
550
- "grad_norm": 2.3154749870300293,
551
- "learning_rate": 1.697530864197531e-05,
552
- "loss": 0.0459,
553
- "step": 250
554
  },
555
  {
556
  "epoch": 42.0,
557
- "eval_accuracy": 0.9680851063829787,
558
- "eval_loss": 0.04420238360762596,
559
- "eval_runtime": 0.572,
560
- "eval_samples_per_second": 328.671,
561
- "eval_steps_per_second": 10.489,
562
- "step": 252
563
  },
564
  {
565
  "epoch": 43.0,
566
- "eval_accuracy": 0.9787234042553191,
567
- "eval_loss": 0.044726960361003876,
568
- "eval_runtime": 0.5631,
569
- "eval_samples_per_second": 333.838,
570
- "eval_steps_per_second": 10.654,
571
- "step": 258
572
  },
573
  {
574
  "epoch": 43.333333333333336,
575
- "grad_norm": 3.237978935241699,
576
  "learning_rate": 1.54320987654321e-05,
577
- "loss": 0.0296,
578
- "step": 260
579
  },
580
  {
581
  "epoch": 44.0,
582
- "eval_accuracy": 0.9787234042553191,
583
- "eval_loss": 0.06447551399469376,
584
- "eval_runtime": 0.5649,
585
- "eval_samples_per_second": 332.802,
586
- "eval_steps_per_second": 10.621,
587
- "step": 264
588
- },
589
- {
590
- "epoch": 45.0,
591
- "grad_norm": 15.342556953430176,
592
- "learning_rate": 1.388888888888889e-05,
593
- "loss": 0.0414,
594
- "step": 270
595
  },
596
  {
597
  "epoch": 45.0,
598
- "eval_accuracy": 0.973404255319149,
599
- "eval_loss": 0.07515815645456314,
600
- "eval_runtime": 0.5591,
601
- "eval_samples_per_second": 336.232,
602
- "eval_steps_per_second": 10.731,
603
- "step": 270
604
  },
605
  {
606
  "epoch": 46.0,
607
- "eval_accuracy": 0.9574468085106383,
608
- "eval_loss": 0.14192818105220795,
609
- "eval_runtime": 0.5561,
610
- "eval_samples_per_second": 338.082,
611
- "eval_steps_per_second": 10.79,
612
- "step": 276
613
  },
614
  {
615
  "epoch": 46.666666666666664,
616
- "grad_norm": 3.4594690799713135,
617
  "learning_rate": 1.2345679012345678e-05,
618
- "loss": 0.0352,
619
- "step": 280
620
  },
621
  {
622
  "epoch": 47.0,
623
- "eval_accuracy": 0.9840425531914894,
624
- "eval_loss": 0.04251508414745331,
625
- "eval_runtime": 0.5605,
626
- "eval_samples_per_second": 335.419,
627
- "eval_steps_per_second": 10.705,
628
- "step": 282
629
  },
630
  {
631
  "epoch": 48.0,
632
- "eval_accuracy": 0.9787234042553191,
633
- "eval_loss": 0.06410356611013412,
634
- "eval_runtime": 0.5687,
635
- "eval_samples_per_second": 330.571,
636
- "eval_steps_per_second": 10.55,
637
- "step": 288
638
- },
639
- {
640
- "epoch": 48.333333333333336,
641
- "grad_norm": 2.795865774154663,
642
- "learning_rate": 1.0802469135802469e-05,
643
- "loss": 0.0342,
644
- "step": 290
645
  },
646
  {
647
  "epoch": 49.0,
648
- "eval_accuracy": 0.9680851063829787,
649
- "eval_loss": 0.11264320462942123,
650
- "eval_runtime": 0.5453,
651
- "eval_samples_per_second": 344.776,
652
- "eval_steps_per_second": 11.003,
653
- "step": 294
654
  },
655
  {
656
  "epoch": 50.0,
657
- "grad_norm": 4.04909086227417,
658
  "learning_rate": 9.259259259259259e-06,
659
- "loss": 0.0277,
660
- "step": 300
661
  },
662
  {
663
  "epoch": 50.0,
664
- "eval_accuracy": 0.9840425531914894,
665
- "eval_loss": 0.054132696241140366,
666
- "eval_runtime": 0.5421,
667
- "eval_samples_per_second": 346.78,
668
- "eval_steps_per_second": 11.067,
669
- "step": 300
670
  },
671
  {
672
  "epoch": 51.0,
673
- "eval_accuracy": 0.9787234042553191,
674
- "eval_loss": 0.07527489215135574,
675
- "eval_runtime": 0.5567,
676
- "eval_samples_per_second": 337.676,
677
- "eval_steps_per_second": 10.777,
678
- "step": 306
679
- },
680
- {
681
- "epoch": 51.666666666666664,
682
- "grad_norm": 1.1342123746871948,
683
- "learning_rate": 7.71604938271605e-06,
684
- "loss": 0.0392,
685
- "step": 310
686
  },
687
  {
688
  "epoch": 52.0,
689
- "eval_accuracy": 0.9787234042553191,
690
- "eval_loss": 0.04125715419650078,
691
- "eval_runtime": 0.5747,
692
- "eval_samples_per_second": 327.145,
693
- "eval_steps_per_second": 10.441,
694
- "step": 312
695
  },
696
  {
697
  "epoch": 53.0,
698
- "eval_accuracy": 0.9627659574468085,
699
- "eval_loss": 0.11188509315252304,
700
- "eval_runtime": 0.5497,
701
- "eval_samples_per_second": 341.985,
702
- "eval_steps_per_second": 10.914,
703
- "step": 318
704
  },
705
  {
706
  "epoch": 53.333333333333336,
707
- "grad_norm": 4.390924453735352,
708
  "learning_rate": 6.172839506172839e-06,
709
- "loss": 0.0299,
710
- "step": 320
711
  },
712
  {
713
  "epoch": 54.0,
714
- "eval_accuracy": 0.9627659574468085,
715
- "eval_loss": 0.08185816556215286,
716
- "eval_runtime": 0.5785,
717
- "eval_samples_per_second": 324.999,
718
- "eval_steps_per_second": 10.372,
719
- "step": 324
720
- },
721
- {
722
- "epoch": 55.0,
723
- "grad_norm": 1.3617689609527588,
724
- "learning_rate": 4.6296296296296296e-06,
725
- "loss": 0.0295,
726
- "step": 330
727
  },
728
  {
729
  "epoch": 55.0,
730
- "eval_accuracy": 0.9840425531914894,
731
- "eval_loss": 0.0335795022547245,
732
- "eval_runtime": 0.5553,
733
- "eval_samples_per_second": 338.529,
734
- "eval_steps_per_second": 10.804,
735
- "step": 330
736
  },
737
  {
738
  "epoch": 56.0,
739
- "eval_accuracy": 0.9840425531914894,
740
- "eval_loss": 0.039023175835609436,
741
- "eval_runtime": 0.5576,
742
- "eval_samples_per_second": 337.161,
743
- "eval_steps_per_second": 10.76,
744
- "step": 336
745
  },
746
  {
747
  "epoch": 56.666666666666664,
748
- "grad_norm": 0.5556619763374329,
749
  "learning_rate": 3.0864197530864196e-06,
750
- "loss": 0.0253,
751
- "step": 340
752
  },
753
  {
754
  "epoch": 57.0,
755
- "eval_accuracy": 0.9840425531914894,
756
- "eval_loss": 0.03277648240327835,
757
- "eval_runtime": 0.5626,
758
- "eval_samples_per_second": 334.143,
759
- "eval_steps_per_second": 10.664,
760
- "step": 342
761
  },
762
  {
763
  "epoch": 58.0,
764
- "eval_accuracy": 0.9840425531914894,
765
- "eval_loss": 0.0343145877122879,
766
- "eval_runtime": 0.5708,
767
- "eval_samples_per_second": 329.368,
768
- "eval_steps_per_second": 10.512,
769
- "step": 348
770
- },
771
- {
772
- "epoch": 58.333333333333336,
773
- "grad_norm": 0.7165215611457825,
774
- "learning_rate": 1.5432098765432098e-06,
775
- "loss": 0.0264,
776
- "step": 350
777
  },
778
  {
779
  "epoch": 59.0,
780
- "eval_accuracy": 0.9840425531914894,
781
- "eval_loss": 0.03521186113357544,
782
- "eval_runtime": 0.5748,
783
- "eval_samples_per_second": 327.069,
784
- "eval_steps_per_second": 10.438,
785
- "step": 354
786
  },
787
  {
788
  "epoch": 60.0,
789
- "grad_norm": 5.298318386077881,
790
  "learning_rate": 0.0,
791
- "loss": 0.0308,
792
- "step": 360
793
  },
794
  {
795
  "epoch": 60.0,
796
- "eval_accuracy": 0.9840425531914894,
797
- "eval_loss": 0.03450946509838104,
798
- "eval_runtime": 0.5861,
799
- "eval_samples_per_second": 320.783,
800
- "eval_steps_per_second": 10.238,
801
- "step": 360
802
  },
803
  {
804
  "epoch": 60.0,
805
- "step": 360,
806
- "total_flos": 8.758829206639411e+17,
807
- "train_loss": 0.09168570356236563,
808
- "train_runtime": 380.8373,
809
- "train_samples_per_second": 117.531,
810
- "train_steps_per_second": 0.945
811
  }
812
  ],
813
  "logging_steps": 10,
814
- "max_steps": 360,
815
  "num_input_tokens_seen": 0,
816
  "num_train_epochs": 60,
817
  "save_steps": 500,
@@ -827,7 +701,7 @@
827
  "attributes": {}
828
  }
829
  },
830
- "total_flos": 8.758829206639411e+17,
831
  "train_batch_size": 32,
832
  "trial_name": null,
833
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9565217391304348,
3
+ "best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-60",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
+ "global_step": 180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5760869565217391,
14
+ "eval_loss": 0.6225717663764954,
15
+ "eval_runtime": 0.2813,
16
+ "eval_samples_per_second": 327.047,
17
+ "eval_steps_per_second": 10.665,
18
+ "step": 3
 
 
 
 
 
 
 
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_accuracy": 0.8913043478260869,
23
+ "eval_loss": 0.41749870777130127,
24
+ "eval_runtime": 0.2698,
25
+ "eval_samples_per_second": 341.032,
26
+ "eval_steps_per_second": 11.121,
27
+ "step": 6
28
  },
29
  {
30
  "epoch": 3.0,
31
+ "eval_accuracy": 0.8369565217391305,
32
+ "eval_loss": 0.359576016664505,
33
+ "eval_runtime": 0.2715,
34
+ "eval_samples_per_second": 338.881,
35
+ "eval_steps_per_second": 11.05,
36
+ "step": 9
37
  },
38
  {
39
  "epoch": 3.3333333333333335,
40
+ "grad_norm": 5.03557014465332,
41
  "learning_rate": 2.777777777777778e-05,
42
+ "loss": 0.5806,
43
+ "step": 10
44
  },
45
  {
46
  "epoch": 4.0,
47
+ "eval_accuracy": 0.9130434782608695,
48
+ "eval_loss": 0.2136119157075882,
49
+ "eval_runtime": 0.2933,
50
+ "eval_samples_per_second": 313.644,
51
+ "eval_steps_per_second": 10.228,
52
+ "step": 12
 
 
 
 
 
 
 
53
  },
54
  {
55
  "epoch": 5.0,
56
+ "eval_accuracy": 0.9347826086956522,
57
+ "eval_loss": 0.18237723410129547,
58
+ "eval_runtime": 0.2735,
59
+ "eval_samples_per_second": 336.44,
60
+ "eval_steps_per_second": 10.971,
61
+ "step": 15
62
  },
63
  {
64
  "epoch": 6.0,
65
+ "eval_accuracy": 0.9130434782608695,
66
+ "eval_loss": 0.21223501861095428,
67
+ "eval_runtime": 0.2858,
68
+ "eval_samples_per_second": 321.851,
69
+ "eval_steps_per_second": 10.495,
70
+ "step": 18
71
  },
72
  {
73
  "epoch": 6.666666666666667,
74
+ "grad_norm": 29.32073402404785,
75
  "learning_rate": 4.938271604938271e-05,
76
+ "loss": 0.2353,
77
+ "step": 20
78
  },
79
  {
80
  "epoch": 7.0,
81
+ "eval_accuracy": 0.9456521739130435,
82
+ "eval_loss": 0.2082703560590744,
83
+ "eval_runtime": 0.2935,
84
+ "eval_samples_per_second": 313.469,
85
+ "eval_steps_per_second": 10.222,
86
+ "step": 21
87
  },
88
  {
89
  "epoch": 8.0,
90
+ "eval_accuracy": 0.9347826086956522,
91
+ "eval_loss": 0.1384642869234085,
92
+ "eval_runtime": 0.277,
93
+ "eval_samples_per_second": 332.147,
94
+ "eval_steps_per_second": 10.831,
95
+ "step": 24
 
 
 
 
 
 
 
96
  },
97
  {
98
  "epoch": 9.0,
99
+ "eval_accuracy": 0.9456521739130435,
100
+ "eval_loss": 0.15815076231956482,
101
+ "eval_runtime": 0.2722,
102
+ "eval_samples_per_second": 338.031,
103
+ "eval_steps_per_second": 11.023,
104
+ "step": 27
105
  },
106
  {
107
  "epoch": 10.0,
108
+ "grad_norm": 7.045929431915283,
109
  "learning_rate": 4.62962962962963e-05,
110
+ "loss": 0.1214,
111
+ "step": 30
112
  },
113
  {
114
  "epoch": 10.0,
115
+ "eval_accuracy": 0.9456521739130435,
116
+ "eval_loss": 0.18020795285701752,
117
+ "eval_runtime": 0.3138,
118
+ "eval_samples_per_second": 293.183,
119
+ "eval_steps_per_second": 9.56,
120
+ "step": 30
121
  },
122
  {
123
  "epoch": 11.0,
124
+ "eval_accuracy": 0.9456521739130435,
125
+ "eval_loss": 0.1709580272436142,
126
+ "eval_runtime": 0.2875,
127
+ "eval_samples_per_second": 319.953,
128
+ "eval_steps_per_second": 10.433,
129
+ "step": 33
 
 
 
 
 
 
 
130
  },
131
  {
132
  "epoch": 12.0,
133
+ "eval_accuracy": 0.9347826086956522,
134
+ "eval_loss": 0.2764737904071808,
135
+ "eval_runtime": 0.274,
136
+ "eval_samples_per_second": 335.737,
137
+ "eval_steps_per_second": 10.948,
138
+ "step": 36
139
  },
140
  {
141
  "epoch": 13.0,
142
+ "eval_accuracy": 0.9456521739130435,
143
+ "eval_loss": 0.22897538542747498,
144
+ "eval_runtime": 0.2743,
145
+ "eval_samples_per_second": 335.388,
146
+ "eval_steps_per_second": 10.937,
147
+ "step": 39
148
  },
149
  {
150
  "epoch": 13.333333333333334,
151
+ "grad_norm": 4.235286235809326,
152
  "learning_rate": 4.3209876543209875e-05,
153
+ "loss": 0.0934,
154
+ "step": 40
155
  },
156
  {
157
  "epoch": 14.0,
158
+ "eval_accuracy": 0.9239130434782609,
159
+ "eval_loss": 0.31263482570648193,
160
+ "eval_runtime": 0.2763,
161
+ "eval_samples_per_second": 332.946,
162
+ "eval_steps_per_second": 10.857,
163
+ "step": 42
 
 
 
 
 
 
 
164
  },
165
  {
166
  "epoch": 15.0,
167
+ "eval_accuracy": 0.9456521739130435,
168
+ "eval_loss": 0.17815206944942474,
169
+ "eval_runtime": 0.3126,
170
+ "eval_samples_per_second": 294.294,
171
+ "eval_steps_per_second": 9.597,
172
+ "step": 45
173
  },
174
  {
175
  "epoch": 16.0,
176
+ "eval_accuracy": 0.8804347826086957,
177
+ "eval_loss": 0.37992385029792786,
178
+ "eval_runtime": 0.2777,
179
+ "eval_samples_per_second": 331.253,
180
+ "eval_steps_per_second": 10.802,
181
+ "step": 48
182
  },
183
  {
184
  "epoch": 16.666666666666668,
185
+ "grad_norm": 8.808466911315918,
186
  "learning_rate": 4.012345679012346e-05,
187
+ "loss": 0.1525,
188
+ "step": 50
189
  },
190
  {
191
  "epoch": 17.0,
192
+ "eval_accuracy": 0.9456521739130435,
193
+ "eval_loss": 0.19111250340938568,
194
+ "eval_runtime": 0.2894,
195
+ "eval_samples_per_second": 317.845,
196
+ "eval_steps_per_second": 10.365,
197
+ "step": 51
198
  },
199
  {
200
  "epoch": 18.0,
201
+ "eval_accuracy": 0.9347826086956522,
202
+ "eval_loss": 0.22915299236774445,
203
+ "eval_runtime": 0.2754,
204
+ "eval_samples_per_second": 334.072,
205
+ "eval_steps_per_second": 10.894,
206
+ "step": 54
 
 
 
 
 
 
 
207
  },
208
  {
209
  "epoch": 19.0,
210
+ "eval_accuracy": 0.9130434782608695,
211
+ "eval_loss": 0.3240966498851776,
212
+ "eval_runtime": 0.29,
213
+ "eval_samples_per_second": 317.233,
214
+ "eval_steps_per_second": 10.345,
215
+ "step": 57
216
  },
217
  {
218
  "epoch": 20.0,
219
+ "grad_norm": 12.596190452575684,
220
  "learning_rate": 3.7037037037037037e-05,
221
+ "loss": 0.1551,
222
+ "step": 60
223
  },
224
  {
225
  "epoch": 20.0,
226
+ "eval_accuracy": 0.9565217391304348,
227
+ "eval_loss": 0.17970529198646545,
228
+ "eval_runtime": 0.2984,
229
+ "eval_samples_per_second": 308.312,
230
+ "eval_steps_per_second": 10.054,
231
+ "step": 60
232
  },
233
  {
234
  "epoch": 21.0,
235
+ "eval_accuracy": 0.9239130434782609,
236
+ "eval_loss": 0.2115555703639984,
237
+ "eval_runtime": 0.2749,
238
+ "eval_samples_per_second": 334.611,
239
+ "eval_steps_per_second": 10.911,
240
+ "step": 63
 
 
 
 
 
 
 
241
  },
242
  {
243
  "epoch": 22.0,
244
+ "eval_accuracy": 0.9347826086956522,
245
+ "eval_loss": 0.2774827480316162,
246
+ "eval_runtime": 0.2726,
247
+ "eval_samples_per_second": 337.516,
248
+ "eval_steps_per_second": 11.006,
249
+ "step": 66
250
  },
251
  {
252
  "epoch": 23.0,
253
+ "eval_accuracy": 0.9239130434782609,
254
+ "eval_loss": 0.28300002217292786,
255
+ "eval_runtime": 0.2723,
256
+ "eval_samples_per_second": 337.81,
257
+ "eval_steps_per_second": 11.016,
258
+ "step": 69
259
  },
260
  {
261
  "epoch": 23.333333333333332,
262
+ "grad_norm": 3.0055344104766846,
263
  "learning_rate": 3.395061728395062e-05,
264
+ "loss": 0.0701,
265
+ "step": 70
266
  },
267
  {
268
  "epoch": 24.0,
269
+ "eval_accuracy": 0.9347826086956522,
270
+ "eval_loss": 0.33308491110801697,
271
+ "eval_runtime": 0.2939,
272
+ "eval_samples_per_second": 313.029,
273
+ "eval_steps_per_second": 10.207,
274
+ "step": 72
 
 
 
 
 
 
 
275
  },
276
  {
277
  "epoch": 25.0,
278
+ "eval_accuracy": 0.9347826086956522,
279
+ "eval_loss": 0.27504873275756836,
280
+ "eval_runtime": 0.2721,
281
+ "eval_samples_per_second": 338.147,
282
+ "eval_steps_per_second": 11.027,
283
+ "step": 75
284
  },
285
  {
286
  "epoch": 26.0,
287
+ "eval_accuracy": 0.9456521739130435,
288
+ "eval_loss": 0.2002851665019989,
289
+ "eval_runtime": 0.3046,
290
+ "eval_samples_per_second": 302.042,
291
+ "eval_steps_per_second": 9.849,
292
+ "step": 78
293
  },
294
  {
295
  "epoch": 26.666666666666668,
296
+ "grad_norm": 3.5597548484802246,
297
  "learning_rate": 3.08641975308642e-05,
298
+ "loss": 0.0733,
299
+ "step": 80
300
  },
301
  {
302
  "epoch": 27.0,
303
+ "eval_accuracy": 0.9456521739130435,
304
+ "eval_loss": 0.2492789775133133,
305
+ "eval_runtime": 0.2789,
306
+ "eval_samples_per_second": 329.913,
307
+ "eval_steps_per_second": 10.758,
308
+ "step": 81
309
  },
310
  {
311
  "epoch": 28.0,
312
+ "eval_accuracy": 0.9456521739130435,
313
+ "eval_loss": 0.2807539105415344,
314
+ "eval_runtime": 0.2756,
315
+ "eval_samples_per_second": 333.87,
316
+ "eval_steps_per_second": 10.887,
317
+ "step": 84
 
 
 
 
 
 
 
318
  },
319
  {
320
  "epoch": 29.0,
321
+ "eval_accuracy": 0.9456521739130435,
322
+ "eval_loss": 0.2663654088973999,
323
+ "eval_runtime": 0.2778,
324
+ "eval_samples_per_second": 331.156,
325
+ "eval_steps_per_second": 10.799,
326
+ "step": 87
327
  },
328
  {
329
  "epoch": 30.0,
330
+ "grad_norm": 2.488931179046631,
331
  "learning_rate": 2.777777777777778e-05,
332
+ "loss": 0.0494,
333
+ "step": 90
334
  },
335
  {
336
  "epoch": 30.0,
337
+ "eval_accuracy": 0.9456521739130435,
338
+ "eval_loss": 0.36214956641197205,
339
+ "eval_runtime": 0.2768,
340
+ "eval_samples_per_second": 332.394,
341
+ "eval_steps_per_second": 10.839,
342
+ "step": 90
343
  },
344
  {
345
  "epoch": 31.0,
346
+ "eval_accuracy": 0.9456521739130435,
347
+ "eval_loss": 0.2520482838153839,
348
+ "eval_runtime": 0.2749,
349
+ "eval_samples_per_second": 334.646,
350
+ "eval_steps_per_second": 10.912,
351
+ "step": 93
 
 
 
 
 
 
 
352
  },
353
  {
354
  "epoch": 32.0,
355
+ "eval_accuracy": 0.9456521739130435,
356
+ "eval_loss": 0.2721933424472809,
357
+ "eval_runtime": 0.2755,
358
+ "eval_samples_per_second": 333.902,
359
+ "eval_steps_per_second": 10.888,
360
+ "step": 96
361
  },
362
  {
363
  "epoch": 33.0,
364
+ "eval_accuracy": 0.9456521739130435,
365
+ "eval_loss": 0.27847108244895935,
366
+ "eval_runtime": 0.277,
367
+ "eval_samples_per_second": 332.097,
368
+ "eval_steps_per_second": 10.829,
369
+ "step": 99
370
  },
371
  {
372
  "epoch": 33.333333333333336,
373
+ "grad_norm": 13.367582321166992,
374
  "learning_rate": 2.4691358024691357e-05,
375
+ "loss": 0.0806,
376
+ "step": 100
377
  },
378
  {
379
  "epoch": 34.0,
380
+ "eval_accuracy": 0.9456521739130435,
381
+ "eval_loss": 0.2530038356781006,
382
+ "eval_runtime": 0.2765,
383
+ "eval_samples_per_second": 332.674,
384
+ "eval_steps_per_second": 10.848,
385
+ "step": 102
 
 
 
 
 
 
 
386
  },
387
  {
388
  "epoch": 35.0,
389
+ "eval_accuracy": 0.9456521739130435,
390
+ "eval_loss": 0.22303055226802826,
391
+ "eval_runtime": 0.2793,
392
+ "eval_samples_per_second": 329.409,
393
+ "eval_steps_per_second": 10.742,
394
+ "step": 105
395
  },
396
  {
397
  "epoch": 36.0,
398
+ "eval_accuracy": 0.9456521739130435,
399
+ "eval_loss": 0.2482990175485611,
400
+ "eval_runtime": 0.2776,
401
+ "eval_samples_per_second": 331.464,
402
+ "eval_steps_per_second": 10.809,
403
+ "step": 108
404
  },
405
  {
406
  "epoch": 36.666666666666664,
407
+ "grad_norm": 7.69065523147583,
408
  "learning_rate": 2.1604938271604937e-05,
409
+ "loss": 0.0324,
410
+ "step": 110
411
  },
412
  {
413
  "epoch": 37.0,
414
+ "eval_accuracy": 0.9456521739130435,
415
+ "eval_loss": 0.29446083307266235,
416
+ "eval_runtime": 0.2954,
417
+ "eval_samples_per_second": 311.393,
418
+ "eval_steps_per_second": 10.154,
419
+ "step": 111
420
  },
421
  {
422
  "epoch": 38.0,
423
+ "eval_accuracy": 0.9456521739130435,
424
+ "eval_loss": 0.3244095742702484,
425
+ "eval_runtime": 0.2815,
426
+ "eval_samples_per_second": 326.826,
427
+ "eval_steps_per_second": 10.657,
428
+ "step": 114
 
 
 
 
 
 
 
429
  },
430
  {
431
  "epoch": 39.0,
432
+ "eval_accuracy": 0.9456521739130435,
433
+ "eval_loss": 0.3302266597747803,
434
+ "eval_runtime": 0.2668,
435
+ "eval_samples_per_second": 344.764,
436
+ "eval_steps_per_second": 11.242,
437
+ "step": 117
438
  },
439
  {
440
  "epoch": 40.0,
441
+ "grad_norm": 8.54801082611084,
442
  "learning_rate": 1.8518518518518518e-05,
443
+ "loss": 0.0435,
444
+ "step": 120
445
  },
446
  {
447
  "epoch": 40.0,
448
+ "eval_accuracy": 0.9456521739130435,
449
+ "eval_loss": 0.331495463848114,
450
+ "eval_runtime": 0.275,
451
+ "eval_samples_per_second": 334.557,
452
+ "eval_steps_per_second": 10.909,
453
+ "step": 120
454
  },
455
  {
456
  "epoch": 41.0,
457
+ "eval_accuracy": 0.9347826086956522,
458
+ "eval_loss": 0.303882896900177,
459
+ "eval_runtime": 0.2881,
460
+ "eval_samples_per_second": 319.318,
461
+ "eval_steps_per_second": 10.413,
462
+ "step": 123
 
 
 
 
 
 
 
463
  },
464
  {
465
  "epoch": 42.0,
466
+ "eval_accuracy": 0.9456521739130435,
467
+ "eval_loss": 0.36054134368896484,
468
+ "eval_runtime": 0.2765,
469
+ "eval_samples_per_second": 332.764,
470
+ "eval_steps_per_second": 10.851,
471
+ "step": 126
472
  },
473
  {
474
  "epoch": 43.0,
475
+ "eval_accuracy": 0.9347826086956522,
476
+ "eval_loss": 0.3643103539943695,
477
+ "eval_runtime": 0.2772,
478
+ "eval_samples_per_second": 331.854,
479
+ "eval_steps_per_second": 10.821,
480
+ "step": 129
481
  },
482
  {
483
  "epoch": 43.333333333333336,
484
+ "grad_norm": 5.6320648193359375,
485
  "learning_rate": 1.54320987654321e-05,
486
+ "loss": 0.0325,
487
+ "step": 130
488
  },
489
  {
490
  "epoch": 44.0,
491
+ "eval_accuracy": 0.9456521739130435,
492
+ "eval_loss": 0.34682697057724,
493
+ "eval_runtime": 0.2993,
494
+ "eval_samples_per_second": 307.404,
495
+ "eval_steps_per_second": 10.024,
496
+ "step": 132
 
 
 
 
 
 
 
497
  },
498
  {
499
  "epoch": 45.0,
500
+ "eval_accuracy": 0.9347826086956522,
501
+ "eval_loss": 0.33276745676994324,
502
+ "eval_runtime": 0.2866,
503
+ "eval_samples_per_second": 321.025,
504
+ "eval_steps_per_second": 10.468,
505
+ "step": 135
506
  },
507
  {
508
  "epoch": 46.0,
509
+ "eval_accuracy": 0.9347826086956522,
510
+ "eval_loss": 0.3260520398616791,
511
+ "eval_runtime": 0.2798,
512
+ "eval_samples_per_second": 328.753,
513
+ "eval_steps_per_second": 10.72,
514
+ "step": 138
515
  },
516
  {
517
  "epoch": 46.666666666666664,
518
+ "grad_norm": 2.9780068397521973,
519
  "learning_rate": 1.2345679012345678e-05,
520
+ "loss": 0.0541,
521
+ "step": 140
522
  },
523
  {
524
  "epoch": 47.0,
525
+ "eval_accuracy": 0.9347826086956522,
526
+ "eval_loss": 0.34095829725265503,
527
+ "eval_runtime": 0.2824,
528
+ "eval_samples_per_second": 325.795,
529
+ "eval_steps_per_second": 10.624,
530
+ "step": 141
531
  },
532
  {
533
  "epoch": 48.0,
534
+ "eval_accuracy": 0.9347826086956522,
535
+ "eval_loss": 0.3532644212245941,
536
+ "eval_runtime": 0.2895,
537
+ "eval_samples_per_second": 317.751,
538
+ "eval_steps_per_second": 10.361,
539
+ "step": 144
 
 
 
 
 
 
 
540
  },
541
  {
542
  "epoch": 49.0,
543
+ "eval_accuracy": 0.9347826086956522,
544
+ "eval_loss": 0.3564309775829315,
545
+ "eval_runtime": 0.3048,
546
+ "eval_samples_per_second": 301.792,
547
+ "eval_steps_per_second": 9.841,
548
+ "step": 147
549
  },
550
  {
551
  "epoch": 50.0,
552
+ "grad_norm": 3.5734236240386963,
553
  "learning_rate": 9.259259259259259e-06,
554
+ "loss": 0.022,
555
+ "step": 150
556
  },
557
  {
558
  "epoch": 50.0,
559
+ "eval_accuracy": 0.9347826086956522,
560
+ "eval_loss": 0.3557578921318054,
561
+ "eval_runtime": 0.2742,
562
+ "eval_samples_per_second": 335.507,
563
+ "eval_steps_per_second": 10.94,
564
+ "step": 150
565
  },
566
  {
567
  "epoch": 51.0,
568
+ "eval_accuracy": 0.9347826086956522,
569
+ "eval_loss": 0.34129852056503296,
570
+ "eval_runtime": 0.311,
571
+ "eval_samples_per_second": 295.815,
572
+ "eval_steps_per_second": 9.646,
573
+ "step": 153
 
 
 
 
 
 
 
574
  },
575
  {
576
  "epoch": 52.0,
577
+ "eval_accuracy": 0.9347826086956522,
578
+ "eval_loss": 0.33716654777526855,
579
+ "eval_runtime": 0.2804,
580
+ "eval_samples_per_second": 328.129,
581
+ "eval_steps_per_second": 10.7,
582
+ "step": 156
583
  },
584
  {
585
  "epoch": 53.0,
586
+ "eval_accuracy": 0.9347826086956522,
587
+ "eval_loss": 0.33803921937942505,
588
+ "eval_runtime": 0.301,
589
+ "eval_samples_per_second": 305.697,
590
+ "eval_steps_per_second": 9.968,
591
+ "step": 159
592
  },
593
  {
594
  "epoch": 53.333333333333336,
595
+ "grad_norm": 8.637144088745117,
596
  "learning_rate": 6.172839506172839e-06,
597
+ "loss": 0.0226,
598
+ "step": 160
599
  },
600
  {
601
  "epoch": 54.0,
602
+ "eval_accuracy": 0.9456521739130435,
603
+ "eval_loss": 0.32888463139533997,
604
+ "eval_runtime": 0.2786,
605
+ "eval_samples_per_second": 330.184,
606
+ "eval_steps_per_second": 10.767,
607
+ "step": 162
 
 
 
 
 
 
 
608
  },
609
  {
610
  "epoch": 55.0,
611
+ "eval_accuracy": 0.9347826086956522,
612
+ "eval_loss": 0.3256872296333313,
613
+ "eval_runtime": 0.2749,
614
+ "eval_samples_per_second": 334.655,
615
+ "eval_steps_per_second": 10.913,
616
+ "step": 165
617
  },
618
  {
619
  "epoch": 56.0,
620
+ "eval_accuracy": 0.9347826086956522,
621
+ "eval_loss": 0.32627877593040466,
622
+ "eval_runtime": 0.3023,
623
+ "eval_samples_per_second": 304.338,
624
+ "eval_steps_per_second": 9.924,
625
+ "step": 168
626
  },
627
  {
628
  "epoch": 56.666666666666664,
629
+ "grad_norm": 4.384215831756592,
630
  "learning_rate": 3.0864197530864196e-06,
631
+ "loss": 0.0358,
632
+ "step": 170
633
  },
634
  {
635
  "epoch": 57.0,
636
+ "eval_accuracy": 0.9347826086956522,
637
+ "eval_loss": 0.3316076099872589,
638
+ "eval_runtime": 0.3111,
639
+ "eval_samples_per_second": 295.734,
640
+ "eval_steps_per_second": 9.644,
641
+ "step": 171
642
  },
643
  {
644
  "epoch": 58.0,
645
+ "eval_accuracy": 0.9456521739130435,
646
+ "eval_loss": 0.3367496728897095,
647
+ "eval_runtime": 0.2975,
648
+ "eval_samples_per_second": 309.229,
649
+ "eval_steps_per_second": 10.084,
650
+ "step": 174
 
 
 
 
 
 
 
651
  },
652
  {
653
  "epoch": 59.0,
654
+ "eval_accuracy": 0.9456521739130435,
655
+ "eval_loss": 0.33811014890670776,
656
+ "eval_runtime": 0.2965,
657
+ "eval_samples_per_second": 310.296,
658
+ "eval_steps_per_second": 10.118,
659
+ "step": 177
660
  },
661
  {
662
  "epoch": 60.0,
663
+ "grad_norm": 6.692429065704346,
664
  "learning_rate": 0.0,
665
+ "loss": 0.0225,
666
+ "step": 180
667
  },
668
  {
669
  "epoch": 60.0,
670
+ "eval_accuracy": 0.9456521739130435,
671
+ "eval_loss": 0.33829161524772644,
672
+ "eval_runtime": 0.2793,
673
+ "eval_samples_per_second": 329.443,
674
+ "eval_steps_per_second": 10.743,
675
+ "step": 180
676
  },
677
  {
678
  "epoch": 60.0,
679
+ "step": 180,
680
+ "total_flos": 4.3676735454019584e+17,
681
+ "train_loss": 0.10428427308797836,
682
+ "train_runtime": 199.9604,
683
+ "train_samples_per_second": 111.622,
684
+ "train_steps_per_second": 0.9
685
  }
686
  ],
687
  "logging_steps": 10,
688
+ "max_steps": 180,
689
  "num_input_tokens_seen": 0,
690
  "num_train_epochs": 60,
691
  "save_steps": 500,
 
701
  "attributes": {}
702
  }
703
  },
704
+ "total_flos": 4.3676735454019584e+17,
705
  "train_batch_size": 32,
706
  "trial_name": null,
707
  "trial_params": null