Melo1512 committed on
Commit
7041518
·
verified ·
1 Parent(s): 3637a0e

End of training

Browse files
README.md CHANGED
@@ -33,7 +33,7 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.0345
37
  - Accuracy: 0.9840
38
 
39
  ## Model description
 
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.0718
37
  - Accuracy: 0.9840
38
 
39
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "eval_accuracy": 0.9840425531914894,
4
+ "eval_loss": 0.0718478411436081,
5
+ "eval_runtime": 0.629,
6
+ "eval_samples_per_second": 298.878,
7
+ "eval_steps_per_second": 9.539,
8
+ "total_flos": 8.758829206639411e+17,
9
+ "train_loss": 0.09168570356236563,
10
+ "train_runtime": 380.8373,
11
+ "train_samples_per_second": 117.531,
12
+ "train_steps_per_second": 0.945
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "eval_accuracy": 0.9840425531914894,
4
+ "eval_loss": 0.0718478411436081,
5
+ "eval_runtime": 0.629,
6
+ "eval_samples_per_second": 298.878,
7
+ "eval_steps_per_second": 9.539
8
+ }
runs/Dec11_14-08-23_ae1aa77fe319/events.out.tfevents.1733926534.ae1aa77fe319.236.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:234c971ea03b1d0bb3a5c3a399839cfe6bb57cc13b44dd2f3fc90923be2e0029
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 60.0,
3
+ "total_flos": 8.758829206639411e+17,
4
+ "train_loss": 0.09168570356236563,
5
+ "train_runtime": 380.8373,
6
+ "train_samples_per_second": 117.531,
7
+ "train_steps_per_second": 0.945
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,834 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9840425531914894,
3
+ "best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-72",
4
+ "epoch": 60.0,
5
+ "eval_steps": 500,
6
+ "global_step": 360,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.7340425531914894,
14
+ "eval_loss": 0.5712631344795227,
15
+ "eval_runtime": 0.5622,
16
+ "eval_samples_per_second": 334.415,
17
+ "eval_steps_per_second": 10.673,
18
+ "step": 6
19
+ },
20
+ {
21
+ "epoch": 1.6666666666666665,
22
+ "grad_norm": 10.44019603729248,
23
+ "learning_rate": 1.388888888888889e-05,
24
+ "loss": 0.6051,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.7659574468085106,
30
+ "eval_loss": 0.4693465232849121,
31
+ "eval_runtime": 0.5811,
32
+ "eval_samples_per_second": 323.506,
33
+ "eval_steps_per_second": 10.325,
34
+ "step": 12
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_accuracy": 0.9414893617021277,
39
+ "eval_loss": 0.1643817126750946,
40
+ "eval_runtime": 0.6,
41
+ "eval_samples_per_second": 313.329,
42
+ "eval_steps_per_second": 10.0,
43
+ "step": 18
44
+ },
45
+ {
46
+ "epoch": 3.3333333333333335,
47
+ "grad_norm": 8.123635292053223,
48
+ "learning_rate": 2.777777777777778e-05,
49
+ "loss": 0.2544,
50
+ "step": 20
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "eval_accuracy": 0.9574468085106383,
55
+ "eval_loss": 0.08451231569051743,
56
+ "eval_runtime": 0.5515,
57
+ "eval_samples_per_second": 340.904,
58
+ "eval_steps_per_second": 10.88,
59
+ "step": 24
60
+ },
61
+ {
62
+ "epoch": 5.0,
63
+ "grad_norm": 37.864131927490234,
64
+ "learning_rate": 4.166666666666667e-05,
65
+ "loss": 0.1896,
66
+ "step": 30
67
+ },
68
+ {
69
+ "epoch": 5.0,
70
+ "eval_accuracy": 0.973404255319149,
71
+ "eval_loss": 0.09721191227436066,
72
+ "eval_runtime": 0.5728,
73
+ "eval_samples_per_second": 328.217,
74
+ "eval_steps_per_second": 10.475,
75
+ "step": 30
76
+ },
77
+ {
78
+ "epoch": 6.0,
79
+ "eval_accuracy": 0.9680851063829787,
80
+ "eval_loss": 0.1011401042342186,
81
+ "eval_runtime": 0.5505,
82
+ "eval_samples_per_second": 341.503,
83
+ "eval_steps_per_second": 10.899,
84
+ "step": 36
85
+ },
86
+ {
87
+ "epoch": 6.666666666666667,
88
+ "grad_norm": 11.042017936706543,
89
+ "learning_rate": 4.938271604938271e-05,
90
+ "loss": 0.2534,
91
+ "step": 40
92
+ },
93
+ {
94
+ "epoch": 7.0,
95
+ "eval_accuracy": 0.9787234042553191,
96
+ "eval_loss": 0.08934129774570465,
97
+ "eval_runtime": 0.5635,
98
+ "eval_samples_per_second": 333.602,
99
+ "eval_steps_per_second": 10.647,
100
+ "step": 42
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_accuracy": 0.9627659574468085,
105
+ "eval_loss": 0.08766720443964005,
106
+ "eval_runtime": 0.5541,
107
+ "eval_samples_per_second": 339.311,
108
+ "eval_steps_per_second": 10.829,
109
+ "step": 48
110
+ },
111
+ {
112
+ "epoch": 8.333333333333334,
113
+ "grad_norm": 8.848210334777832,
114
+ "learning_rate": 4.783950617283951e-05,
115
+ "loss": 0.1535,
116
+ "step": 50
117
+ },
118
+ {
119
+ "epoch": 9.0,
120
+ "eval_accuracy": 0.973404255319149,
121
+ "eval_loss": 0.10303648561239243,
122
+ "eval_runtime": 0.5537,
123
+ "eval_samples_per_second": 339.542,
124
+ "eval_steps_per_second": 10.836,
125
+ "step": 54
126
+ },
127
+ {
128
+ "epoch": 10.0,
129
+ "grad_norm": 8.038219451904297,
130
+ "learning_rate": 4.62962962962963e-05,
131
+ "loss": 0.1277,
132
+ "step": 60
133
+ },
134
+ {
135
+ "epoch": 10.0,
136
+ "eval_accuracy": 0.973404255319149,
137
+ "eval_loss": 0.07765703648328781,
138
+ "eval_runtime": 0.5522,
139
+ "eval_samples_per_second": 340.451,
140
+ "eval_steps_per_second": 10.865,
141
+ "step": 60
142
+ },
143
+ {
144
+ "epoch": 11.0,
145
+ "eval_accuracy": 0.9680851063829787,
146
+ "eval_loss": 0.08228272944688797,
147
+ "eval_runtime": 0.5522,
148
+ "eval_samples_per_second": 340.441,
149
+ "eval_steps_per_second": 10.865,
150
+ "step": 66
151
+ },
152
+ {
153
+ "epoch": 11.666666666666666,
154
+ "grad_norm": 11.488448143005371,
155
+ "learning_rate": 4.4753086419753084e-05,
156
+ "loss": 0.1147,
157
+ "step": 70
158
+ },
159
+ {
160
+ "epoch": 12.0,
161
+ "eval_accuracy": 0.9840425531914894,
162
+ "eval_loss": 0.0718478411436081,
163
+ "eval_runtime": 0.5684,
164
+ "eval_samples_per_second": 330.757,
165
+ "eval_steps_per_second": 10.556,
166
+ "step": 72
167
+ },
168
+ {
169
+ "epoch": 13.0,
170
+ "eval_accuracy": 0.9521276595744681,
171
+ "eval_loss": 0.09054908156394958,
172
+ "eval_runtime": 0.5801,
173
+ "eval_samples_per_second": 324.086,
174
+ "eval_steps_per_second": 10.343,
175
+ "step": 78
176
+ },
177
+ {
178
+ "epoch": 13.333333333333334,
179
+ "grad_norm": 10.379966735839844,
180
+ "learning_rate": 4.3209876543209875e-05,
181
+ "loss": 0.112,
182
+ "step": 80
183
+ },
184
+ {
185
+ "epoch": 14.0,
186
+ "eval_accuracy": 0.9202127659574468,
187
+ "eval_loss": 0.21216550469398499,
188
+ "eval_runtime": 0.5499,
189
+ "eval_samples_per_second": 341.878,
190
+ "eval_steps_per_second": 10.911,
191
+ "step": 84
192
+ },
193
+ {
194
+ "epoch": 15.0,
195
+ "grad_norm": 6.255307674407959,
196
+ "learning_rate": 4.166666666666667e-05,
197
+ "loss": 0.1115,
198
+ "step": 90
199
+ },
200
+ {
201
+ "epoch": 15.0,
202
+ "eval_accuracy": 0.9414893617021277,
203
+ "eval_loss": 0.1408630907535553,
204
+ "eval_runtime": 0.6004,
205
+ "eval_samples_per_second": 313.12,
206
+ "eval_steps_per_second": 9.993,
207
+ "step": 90
208
+ },
209
+ {
210
+ "epoch": 16.0,
211
+ "eval_accuracy": 0.973404255319149,
212
+ "eval_loss": 0.08175182342529297,
213
+ "eval_runtime": 0.5516,
214
+ "eval_samples_per_second": 340.81,
215
+ "eval_steps_per_second": 10.877,
216
+ "step": 96
217
+ },
218
+ {
219
+ "epoch": 16.666666666666668,
220
+ "grad_norm": 32.63232421875,
221
+ "learning_rate": 4.012345679012346e-05,
222
+ "loss": 0.107,
223
+ "step": 100
224
+ },
225
+ {
226
+ "epoch": 17.0,
227
+ "eval_accuracy": 0.973404255319149,
228
+ "eval_loss": 0.059448737651109695,
229
+ "eval_runtime": 0.5763,
230
+ "eval_samples_per_second": 326.236,
231
+ "eval_steps_per_second": 10.412,
232
+ "step": 102
233
+ },
234
+ {
235
+ "epoch": 18.0,
236
+ "eval_accuracy": 0.9308510638297872,
237
+ "eval_loss": 0.16710200905799866,
238
+ "eval_runtime": 0.5588,
239
+ "eval_samples_per_second": 336.405,
240
+ "eval_steps_per_second": 10.736,
241
+ "step": 108
242
+ },
243
+ {
244
+ "epoch": 18.333333333333332,
245
+ "grad_norm": 3.7432363033294678,
246
+ "learning_rate": 3.8580246913580246e-05,
247
+ "loss": 0.0941,
248
+ "step": 110
249
+ },
250
+ {
251
+ "epoch": 19.0,
252
+ "eval_accuracy": 0.9308510638297872,
253
+ "eval_loss": 0.140838161110878,
254
+ "eval_runtime": 0.5919,
255
+ "eval_samples_per_second": 317.613,
256
+ "eval_steps_per_second": 10.137,
257
+ "step": 114
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "grad_norm": 7.966294288635254,
262
+ "learning_rate": 3.7037037037037037e-05,
263
+ "loss": 0.0629,
264
+ "step": 120
265
+ },
266
+ {
267
+ "epoch": 20.0,
268
+ "eval_accuracy": 0.9414893617021277,
269
+ "eval_loss": 0.13265569508075714,
270
+ "eval_runtime": 0.5698,
271
+ "eval_samples_per_second": 329.925,
272
+ "eval_steps_per_second": 10.53,
273
+ "step": 120
274
+ },
275
+ {
276
+ "epoch": 21.0,
277
+ "eval_accuracy": 0.9680851063829787,
278
+ "eval_loss": 0.08957220613956451,
279
+ "eval_runtime": 0.5645,
280
+ "eval_samples_per_second": 333.023,
281
+ "eval_steps_per_second": 10.628,
282
+ "step": 126
283
+ },
284
+ {
285
+ "epoch": 21.666666666666668,
286
+ "grad_norm": 6.942417144775391,
287
+ "learning_rate": 3.5493827160493834e-05,
288
+ "loss": 0.081,
289
+ "step": 130
290
+ },
291
+ {
292
+ "epoch": 22.0,
293
+ "eval_accuracy": 0.9574468085106383,
294
+ "eval_loss": 0.09119919687509537,
295
+ "eval_runtime": 0.5916,
296
+ "eval_samples_per_second": 317.756,
297
+ "eval_steps_per_second": 10.141,
298
+ "step": 132
299
+ },
300
+ {
301
+ "epoch": 23.0,
302
+ "eval_accuracy": 0.9521276595744681,
303
+ "eval_loss": 0.10360775887966156,
304
+ "eval_runtime": 0.5817,
305
+ "eval_samples_per_second": 323.216,
306
+ "eval_steps_per_second": 10.315,
307
+ "step": 138
308
+ },
309
+ {
310
+ "epoch": 23.333333333333332,
311
+ "grad_norm": 3.9243404865264893,
312
+ "learning_rate": 3.395061728395062e-05,
313
+ "loss": 0.0706,
314
+ "step": 140
315
+ },
316
+ {
317
+ "epoch": 24.0,
318
+ "eval_accuracy": 0.9521276595744681,
319
+ "eval_loss": 0.07820819318294525,
320
+ "eval_runtime": 0.5573,
321
+ "eval_samples_per_second": 337.319,
322
+ "eval_steps_per_second": 10.766,
323
+ "step": 144
324
+ },
325
+ {
326
+ "epoch": 25.0,
327
+ "grad_norm": 5.995626449584961,
328
+ "learning_rate": 3.240740740740741e-05,
329
+ "loss": 0.0728,
330
+ "step": 150
331
+ },
332
+ {
333
+ "epoch": 25.0,
334
+ "eval_accuracy": 0.9627659574468085,
335
+ "eval_loss": 0.06730703264474869,
336
+ "eval_runtime": 0.564,
337
+ "eval_samples_per_second": 333.307,
338
+ "eval_steps_per_second": 10.637,
339
+ "step": 150
340
+ },
341
+ {
342
+ "epoch": 26.0,
343
+ "eval_accuracy": 0.9627659574468085,
344
+ "eval_loss": 0.13579747080802917,
345
+ "eval_runtime": 0.5839,
346
+ "eval_samples_per_second": 321.994,
347
+ "eval_steps_per_second": 10.276,
348
+ "step": 156
349
+ },
350
+ {
351
+ "epoch": 26.666666666666668,
352
+ "grad_norm": 3.335559606552124,
353
+ "learning_rate": 3.08641975308642e-05,
354
+ "loss": 0.0535,
355
+ "step": 160
356
+ },
357
+ {
358
+ "epoch": 27.0,
359
+ "eval_accuracy": 0.9574468085106383,
360
+ "eval_loss": 0.09251847118139267,
361
+ "eval_runtime": 0.5476,
362
+ "eval_samples_per_second": 343.33,
363
+ "eval_steps_per_second": 10.957,
364
+ "step": 162
365
+ },
366
+ {
367
+ "epoch": 28.0,
368
+ "eval_accuracy": 0.973404255319149,
369
+ "eval_loss": 0.09499593824148178,
370
+ "eval_runtime": 0.557,
371
+ "eval_samples_per_second": 337.516,
372
+ "eval_steps_per_second": 10.772,
373
+ "step": 168
374
+ },
375
+ {
376
+ "epoch": 28.333333333333332,
377
+ "grad_norm": 6.770501136779785,
378
+ "learning_rate": 2.9320987654320992e-05,
379
+ "loss": 0.058,
380
+ "step": 170
381
+ },
382
+ {
383
+ "epoch": 29.0,
384
+ "eval_accuracy": 0.9574468085106383,
385
+ "eval_loss": 0.09998849779367447,
386
+ "eval_runtime": 0.5597,
387
+ "eval_samples_per_second": 335.871,
388
+ "eval_steps_per_second": 10.719,
389
+ "step": 174
390
+ },
391
+ {
392
+ "epoch": 30.0,
393
+ "grad_norm": 19.26597023010254,
394
+ "learning_rate": 2.777777777777778e-05,
395
+ "loss": 0.0662,
396
+ "step": 180
397
+ },
398
+ {
399
+ "epoch": 30.0,
400
+ "eval_accuracy": 0.9414893617021277,
401
+ "eval_loss": 0.251209557056427,
402
+ "eval_runtime": 0.5694,
403
+ "eval_samples_per_second": 330.181,
404
+ "eval_steps_per_second": 10.538,
405
+ "step": 180
406
+ },
407
+ {
408
+ "epoch": 31.0,
409
+ "eval_accuracy": 0.9680851063829787,
410
+ "eval_loss": 0.06487108021974564,
411
+ "eval_runtime": 0.5919,
412
+ "eval_samples_per_second": 317.599,
413
+ "eval_steps_per_second": 10.136,
414
+ "step": 186
415
+ },
416
+ {
417
+ "epoch": 31.666666666666668,
418
+ "grad_norm": 1.7159186601638794,
419
+ "learning_rate": 2.623456790123457e-05,
420
+ "loss": 0.0564,
421
+ "step": 190
422
+ },
423
+ {
424
+ "epoch": 32.0,
425
+ "eval_accuracy": 0.9521276595744681,
426
+ "eval_loss": 0.13227558135986328,
427
+ "eval_runtime": 0.5679,
428
+ "eval_samples_per_second": 331.05,
429
+ "eval_steps_per_second": 10.565,
430
+ "step": 192
431
+ },
432
+ {
433
+ "epoch": 33.0,
434
+ "eval_accuracy": 0.9680851063829787,
435
+ "eval_loss": 0.06702585518360138,
436
+ "eval_runtime": 0.591,
437
+ "eval_samples_per_second": 318.119,
438
+ "eval_steps_per_second": 10.153,
439
+ "step": 198
440
+ },
441
+ {
442
+ "epoch": 33.333333333333336,
443
+ "grad_norm": 0.7996916770935059,
444
+ "learning_rate": 2.4691358024691357e-05,
445
+ "loss": 0.0591,
446
+ "step": 200
447
+ },
448
+ {
449
+ "epoch": 34.0,
450
+ "eval_accuracy": 0.9627659574468085,
451
+ "eval_loss": 0.11914665251970291,
452
+ "eval_runtime": 0.5866,
453
+ "eval_samples_per_second": 320.511,
454
+ "eval_steps_per_second": 10.229,
455
+ "step": 204
456
+ },
457
+ {
458
+ "epoch": 35.0,
459
+ "grad_norm": 3.493698835372925,
460
+ "learning_rate": 2.314814814814815e-05,
461
+ "loss": 0.0353,
462
+ "step": 210
463
+ },
464
+ {
465
+ "epoch": 35.0,
466
+ "eval_accuracy": 0.9680851063829787,
467
+ "eval_loss": 0.14858229458332062,
468
+ "eval_runtime": 0.5761,
469
+ "eval_samples_per_second": 326.357,
470
+ "eval_steps_per_second": 10.416,
471
+ "step": 210
472
+ },
473
+ {
474
+ "epoch": 36.0,
475
+ "eval_accuracy": 0.973404255319149,
476
+ "eval_loss": 0.08810416609048843,
477
+ "eval_runtime": 0.5699,
478
+ "eval_samples_per_second": 329.896,
479
+ "eval_steps_per_second": 10.529,
480
+ "step": 216
481
+ },
482
+ {
483
+ "epoch": 36.666666666666664,
484
+ "grad_norm": 12.2665376663208,
485
+ "learning_rate": 2.1604938271604937e-05,
486
+ "loss": 0.0523,
487
+ "step": 220
488
+ },
489
+ {
490
+ "epoch": 37.0,
491
+ "eval_accuracy": 0.9680851063829787,
492
+ "eval_loss": 0.05061895400285721,
493
+ "eval_runtime": 0.5644,
494
+ "eval_samples_per_second": 333.114,
495
+ "eval_steps_per_second": 10.631,
496
+ "step": 222
497
+ },
498
+ {
499
+ "epoch": 38.0,
500
+ "eval_accuracy": 0.9627659574468085,
501
+ "eval_loss": 0.10941923409700394,
502
+ "eval_runtime": 0.5579,
503
+ "eval_samples_per_second": 336.995,
504
+ "eval_steps_per_second": 10.755,
505
+ "step": 228
506
+ },
507
+ {
508
+ "epoch": 38.333333333333336,
509
+ "grad_norm": 2.317680597305298,
510
+ "learning_rate": 2.006172839506173e-05,
511
+ "loss": 0.0471,
512
+ "step": 230
513
+ },
514
+ {
515
+ "epoch": 39.0,
516
+ "eval_accuracy": 0.973404255319149,
517
+ "eval_loss": 0.08581092208623886,
518
+ "eval_runtime": 0.5654,
519
+ "eval_samples_per_second": 332.53,
520
+ "eval_steps_per_second": 10.613,
521
+ "step": 234
522
+ },
523
+ {
524
+ "epoch": 40.0,
525
+ "grad_norm": 11.043506622314453,
526
+ "learning_rate": 1.8518518518518518e-05,
527
+ "loss": 0.0671,
528
+ "step": 240
529
+ },
530
+ {
531
+ "epoch": 40.0,
532
+ "eval_accuracy": 0.9574468085106383,
533
+ "eval_loss": 0.17757754027843475,
534
+ "eval_runtime": 0.5949,
535
+ "eval_samples_per_second": 316.026,
536
+ "eval_steps_per_second": 10.086,
537
+ "step": 240
538
+ },
539
+ {
540
+ "epoch": 41.0,
541
+ "eval_accuracy": 0.973404255319149,
542
+ "eval_loss": 0.09706045687198639,
543
+ "eval_runtime": 0.5628,
544
+ "eval_samples_per_second": 334.044,
545
+ "eval_steps_per_second": 10.661,
546
+ "step": 246
547
+ },
548
+ {
549
+ "epoch": 41.666666666666664,
550
+ "grad_norm": 2.3154749870300293,
551
+ "learning_rate": 1.697530864197531e-05,
552
+ "loss": 0.0459,
553
+ "step": 250
554
+ },
555
+ {
556
+ "epoch": 42.0,
557
+ "eval_accuracy": 0.9680851063829787,
558
+ "eval_loss": 0.04420238360762596,
559
+ "eval_runtime": 0.572,
560
+ "eval_samples_per_second": 328.671,
561
+ "eval_steps_per_second": 10.489,
562
+ "step": 252
563
+ },
564
+ {
565
+ "epoch": 43.0,
566
+ "eval_accuracy": 0.9787234042553191,
567
+ "eval_loss": 0.044726960361003876,
568
+ "eval_runtime": 0.5631,
569
+ "eval_samples_per_second": 333.838,
570
+ "eval_steps_per_second": 10.654,
571
+ "step": 258
572
+ },
573
+ {
574
+ "epoch": 43.333333333333336,
575
+ "grad_norm": 3.237978935241699,
576
+ "learning_rate": 1.54320987654321e-05,
577
+ "loss": 0.0296,
578
+ "step": 260
579
+ },
580
+ {
581
+ "epoch": 44.0,
582
+ "eval_accuracy": 0.9787234042553191,
583
+ "eval_loss": 0.06447551399469376,
584
+ "eval_runtime": 0.5649,
585
+ "eval_samples_per_second": 332.802,
586
+ "eval_steps_per_second": 10.621,
587
+ "step": 264
588
+ },
589
+ {
590
+ "epoch": 45.0,
591
+ "grad_norm": 15.342556953430176,
592
+ "learning_rate": 1.388888888888889e-05,
593
+ "loss": 0.0414,
594
+ "step": 270
595
+ },
596
+ {
597
+ "epoch": 45.0,
598
+ "eval_accuracy": 0.973404255319149,
599
+ "eval_loss": 0.07515815645456314,
600
+ "eval_runtime": 0.5591,
601
+ "eval_samples_per_second": 336.232,
602
+ "eval_steps_per_second": 10.731,
603
+ "step": 270
604
+ },
605
+ {
606
+ "epoch": 46.0,
607
+ "eval_accuracy": 0.9574468085106383,
608
+ "eval_loss": 0.14192818105220795,
609
+ "eval_runtime": 0.5561,
610
+ "eval_samples_per_second": 338.082,
611
+ "eval_steps_per_second": 10.79,
612
+ "step": 276
613
+ },
614
+ {
615
+ "epoch": 46.666666666666664,
616
+ "grad_norm": 3.4594690799713135,
617
+ "learning_rate": 1.2345679012345678e-05,
618
+ "loss": 0.0352,
619
+ "step": 280
620
+ },
621
+ {
622
+ "epoch": 47.0,
623
+ "eval_accuracy": 0.9840425531914894,
624
+ "eval_loss": 0.04251508414745331,
625
+ "eval_runtime": 0.5605,
626
+ "eval_samples_per_second": 335.419,
627
+ "eval_steps_per_second": 10.705,
628
+ "step": 282
629
+ },
630
+ {
631
+ "epoch": 48.0,
632
+ "eval_accuracy": 0.9787234042553191,
633
+ "eval_loss": 0.06410356611013412,
634
+ "eval_runtime": 0.5687,
635
+ "eval_samples_per_second": 330.571,
636
+ "eval_steps_per_second": 10.55,
637
+ "step": 288
638
+ },
639
+ {
640
+ "epoch": 48.333333333333336,
641
+ "grad_norm": 2.795865774154663,
642
+ "learning_rate": 1.0802469135802469e-05,
643
+ "loss": 0.0342,
644
+ "step": 290
645
+ },
646
+ {
647
+ "epoch": 49.0,
648
+ "eval_accuracy": 0.9680851063829787,
649
+ "eval_loss": 0.11264320462942123,
650
+ "eval_runtime": 0.5453,
651
+ "eval_samples_per_second": 344.776,
652
+ "eval_steps_per_second": 11.003,
653
+ "step": 294
654
+ },
655
+ {
656
+ "epoch": 50.0,
657
+ "grad_norm": 4.04909086227417,
658
+ "learning_rate": 9.259259259259259e-06,
659
+ "loss": 0.0277,
660
+ "step": 300
661
+ },
662
+ {
663
+ "epoch": 50.0,
664
+ "eval_accuracy": 0.9840425531914894,
665
+ "eval_loss": 0.054132696241140366,
666
+ "eval_runtime": 0.5421,
667
+ "eval_samples_per_second": 346.78,
668
+ "eval_steps_per_second": 11.067,
669
+ "step": 300
670
+ },
671
+ {
672
+ "epoch": 51.0,
673
+ "eval_accuracy": 0.9787234042553191,
674
+ "eval_loss": 0.07527489215135574,
675
+ "eval_runtime": 0.5567,
676
+ "eval_samples_per_second": 337.676,
677
+ "eval_steps_per_second": 10.777,
678
+ "step": 306
679
+ },
680
+ {
681
+ "epoch": 51.666666666666664,
682
+ "grad_norm": 1.1342123746871948,
683
+ "learning_rate": 7.71604938271605e-06,
684
+ "loss": 0.0392,
685
+ "step": 310
686
+ },
687
+ {
688
+ "epoch": 52.0,
689
+ "eval_accuracy": 0.9787234042553191,
690
+ "eval_loss": 0.04125715419650078,
691
+ "eval_runtime": 0.5747,
692
+ "eval_samples_per_second": 327.145,
693
+ "eval_steps_per_second": 10.441,
694
+ "step": 312
695
+ },
696
+ {
697
+ "epoch": 53.0,
698
+ "eval_accuracy": 0.9627659574468085,
699
+ "eval_loss": 0.11188509315252304,
700
+ "eval_runtime": 0.5497,
701
+ "eval_samples_per_second": 341.985,
702
+ "eval_steps_per_second": 10.914,
703
+ "step": 318
704
+ },
705
+ {
706
+ "epoch": 53.333333333333336,
707
+ "grad_norm": 4.390924453735352,
708
+ "learning_rate": 6.172839506172839e-06,
709
+ "loss": 0.0299,
710
+ "step": 320
711
+ },
712
+ {
713
+ "epoch": 54.0,
714
+ "eval_accuracy": 0.9627659574468085,
715
+ "eval_loss": 0.08185816556215286,
716
+ "eval_runtime": 0.5785,
717
+ "eval_samples_per_second": 324.999,
718
+ "eval_steps_per_second": 10.372,
719
+ "step": 324
720
+ },
721
+ {
722
+ "epoch": 55.0,
723
+ "grad_norm": 1.3617689609527588,
724
+ "learning_rate": 4.6296296296296296e-06,
725
+ "loss": 0.0295,
726
+ "step": 330
727
+ },
728
+ {
729
+ "epoch": 55.0,
730
+ "eval_accuracy": 0.9840425531914894,
731
+ "eval_loss": 0.0335795022547245,
732
+ "eval_runtime": 0.5553,
733
+ "eval_samples_per_second": 338.529,
734
+ "eval_steps_per_second": 10.804,
735
+ "step": 330
736
+ },
737
+ {
738
+ "epoch": 56.0,
739
+ "eval_accuracy": 0.9840425531914894,
740
+ "eval_loss": 0.039023175835609436,
741
+ "eval_runtime": 0.5576,
742
+ "eval_samples_per_second": 337.161,
743
+ "eval_steps_per_second": 10.76,
744
+ "step": 336
745
+ },
746
+ {
747
+ "epoch": 56.666666666666664,
748
+ "grad_norm": 0.5556619763374329,
749
+ "learning_rate": 3.0864197530864196e-06,
750
+ "loss": 0.0253,
751
+ "step": 340
752
+ },
753
+ {
754
+ "epoch": 57.0,
755
+ "eval_accuracy": 0.9840425531914894,
756
+ "eval_loss": 0.03277648240327835,
757
+ "eval_runtime": 0.5626,
758
+ "eval_samples_per_second": 334.143,
759
+ "eval_steps_per_second": 10.664,
760
+ "step": 342
761
+ },
762
+ {
763
+ "epoch": 58.0,
764
+ "eval_accuracy": 0.9840425531914894,
765
+ "eval_loss": 0.0343145877122879,
766
+ "eval_runtime": 0.5708,
767
+ "eval_samples_per_second": 329.368,
768
+ "eval_steps_per_second": 10.512,
769
+ "step": 348
770
+ },
771
+ {
772
+ "epoch": 58.333333333333336,
773
+ "grad_norm": 0.7165215611457825,
774
+ "learning_rate": 1.5432098765432098e-06,
775
+ "loss": 0.0264,
776
+ "step": 350
777
+ },
778
+ {
779
+ "epoch": 59.0,
780
+ "eval_accuracy": 0.9840425531914894,
781
+ "eval_loss": 0.03521186113357544,
782
+ "eval_runtime": 0.5748,
783
+ "eval_samples_per_second": 327.069,
784
+ "eval_steps_per_second": 10.438,
785
+ "step": 354
786
+ },
787
+ {
788
+ "epoch": 60.0,
789
+ "grad_norm": 5.298318386077881,
790
+ "learning_rate": 0.0,
791
+ "loss": 0.0308,
792
+ "step": 360
793
+ },
794
+ {
795
+ "epoch": 60.0,
796
+ "eval_accuracy": 0.9840425531914894,
797
+ "eval_loss": 0.03450946509838104,
798
+ "eval_runtime": 0.5861,
799
+ "eval_samples_per_second": 320.783,
800
+ "eval_steps_per_second": 10.238,
801
+ "step": 360
802
+ },
803
+ {
804
+ "epoch": 60.0,
805
+ "step": 360,
806
+ "total_flos": 8.758829206639411e+17,
807
+ "train_loss": 0.09168570356236563,
808
+ "train_runtime": 380.8373,
809
+ "train_samples_per_second": 117.531,
810
+ "train_steps_per_second": 0.945
811
+ }
812
+ ],
813
+ "logging_steps": 10,
814
+ "max_steps": 360,
815
+ "num_input_tokens_seen": 0,
816
+ "num_train_epochs": 60,
817
+ "save_steps": 500,
818
+ "stateful_callbacks": {
819
+ "TrainerControl": {
820
+ "args": {
821
+ "should_epoch_stop": false,
822
+ "should_evaluate": false,
823
+ "should_log": false,
824
+ "should_save": true,
825
+ "should_training_stop": true
826
+ },
827
+ "attributes": {}
828
+ }
829
+ },
830
+ "total_flos": 8.758829206639411e+17,
831
+ "train_batch_size": 32,
832
+ "trial_name": null,
833
+ "trial_params": null
834
+ }