Melo1512 committed on
Commit
bf95d4c
·
verified ·
1 Parent(s): 23f8c79

End of training

Browse files
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.875
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.3460
37
- - Accuracy: 0.875
38
 
39
  ## Model description
40
 
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.9166666666666666
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [facebook/vit-msn-small](https://huggingface.co/facebook/vit-msn-small) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.3267
37
+ - Accuracy: 0.9167
38
 
39
  ## Model description
40
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 71.42857142857143,
3
+ "eval_accuracy": 0.9166666666666666,
4
+ "eval_loss": 0.32666629552841187,
5
+ "eval_runtime": 0.6636,
6
+ "eval_samples_per_second": 216.992,
7
+ "eval_steps_per_second": 4.521,
8
+ "total_flos": 5.956344520589353e+17,
9
+ "train_loss": 0.420122013092041,
10
+ "train_runtime": 326.0769,
11
+ "train_samples_per_second": 130.644,
12
+ "train_steps_per_second": 0.307
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 71.42857142857143,
3
+ "eval_accuracy": 0.9166666666666666,
4
+ "eval_loss": 0.32666629552841187,
5
+ "eval_runtime": 0.6636,
6
+ "eval_samples_per_second": 216.992,
7
+ "eval_steps_per_second": 4.521
8
+ }
runs/Jan27_17-36-47_db1093ce036b/events.out.tfevents.1737999760.db1093ce036b.224.13 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba0b9a87ddf06d3dcb060be2f5daa2663797604169303edcca562e77cafe45f
3
+ size 405
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 71.42857142857143,
3
+ "total_flos": 5.956344520589353e+17,
4
+ "train_loss": 0.420122013092041,
5
+ "train_runtime": 326.0769,
6
+ "train_samples_per_second": 130.644,
7
+ "train_steps_per_second": 0.307
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.32666629552841187,
3
+ "best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_5/checkpoint-44",
4
+ "epoch": 71.42857142857143,
5
+ "eval_steps": 500,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7142857142857143,
13
+ "eval_accuracy": 0.22916666666666666,
14
+ "eval_loss": 1.1105738878250122,
15
+ "eval_runtime": 0.6997,
16
+ "eval_samples_per_second": 205.812,
17
+ "eval_steps_per_second": 4.288,
18
+ "step": 1
19
+ },
20
+ {
21
+ "epoch": 1.4285714285714286,
22
+ "eval_accuracy": 0.2569444444444444,
23
+ "eval_loss": 1.098416805267334,
24
+ "eval_runtime": 0.6273,
25
+ "eval_samples_per_second": 229.555,
26
+ "eval_steps_per_second": 4.782,
27
+ "step": 2
28
+ },
29
+ {
30
+ "epoch": 2.857142857142857,
31
+ "eval_accuracy": 0.4097222222222222,
32
+ "eval_loss": 1.039996862411499,
33
+ "eval_runtime": 0.6547,
34
+ "eval_samples_per_second": 219.941,
35
+ "eval_steps_per_second": 4.582,
36
+ "step": 4
37
+ },
38
+ {
39
+ "epoch": 3.571428571428571,
40
+ "eval_accuracy": 0.5486111111111112,
41
+ "eval_loss": 0.99596107006073,
42
+ "eval_runtime": 0.6487,
43
+ "eval_samples_per_second": 221.995,
44
+ "eval_steps_per_second": 4.625,
45
+ "step": 5
46
+ },
47
+ {
48
+ "epoch": 5.0,
49
+ "eval_accuracy": 0.7291666666666666,
50
+ "eval_loss": 0.8868050575256348,
51
+ "eval_runtime": 0.7036,
52
+ "eval_samples_per_second": 204.658,
53
+ "eval_steps_per_second": 4.264,
54
+ "step": 7
55
+ },
56
+ {
57
+ "epoch": 5.714285714285714,
58
+ "eval_accuracy": 0.7777777777777778,
59
+ "eval_loss": 0.8263272047042847,
60
+ "eval_runtime": 0.7254,
61
+ "eval_samples_per_second": 198.509,
62
+ "eval_steps_per_second": 4.136,
63
+ "step": 8
64
+ },
65
+ {
66
+ "epoch": 6.428571428571429,
67
+ "eval_accuracy": 0.8055555555555556,
68
+ "eval_loss": 0.7650943994522095,
69
+ "eval_runtime": 0.6223,
70
+ "eval_samples_per_second": 231.4,
71
+ "eval_steps_per_second": 4.821,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 7.142857142857143,
76
+ "grad_norm": 13.31224536895752,
77
+ "learning_rate": 4.000000000000001e-06,
78
+ "loss": 0.9808,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 7.857142857142857,
83
+ "eval_accuracy": 0.8125,
84
+ "eval_loss": 0.6520677208900452,
85
+ "eval_runtime": 0.6795,
86
+ "eval_samples_per_second": 211.923,
87
+ "eval_steps_per_second": 4.415,
88
+ "step": 11
89
+ },
90
+ {
91
+ "epoch": 8.571428571428571,
92
+ "eval_accuracy": 0.8125,
93
+ "eval_loss": 0.605199933052063,
94
+ "eval_runtime": 0.6616,
95
+ "eval_samples_per_second": 217.658,
96
+ "eval_steps_per_second": 4.535,
97
+ "step": 12
98
+ },
99
+ {
100
+ "epoch": 10.0,
101
+ "eval_accuracy": 0.8125,
102
+ "eval_loss": 0.538772702217102,
103
+ "eval_runtime": 0.6326,
104
+ "eval_samples_per_second": 227.627,
105
+ "eval_steps_per_second": 4.742,
106
+ "step": 14
107
+ },
108
+ {
109
+ "epoch": 10.714285714285714,
110
+ "eval_accuracy": 0.8125,
111
+ "eval_loss": 0.5174447894096375,
112
+ "eval_runtime": 0.6152,
113
+ "eval_samples_per_second": 234.055,
114
+ "eval_steps_per_second": 4.876,
115
+ "step": 15
116
+ },
117
+ {
118
+ "epoch": 11.428571428571429,
119
+ "eval_accuracy": 0.8125,
120
+ "eval_loss": 0.503182590007782,
121
+ "eval_runtime": 0.6377,
122
+ "eval_samples_per_second": 225.804,
123
+ "eval_steps_per_second": 4.704,
124
+ "step": 16
125
+ },
126
+ {
127
+ "epoch": 12.857142857142858,
128
+ "eval_accuracy": 0.8125,
129
+ "eval_loss": 0.5022291541099548,
130
+ "eval_runtime": 0.6473,
131
+ "eval_samples_per_second": 222.471,
132
+ "eval_steps_per_second": 4.635,
133
+ "step": 18
134
+ },
135
+ {
136
+ "epoch": 13.571428571428571,
137
+ "eval_accuracy": 0.8194444444444444,
138
+ "eval_loss": 0.5044423341751099,
139
+ "eval_runtime": 0.6293,
140
+ "eval_samples_per_second": 228.838,
141
+ "eval_steps_per_second": 4.767,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 14.285714285714286,
146
+ "grad_norm": 3.2047011852264404,
147
+ "learning_rate": 8.000000000000001e-06,
148
+ "loss": 0.5431,
149
+ "step": 20
150
+ },
151
+ {
152
+ "epoch": 15.0,
153
+ "eval_accuracy": 0.8263888888888888,
154
+ "eval_loss": 0.47730180621147156,
155
+ "eval_runtime": 0.7212,
156
+ "eval_samples_per_second": 199.678,
157
+ "eval_steps_per_second": 4.16,
158
+ "step": 21
159
+ },
160
+ {
161
+ "epoch": 15.714285714285714,
162
+ "eval_accuracy": 0.8333333333333334,
163
+ "eval_loss": 0.4439104497432709,
164
+ "eval_runtime": 0.6519,
165
+ "eval_samples_per_second": 220.904,
166
+ "eval_steps_per_second": 4.602,
167
+ "step": 22
168
+ },
169
+ {
170
+ "epoch": 16.428571428571427,
171
+ "eval_accuracy": 0.8402777777777778,
172
+ "eval_loss": 0.41979074478149414,
173
+ "eval_runtime": 0.6167,
174
+ "eval_samples_per_second": 233.497,
175
+ "eval_steps_per_second": 4.865,
176
+ "step": 23
177
+ },
178
+ {
179
+ "epoch": 17.857142857142858,
180
+ "eval_accuracy": 0.8819444444444444,
181
+ "eval_loss": 0.38726139068603516,
182
+ "eval_runtime": 0.626,
183
+ "eval_samples_per_second": 230.043,
184
+ "eval_steps_per_second": 4.793,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 18.571428571428573,
189
+ "eval_accuracy": 0.8888888888888888,
190
+ "eval_loss": 0.37299442291259766,
191
+ "eval_runtime": 0.6291,
192
+ "eval_samples_per_second": 228.912,
193
+ "eval_steps_per_second": 4.769,
194
+ "step": 26
195
+ },
196
+ {
197
+ "epoch": 20.0,
198
+ "eval_accuracy": 0.9027777777777778,
199
+ "eval_loss": 0.3773989677429199,
200
+ "eval_runtime": 0.63,
201
+ "eval_samples_per_second": 228.574,
202
+ "eval_steps_per_second": 4.762,
203
+ "step": 28
204
+ },
205
+ {
206
+ "epoch": 20.714285714285715,
207
+ "eval_accuracy": 0.9097222222222222,
208
+ "eval_loss": 0.37053972482681274,
209
+ "eval_runtime": 0.6212,
210
+ "eval_samples_per_second": 231.817,
211
+ "eval_steps_per_second": 4.83,
212
+ "step": 29
213
+ },
214
+ {
215
+ "epoch": 21.428571428571427,
216
+ "grad_norm": 5.860249996185303,
217
+ "learning_rate": 9.333333333333334e-06,
218
+ "loss": 0.4028,
219
+ "step": 30
220
+ },
221
+ {
222
+ "epoch": 21.428571428571427,
223
+ "eval_accuracy": 0.9097222222222222,
224
+ "eval_loss": 0.3586524724960327,
225
+ "eval_runtime": 0.6377,
226
+ "eval_samples_per_second": 225.813,
227
+ "eval_steps_per_second": 4.704,
228
+ "step": 30
229
+ },
230
+ {
231
+ "epoch": 22.857142857142858,
232
+ "eval_accuracy": 0.8958333333333334,
233
+ "eval_loss": 0.36622118949890137,
234
+ "eval_runtime": 0.6654,
235
+ "eval_samples_per_second": 216.406,
236
+ "eval_steps_per_second": 4.508,
237
+ "step": 32
238
+ },
239
+ {
240
+ "epoch": 23.571428571428573,
241
+ "eval_accuracy": 0.8680555555555556,
242
+ "eval_loss": 0.37790825963020325,
243
+ "eval_runtime": 0.6384,
244
+ "eval_samples_per_second": 225.558,
245
+ "eval_steps_per_second": 4.699,
246
+ "step": 33
247
+ },
248
+ {
249
+ "epoch": 25.0,
250
+ "eval_accuracy": 0.8263888888888888,
251
+ "eval_loss": 0.43221160769462585,
252
+ "eval_runtime": 0.6264,
253
+ "eval_samples_per_second": 229.872,
254
+ "eval_steps_per_second": 4.789,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 25.714285714285715,
259
+ "eval_accuracy": 0.8333333333333334,
260
+ "eval_loss": 0.39439037442207336,
261
+ "eval_runtime": 0.6534,
262
+ "eval_samples_per_second": 220.396,
263
+ "eval_steps_per_second": 4.592,
264
+ "step": 36
265
+ },
266
+ {
267
+ "epoch": 26.428571428571427,
268
+ "eval_accuracy": 0.8888888888888888,
269
+ "eval_loss": 0.3585418462753296,
270
+ "eval_runtime": 0.6327,
271
+ "eval_samples_per_second": 227.579,
272
+ "eval_steps_per_second": 4.741,
273
+ "step": 37
274
+ },
275
+ {
276
+ "epoch": 27.857142857142858,
277
+ "eval_accuracy": 0.8888888888888888,
278
+ "eval_loss": 0.3607942461967468,
279
+ "eval_runtime": 0.6624,
280
+ "eval_samples_per_second": 217.391,
281
+ "eval_steps_per_second": 4.529,
282
+ "step": 39
283
+ },
284
+ {
285
+ "epoch": 28.571428571428573,
286
+ "grad_norm": 3.4132988452911377,
287
+ "learning_rate": 8.000000000000001e-06,
288
+ "loss": 0.3497,
289
+ "step": 40
290
+ },
291
+ {
292
+ "epoch": 28.571428571428573,
293
+ "eval_accuracy": 0.8472222222222222,
294
+ "eval_loss": 0.39719662070274353,
295
+ "eval_runtime": 0.643,
296
+ "eval_samples_per_second": 223.955,
297
+ "eval_steps_per_second": 4.666,
298
+ "step": 40
299
+ },
300
+ {
301
+ "epoch": 30.0,
302
+ "eval_accuracy": 0.8611111111111112,
303
+ "eval_loss": 0.3804582953453064,
304
+ "eval_runtime": 0.6301,
305
+ "eval_samples_per_second": 228.526,
306
+ "eval_steps_per_second": 4.761,
307
+ "step": 42
308
+ },
309
+ {
310
+ "epoch": 30.714285714285715,
311
+ "eval_accuracy": 0.8819444444444444,
312
+ "eval_loss": 0.3610667586326599,
313
+ "eval_runtime": 0.6248,
314
+ "eval_samples_per_second": 230.471,
315
+ "eval_steps_per_second": 4.801,
316
+ "step": 43
317
+ },
318
+ {
319
+ "epoch": 31.428571428571427,
320
+ "eval_accuracy": 0.9166666666666666,
321
+ "eval_loss": 0.32666629552841187,
322
+ "eval_runtime": 0.6456,
323
+ "eval_samples_per_second": 223.058,
324
+ "eval_steps_per_second": 4.647,
325
+ "step": 44
326
+ },
327
+ {
328
+ "epoch": 32.857142857142854,
329
+ "eval_accuracy": 0.9027777777777778,
330
+ "eval_loss": 0.3402611017227173,
331
+ "eval_runtime": 0.6337,
332
+ "eval_samples_per_second": 227.247,
333
+ "eval_steps_per_second": 4.734,
334
+ "step": 46
335
+ },
336
+ {
337
+ "epoch": 33.57142857142857,
338
+ "eval_accuracy": 0.875,
339
+ "eval_loss": 0.37514248490333557,
340
+ "eval_runtime": 0.7032,
341
+ "eval_samples_per_second": 204.765,
342
+ "eval_steps_per_second": 4.266,
343
+ "step": 47
344
+ },
345
+ {
346
+ "epoch": 35.0,
347
+ "eval_accuracy": 0.8680555555555556,
348
+ "eval_loss": 0.3801332414150238,
349
+ "eval_runtime": 0.659,
350
+ "eval_samples_per_second": 218.502,
351
+ "eval_steps_per_second": 4.552,
352
+ "step": 49
353
+ },
354
+ {
355
+ "epoch": 35.714285714285715,
356
+ "grad_norm": 6.472348213195801,
357
+ "learning_rate": 6.666666666666667e-06,
358
+ "loss": 0.3278,
359
+ "step": 50
360
+ },
361
+ {
362
+ "epoch": 35.714285714285715,
363
+ "eval_accuracy": 0.8958333333333334,
364
+ "eval_loss": 0.34991347789764404,
365
+ "eval_runtime": 0.6712,
366
+ "eval_samples_per_second": 214.545,
367
+ "eval_steps_per_second": 4.47,
368
+ "step": 50
369
+ },
370
+ {
371
+ "epoch": 36.42857142857143,
372
+ "eval_accuracy": 0.8958333333333334,
373
+ "eval_loss": 0.33839675784111023,
374
+ "eval_runtime": 0.6365,
375
+ "eval_samples_per_second": 226.231,
376
+ "eval_steps_per_second": 4.713,
377
+ "step": 51
378
+ },
379
+ {
380
+ "epoch": 37.857142857142854,
381
+ "eval_accuracy": 0.8541666666666666,
382
+ "eval_loss": 0.3642105460166931,
383
+ "eval_runtime": 0.6301,
384
+ "eval_samples_per_second": 228.534,
385
+ "eval_steps_per_second": 4.761,
386
+ "step": 53
387
+ },
388
+ {
389
+ "epoch": 38.57142857142857,
390
+ "eval_accuracy": 0.8194444444444444,
391
+ "eval_loss": 0.3996630907058716,
392
+ "eval_runtime": 0.6609,
393
+ "eval_samples_per_second": 217.89,
394
+ "eval_steps_per_second": 4.539,
395
+ "step": 54
396
+ },
397
+ {
398
+ "epoch": 40.0,
399
+ "eval_accuracy": 0.8402777777777778,
400
+ "eval_loss": 0.3843066394329071,
401
+ "eval_runtime": 0.6636,
402
+ "eval_samples_per_second": 216.984,
403
+ "eval_steps_per_second": 4.521,
404
+ "step": 56
405
+ },
406
+ {
407
+ "epoch": 40.714285714285715,
408
+ "eval_accuracy": 0.8680555555555556,
409
+ "eval_loss": 0.3675690293312073,
410
+ "eval_runtime": 0.6194,
411
+ "eval_samples_per_second": 232.479,
412
+ "eval_steps_per_second": 4.843,
413
+ "step": 57
414
+ },
415
+ {
416
+ "epoch": 41.42857142857143,
417
+ "eval_accuracy": 0.9027777777777778,
418
+ "eval_loss": 0.3464236557483673,
419
+ "eval_runtime": 0.6267,
420
+ "eval_samples_per_second": 229.775,
421
+ "eval_steps_per_second": 4.787,
422
+ "step": 58
423
+ },
424
+ {
425
+ "epoch": 42.857142857142854,
426
+ "grad_norm": 6.519013404846191,
427
+ "learning_rate": 5.333333333333334e-06,
428
+ "loss": 0.3334,
429
+ "step": 60
430
+ },
431
+ {
432
+ "epoch": 42.857142857142854,
433
+ "eval_accuracy": 0.8819444444444444,
434
+ "eval_loss": 0.36175864934921265,
435
+ "eval_runtime": 0.7106,
436
+ "eval_samples_per_second": 202.636,
437
+ "eval_steps_per_second": 4.222,
438
+ "step": 60
439
+ },
440
+ {
441
+ "epoch": 43.57142857142857,
442
+ "eval_accuracy": 0.8194444444444444,
443
+ "eval_loss": 0.40056005120277405,
444
+ "eval_runtime": 0.6703,
445
+ "eval_samples_per_second": 214.814,
446
+ "eval_steps_per_second": 4.475,
447
+ "step": 61
448
+ },
449
+ {
450
+ "epoch": 45.0,
451
+ "eval_accuracy": 0.7638888888888888,
452
+ "eval_loss": 0.49312081933021545,
453
+ "eval_runtime": 0.7051,
454
+ "eval_samples_per_second": 204.23,
455
+ "eval_steps_per_second": 4.255,
456
+ "step": 63
457
+ },
458
+ {
459
+ "epoch": 45.714285714285715,
460
+ "eval_accuracy": 0.7708333333333334,
461
+ "eval_loss": 0.48445218801498413,
462
+ "eval_runtime": 0.71,
463
+ "eval_samples_per_second": 202.808,
464
+ "eval_steps_per_second": 4.225,
465
+ "step": 64
466
+ },
467
+ {
468
+ "epoch": 46.42857142857143,
469
+ "eval_accuracy": 0.7916666666666666,
470
+ "eval_loss": 0.4485209286212921,
471
+ "eval_runtime": 0.6303,
472
+ "eval_samples_per_second": 228.459,
473
+ "eval_steps_per_second": 4.76,
474
+ "step": 65
475
+ },
476
+ {
477
+ "epoch": 47.857142857142854,
478
+ "eval_accuracy": 0.8472222222222222,
479
+ "eval_loss": 0.378328800201416,
480
+ "eval_runtime": 0.6131,
481
+ "eval_samples_per_second": 234.855,
482
+ "eval_steps_per_second": 4.893,
483
+ "step": 67
484
+ },
485
+ {
486
+ "epoch": 48.57142857142857,
487
+ "eval_accuracy": 0.8472222222222222,
488
+ "eval_loss": 0.37234801054000854,
489
+ "eval_runtime": 0.6727,
490
+ "eval_samples_per_second": 214.05,
491
+ "eval_steps_per_second": 4.459,
492
+ "step": 68
493
+ },
494
+ {
495
+ "epoch": 50.0,
496
+ "grad_norm": 5.204492092132568,
497
+ "learning_rate": 4.000000000000001e-06,
498
+ "loss": 0.3334,
499
+ "step": 70
500
+ },
501
+ {
502
+ "epoch": 50.0,
503
+ "eval_accuracy": 0.8125,
504
+ "eval_loss": 0.407737672328949,
505
+ "eval_runtime": 0.6505,
506
+ "eval_samples_per_second": 221.382,
507
+ "eval_steps_per_second": 4.612,
508
+ "step": 70
509
+ },
510
+ {
511
+ "epoch": 50.714285714285715,
512
+ "eval_accuracy": 0.7986111111111112,
513
+ "eval_loss": 0.4380877912044525,
514
+ "eval_runtime": 0.7093,
515
+ "eval_samples_per_second": 203.024,
516
+ "eval_steps_per_second": 4.23,
517
+ "step": 71
518
+ },
519
+ {
520
+ "epoch": 51.42857142857143,
521
+ "eval_accuracy": 0.7847222222222222,
522
+ "eval_loss": 0.46269893646240234,
523
+ "eval_runtime": 0.6731,
524
+ "eval_samples_per_second": 213.937,
525
+ "eval_steps_per_second": 4.457,
526
+ "step": 72
527
+ },
528
+ {
529
+ "epoch": 52.857142857142854,
530
+ "eval_accuracy": 0.7986111111111112,
531
+ "eval_loss": 0.44445788860321045,
532
+ "eval_runtime": 0.6372,
533
+ "eval_samples_per_second": 225.995,
534
+ "eval_steps_per_second": 4.708,
535
+ "step": 74
536
+ },
537
+ {
538
+ "epoch": 53.57142857142857,
539
+ "eval_accuracy": 0.8125,
540
+ "eval_loss": 0.41410741209983826,
541
+ "eval_runtime": 0.6728,
542
+ "eval_samples_per_second": 214.026,
543
+ "eval_steps_per_second": 4.459,
544
+ "step": 75
545
+ },
546
+ {
547
+ "epoch": 55.0,
548
+ "eval_accuracy": 0.8680555555555556,
549
+ "eval_loss": 0.3488573431968689,
550
+ "eval_runtime": 0.6306,
551
+ "eval_samples_per_second": 228.368,
552
+ "eval_steps_per_second": 4.758,
553
+ "step": 77
554
+ },
555
+ {
556
+ "epoch": 55.714285714285715,
557
+ "eval_accuracy": 0.8958333333333334,
558
+ "eval_loss": 0.33705562353134155,
559
+ "eval_runtime": 0.6859,
560
+ "eval_samples_per_second": 209.945,
561
+ "eval_steps_per_second": 4.374,
562
+ "step": 78
563
+ },
564
+ {
565
+ "epoch": 56.42857142857143,
566
+ "eval_accuracy": 0.8888888888888888,
567
+ "eval_loss": 0.3358408510684967,
568
+ "eval_runtime": 0.6867,
569
+ "eval_samples_per_second": 209.701,
570
+ "eval_steps_per_second": 4.369,
571
+ "step": 79
572
+ },
573
+ {
574
+ "epoch": 57.142857142857146,
575
+ "grad_norm": 5.973431587219238,
576
+ "learning_rate": 2.666666666666667e-06,
577
+ "loss": 0.3105,
578
+ "step": 80
579
+ },
580
+ {
581
+ "epoch": 57.857142857142854,
582
+ "eval_accuracy": 0.8680555555555556,
583
+ "eval_loss": 0.3538711965084076,
584
+ "eval_runtime": 0.7307,
585
+ "eval_samples_per_second": 197.072,
586
+ "eval_steps_per_second": 4.106,
587
+ "step": 81
588
+ },
589
+ {
590
+ "epoch": 58.57142857142857,
591
+ "eval_accuracy": 0.8541666666666666,
592
+ "eval_loss": 0.3678491413593292,
593
+ "eval_runtime": 0.6126,
594
+ "eval_samples_per_second": 235.055,
595
+ "eval_steps_per_second": 4.897,
596
+ "step": 82
597
+ },
598
+ {
599
+ "epoch": 60.0,
600
+ "eval_accuracy": 0.8263888888888888,
601
+ "eval_loss": 0.3930552899837494,
602
+ "eval_runtime": 0.633,
603
+ "eval_samples_per_second": 227.478,
604
+ "eval_steps_per_second": 4.739,
605
+ "step": 84
606
+ },
607
+ {
608
+ "epoch": 60.714285714285715,
609
+ "eval_accuracy": 0.8263888888888888,
610
+ "eval_loss": 0.3938286006450653,
611
+ "eval_runtime": 0.6516,
612
+ "eval_samples_per_second": 220.996,
613
+ "eval_steps_per_second": 4.604,
614
+ "step": 85
615
+ },
616
+ {
617
+ "epoch": 61.42857142857143,
618
+ "eval_accuracy": 0.8472222222222222,
619
+ "eval_loss": 0.3896949589252472,
620
+ "eval_runtime": 0.6582,
621
+ "eval_samples_per_second": 218.793,
622
+ "eval_steps_per_second": 4.558,
623
+ "step": 86
624
+ },
625
+ {
626
+ "epoch": 62.857142857142854,
627
+ "eval_accuracy": 0.8611111111111112,
628
+ "eval_loss": 0.3637922406196594,
629
+ "eval_runtime": 0.6651,
630
+ "eval_samples_per_second": 216.505,
631
+ "eval_steps_per_second": 4.511,
632
+ "step": 88
633
+ },
634
+ {
635
+ "epoch": 63.57142857142857,
636
+ "eval_accuracy": 0.875,
637
+ "eval_loss": 0.34960028529167175,
638
+ "eval_runtime": 0.6331,
639
+ "eval_samples_per_second": 227.443,
640
+ "eval_steps_per_second": 4.738,
641
+ "step": 89
642
+ },
643
+ {
644
+ "epoch": 64.28571428571429,
645
+ "grad_norm": 6.167888164520264,
646
+ "learning_rate": 1.3333333333333334e-06,
647
+ "loss": 0.3061,
648
+ "step": 90
649
+ },
650
+ {
651
+ "epoch": 65.0,
652
+ "eval_accuracy": 0.8958333333333334,
653
+ "eval_loss": 0.3304632306098938,
654
+ "eval_runtime": 0.6686,
655
+ "eval_samples_per_second": 215.369,
656
+ "eval_steps_per_second": 4.487,
657
+ "step": 91
658
+ },
659
+ {
660
+ "epoch": 65.71428571428571,
661
+ "eval_accuracy": 0.9027777777777778,
662
+ "eval_loss": 0.3283728361129761,
663
+ "eval_runtime": 0.6659,
664
+ "eval_samples_per_second": 216.261,
665
+ "eval_steps_per_second": 4.505,
666
+ "step": 92
667
+ },
668
+ {
669
+ "epoch": 66.42857142857143,
670
+ "eval_accuracy": 0.8958333333333334,
671
+ "eval_loss": 0.3283740282058716,
672
+ "eval_runtime": 0.6699,
673
+ "eval_samples_per_second": 214.972,
674
+ "eval_steps_per_second": 4.479,
675
+ "step": 93
676
+ },
677
+ {
678
+ "epoch": 67.85714285714286,
679
+ "eval_accuracy": 0.8958333333333334,
680
+ "eval_loss": 0.33374664187431335,
681
+ "eval_runtime": 0.668,
682
+ "eval_samples_per_second": 215.572,
683
+ "eval_steps_per_second": 4.491,
684
+ "step": 95
685
+ },
686
+ {
687
+ "epoch": 68.57142857142857,
688
+ "eval_accuracy": 0.8888888888888888,
689
+ "eval_loss": 0.33741050958633423,
690
+ "eval_runtime": 0.6191,
691
+ "eval_samples_per_second": 232.594,
692
+ "eval_steps_per_second": 4.846,
693
+ "step": 96
694
+ },
695
+ {
696
+ "epoch": 70.0,
697
+ "eval_accuracy": 0.875,
698
+ "eval_loss": 0.34418821334838867,
699
+ "eval_runtime": 0.6959,
700
+ "eval_samples_per_second": 206.929,
701
+ "eval_steps_per_second": 4.311,
702
+ "step": 98
703
+ },
704
+ {
705
+ "epoch": 70.71428571428571,
706
+ "eval_accuracy": 0.875,
707
+ "eval_loss": 0.34521356225013733,
708
+ "eval_runtime": 0.6376,
709
+ "eval_samples_per_second": 225.852,
710
+ "eval_steps_per_second": 4.705,
711
+ "step": 99
712
+ },
713
+ {
714
+ "epoch": 71.42857142857143,
715
+ "grad_norm": 5.671431541442871,
716
+ "learning_rate": 0.0,
717
+ "loss": 0.3137,
718
+ "step": 100
719
+ },
720
+ {
721
+ "epoch": 71.42857142857143,
722
+ "eval_accuracy": 0.875,
723
+ "eval_loss": 0.3459942042827606,
724
+ "eval_runtime": 0.7141,
725
+ "eval_samples_per_second": 201.642,
726
+ "eval_steps_per_second": 4.201,
727
+ "step": 100
728
+ },
729
+ {
730
+ "epoch": 71.42857142857143,
731
+ "step": 100,
732
+ "total_flos": 5.956344520589353e+17,
733
+ "train_loss": 0.420122013092041,
734
+ "train_runtime": 326.0769,
735
+ "train_samples_per_second": 130.644,
736
+ "train_steps_per_second": 0.307
737
+ }
738
+ ],
739
+ "logging_steps": 10,
740
+ "max_steps": 100,
741
+ "num_input_tokens_seen": 0,
742
+ "num_train_epochs": 100,
743
+ "save_steps": 500,
744
+ "stateful_callbacks": {
745
+ "TrainerControl": {
746
+ "args": {
747
+ "should_epoch_stop": false,
748
+ "should_evaluate": false,
749
+ "should_log": false,
750
+ "should_save": true,
751
+ "should_training_stop": true
752
+ },
753
+ "attributes": {}
754
+ }
755
+ },
756
+ "total_flos": 5.956344520589353e+17,
757
+ "train_batch_size": 64,
758
+ "trial_name": null,
759
+ "trial_params": null
760
+ }