Augusto777 commited on
Commit
035d62b
·
verified ·
1 Parent(s): b5ed72a

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.6521739130434783
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.9880
36
- - Accuracy: 0.6522
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.6739130434782609
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 1.0510
36
+ - Accuracy: 0.6739
37
 
38
  ## Model description
39
 
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 68.57,
3
+ "eval_accuracy": 0.6739130434782609,
4
+ "eval_loss": 1.0509947538375854,
5
+ "eval_runtime": 0.4916,
6
+ "eval_samples_per_second": 93.569,
7
+ "eval_steps_per_second": 6.102,
8
+ "train_loss": 1.113492695490519,
9
+ "train_runtime": 374.3307,
10
+ "train_samples_per_second": 45.521,
11
+ "train_steps_per_second": 0.641
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 68.57,
3
+ "eval_accuracy": 0.6739130434782609,
4
+ "eval_loss": 1.0509947538375854,
5
+ "eval_runtime": 0.4916,
6
+ "eval_samples_per_second": 93.569,
7
+ "eval_steps_per_second": 6.102
8
+ }
runs/Dec03_17-59-04_DESKTOP-SKBE9FB/events.out.tfevents.1733270806.DESKTOP-SKBE9FB.17260.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b164b0a54f5e63625583ce568f6dee04d45d3c0b285cb32673c764979aaf5cc9
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 68.57,
3
+ "train_loss": 1.113492695490519,
4
+ "train_runtime": 374.3307,
5
+ "train_samples_per_second": 45.521,
6
+ "train_steps_per_second": 0.641
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,795 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6739130434782609,
3
+ "best_model_checkpoint": "SW2-DMAE-2\\checkpoint-168",
4
+ "epoch": 68.57142857142857,
5
+ "eval_steps": 500,
6
+ "global_step": 240,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.86,
13
+ "eval_accuracy": 0.10869565217391304,
14
+ "eval_loss": 1.6269055604934692,
15
+ "eval_runtime": 0.2761,
16
+ "eval_samples_per_second": 166.629,
17
+ "eval_steps_per_second": 10.867,
18
+ "step": 3
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.10869565217391304,
23
+ "eval_loss": 1.6078405380249023,
24
+ "eval_runtime": 0.2401,
25
+ "eval_samples_per_second": 191.623,
26
+ "eval_steps_per_second": 12.497,
27
+ "step": 7
28
+ },
29
+ {
30
+ "epoch": 2.86,
31
+ "learning_rate": 1.4375e-05,
32
+ "loss": 1.618,
33
+ "step": 10
34
+ },
35
+ {
36
+ "epoch": 2.86,
37
+ "eval_accuracy": 0.10869565217391304,
38
+ "eval_loss": 1.585246205329895,
39
+ "eval_runtime": 0.3111,
40
+ "eval_samples_per_second": 147.877,
41
+ "eval_steps_per_second": 9.644,
42
+ "step": 10
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_accuracy": 0.10869565217391304,
47
+ "eval_loss": 1.539771556854248,
48
+ "eval_runtime": 0.2925,
49
+ "eval_samples_per_second": 157.267,
50
+ "eval_steps_per_second": 10.257,
51
+ "step": 14
52
+ },
53
+ {
54
+ "epoch": 4.86,
55
+ "eval_accuracy": 0.10869565217391304,
56
+ "eval_loss": 1.4947997331619263,
57
+ "eval_runtime": 0.2826,
58
+ "eval_samples_per_second": 162.793,
59
+ "eval_steps_per_second": 10.617,
60
+ "step": 17
61
+ },
62
+ {
63
+ "epoch": 5.71,
64
+ "learning_rate": 1.375e-05,
65
+ "loss": 1.5162,
66
+ "step": 20
67
+ },
68
+ {
69
+ "epoch": 6.0,
70
+ "eval_accuracy": 0.10869565217391304,
71
+ "eval_loss": 1.434383749961853,
72
+ "eval_runtime": 0.2591,
73
+ "eval_samples_per_second": 177.566,
74
+ "eval_steps_per_second": 11.58,
75
+ "step": 21
76
+ },
77
+ {
78
+ "epoch": 6.86,
79
+ "eval_accuracy": 0.10869565217391304,
80
+ "eval_loss": 1.3878703117370605,
81
+ "eval_runtime": 0.2371,
82
+ "eval_samples_per_second": 194.049,
83
+ "eval_steps_per_second": 12.655,
84
+ "step": 24
85
+ },
86
+ {
87
+ "epoch": 8.0,
88
+ "eval_accuracy": 0.17391304347826086,
89
+ "eval_loss": 1.328822135925293,
90
+ "eval_runtime": 0.2481,
91
+ "eval_samples_per_second": 185.442,
92
+ "eval_steps_per_second": 12.094,
93
+ "step": 28
94
+ },
95
+ {
96
+ "epoch": 8.57,
97
+ "learning_rate": 1.3125e-05,
98
+ "loss": 1.3459,
99
+ "step": 30
100
+ },
101
+ {
102
+ "epoch": 8.86,
103
+ "eval_accuracy": 0.45652173913043476,
104
+ "eval_loss": 1.2925546169281006,
105
+ "eval_runtime": 0.2481,
106
+ "eval_samples_per_second": 185.442,
107
+ "eval_steps_per_second": 12.094,
108
+ "step": 31
109
+ },
110
+ {
111
+ "epoch": 10.0,
112
+ "eval_accuracy": 0.45652173913043476,
113
+ "eval_loss": 1.2562181949615479,
114
+ "eval_runtime": 0.2541,
115
+ "eval_samples_per_second": 181.006,
116
+ "eval_steps_per_second": 11.805,
117
+ "step": 35
118
+ },
119
+ {
120
+ "epoch": 10.86,
121
+ "eval_accuracy": 0.45652173913043476,
122
+ "eval_loss": 1.238446593284607,
123
+ "eval_runtime": 0.2411,
124
+ "eval_samples_per_second": 190.828,
125
+ "eval_steps_per_second": 12.445,
126
+ "step": 38
127
+ },
128
+ {
129
+ "epoch": 11.43,
130
+ "learning_rate": 1.25e-05,
131
+ "loss": 1.2384,
132
+ "step": 40
133
+ },
134
+ {
135
+ "epoch": 12.0,
136
+ "eval_accuracy": 0.45652173913043476,
137
+ "eval_loss": 1.2205413579940796,
138
+ "eval_runtime": 0.2537,
139
+ "eval_samples_per_second": 181.326,
140
+ "eval_steps_per_second": 11.826,
141
+ "step": 42
142
+ },
143
+ {
144
+ "epoch": 12.86,
145
+ "eval_accuracy": 0.45652173913043476,
146
+ "eval_loss": 1.2173599004745483,
147
+ "eval_runtime": 0.2401,
148
+ "eval_samples_per_second": 191.624,
149
+ "eval_steps_per_second": 12.497,
150
+ "step": 45
151
+ },
152
+ {
153
+ "epoch": 14.0,
154
+ "eval_accuracy": 0.45652173913043476,
155
+ "eval_loss": 1.2131370306015015,
156
+ "eval_runtime": 0.2691,
157
+ "eval_samples_per_second": 170.965,
158
+ "eval_steps_per_second": 11.15,
159
+ "step": 49
160
+ },
161
+ {
162
+ "epoch": 14.29,
163
+ "learning_rate": 1.1874999999999999e-05,
164
+ "loss": 1.2049,
165
+ "step": 50
166
+ },
167
+ {
168
+ "epoch": 14.86,
169
+ "eval_accuracy": 0.45652173913043476,
170
+ "eval_loss": 1.2104469537734985,
171
+ "eval_runtime": 0.2431,
172
+ "eval_samples_per_second": 189.258,
173
+ "eval_steps_per_second": 12.343,
174
+ "step": 52
175
+ },
176
+ {
177
+ "epoch": 16.0,
178
+ "eval_accuracy": 0.45652173913043476,
179
+ "eval_loss": 1.208552598953247,
180
+ "eval_runtime": 0.2451,
181
+ "eval_samples_per_second": 187.713,
182
+ "eval_steps_per_second": 12.242,
183
+ "step": 56
184
+ },
185
+ {
186
+ "epoch": 16.86,
187
+ "eval_accuracy": 0.45652173913043476,
188
+ "eval_loss": 1.2076021432876587,
189
+ "eval_runtime": 0.2556,
190
+ "eval_samples_per_second": 179.993,
191
+ "eval_steps_per_second": 11.739,
192
+ "step": 59
193
+ },
194
+ {
195
+ "epoch": 17.14,
196
+ "learning_rate": 1.125e-05,
197
+ "loss": 1.1815,
198
+ "step": 60
199
+ },
200
+ {
201
+ "epoch": 18.0,
202
+ "eval_accuracy": 0.45652173913043476,
203
+ "eval_loss": 1.2051774263381958,
204
+ "eval_runtime": 0.2431,
205
+ "eval_samples_per_second": 189.258,
206
+ "eval_steps_per_second": 12.343,
207
+ "step": 63
208
+ },
209
+ {
210
+ "epoch": 18.86,
211
+ "eval_accuracy": 0.45652173913043476,
212
+ "eval_loss": 1.204942226409912,
213
+ "eval_runtime": 0.3966,
214
+ "eval_samples_per_second": 115.986,
215
+ "eval_steps_per_second": 7.564,
216
+ "step": 66
217
+ },
218
+ {
219
+ "epoch": 20.0,
220
+ "learning_rate": 1.0625e-05,
221
+ "loss": 1.1826,
222
+ "step": 70
223
+ },
224
+ {
225
+ "epoch": 20.0,
226
+ "eval_accuracy": 0.45652173913043476,
227
+ "eval_loss": 1.2018660306930542,
228
+ "eval_runtime": 0.3121,
229
+ "eval_samples_per_second": 147.402,
230
+ "eval_steps_per_second": 9.613,
231
+ "step": 70
232
+ },
233
+ {
234
+ "epoch": 20.86,
235
+ "eval_accuracy": 0.45652173913043476,
236
+ "eval_loss": 1.1960418224334717,
237
+ "eval_runtime": 0.3191,
238
+ "eval_samples_per_second": 144.168,
239
+ "eval_steps_per_second": 9.402,
240
+ "step": 73
241
+ },
242
+ {
243
+ "epoch": 22.0,
244
+ "eval_accuracy": 0.45652173913043476,
245
+ "eval_loss": 1.1926813125610352,
246
+ "eval_runtime": 0.3096,
247
+ "eval_samples_per_second": 148.592,
248
+ "eval_steps_per_second": 9.691,
249
+ "step": 77
250
+ },
251
+ {
252
+ "epoch": 22.86,
253
+ "learning_rate": 9.999999999999999e-06,
254
+ "loss": 1.1647,
255
+ "step": 80
256
+ },
257
+ {
258
+ "epoch": 22.86,
259
+ "eval_accuracy": 0.45652173913043476,
260
+ "eval_loss": 1.1927775144577026,
261
+ "eval_runtime": 0.2491,
262
+ "eval_samples_per_second": 184.697,
263
+ "eval_steps_per_second": 12.045,
264
+ "step": 80
265
+ },
266
+ {
267
+ "epoch": 24.0,
268
+ "eval_accuracy": 0.45652173913043476,
269
+ "eval_loss": 1.1924192905426025,
270
+ "eval_runtime": 0.2991,
271
+ "eval_samples_per_second": 153.812,
272
+ "eval_steps_per_second": 10.031,
273
+ "step": 84
274
+ },
275
+ {
276
+ "epoch": 24.86,
277
+ "eval_accuracy": 0.45652173913043476,
278
+ "eval_loss": 1.1902908086776733,
279
+ "eval_runtime": 0.3005,
280
+ "eval_samples_per_second": 153.097,
281
+ "eval_steps_per_second": 9.985,
282
+ "step": 87
283
+ },
284
+ {
285
+ "epoch": 25.71,
286
+ "learning_rate": 9.375000000000001e-06,
287
+ "loss": 1.1568,
288
+ "step": 90
289
+ },
290
+ {
291
+ "epoch": 26.0,
292
+ "eval_accuracy": 0.45652173913043476,
293
+ "eval_loss": 1.1878631114959717,
294
+ "eval_runtime": 0.2861,
295
+ "eval_samples_per_second": 160.803,
296
+ "eval_steps_per_second": 10.487,
297
+ "step": 91
298
+ },
299
+ {
300
+ "epoch": 26.86,
301
+ "eval_accuracy": 0.45652173913043476,
302
+ "eval_loss": 1.1913325786590576,
303
+ "eval_runtime": 0.2806,
304
+ "eval_samples_per_second": 163.951,
305
+ "eval_steps_per_second": 10.692,
306
+ "step": 94
307
+ },
308
+ {
309
+ "epoch": 28.0,
310
+ "eval_accuracy": 0.4782608695652174,
311
+ "eval_loss": 1.204640507698059,
312
+ "eval_runtime": 0.2621,
313
+ "eval_samples_per_second": 175.533,
314
+ "eval_steps_per_second": 11.448,
315
+ "step": 98
316
+ },
317
+ {
318
+ "epoch": 28.57,
319
+ "learning_rate": 8.750000000000001e-06,
320
+ "loss": 1.1432,
321
+ "step": 100
322
+ },
323
+ {
324
+ "epoch": 28.86,
325
+ "eval_accuracy": 0.4782608695652174,
326
+ "eval_loss": 1.193393588066101,
327
+ "eval_runtime": 0.2361,
328
+ "eval_samples_per_second": 194.871,
329
+ "eval_steps_per_second": 12.709,
330
+ "step": 101
331
+ },
332
+ {
333
+ "epoch": 30.0,
334
+ "eval_accuracy": 0.4782608695652174,
335
+ "eval_loss": 1.166512131690979,
336
+ "eval_runtime": 0.2421,
337
+ "eval_samples_per_second": 190.04,
338
+ "eval_steps_per_second": 12.394,
339
+ "step": 105
340
+ },
341
+ {
342
+ "epoch": 30.86,
343
+ "eval_accuracy": 0.4782608695652174,
344
+ "eval_loss": 1.1600818634033203,
345
+ "eval_runtime": 0.2381,
346
+ "eval_samples_per_second": 193.234,
347
+ "eval_steps_per_second": 12.602,
348
+ "step": 108
349
+ },
350
+ {
351
+ "epoch": 31.43,
352
+ "learning_rate": 8.125e-06,
353
+ "loss": 1.1112,
354
+ "step": 110
355
+ },
356
+ {
357
+ "epoch": 32.0,
358
+ "eval_accuracy": 0.5,
359
+ "eval_loss": 1.1623895168304443,
360
+ "eval_runtime": 0.2801,
361
+ "eval_samples_per_second": 164.249,
362
+ "eval_steps_per_second": 10.712,
363
+ "step": 112
364
+ },
365
+ {
366
+ "epoch": 32.86,
367
+ "eval_accuracy": 0.5217391304347826,
368
+ "eval_loss": 1.1663668155670166,
369
+ "eval_runtime": 0.3126,
370
+ "eval_samples_per_second": 147.162,
371
+ "eval_steps_per_second": 9.597,
372
+ "step": 115
373
+ },
374
+ {
375
+ "epoch": 34.0,
376
+ "eval_accuracy": 0.5,
377
+ "eval_loss": 1.1692047119140625,
378
+ "eval_runtime": 0.2586,
379
+ "eval_samples_per_second": 177.904,
380
+ "eval_steps_per_second": 11.602,
381
+ "step": 119
382
+ },
383
+ {
384
+ "epoch": 34.29,
385
+ "learning_rate": 7.5e-06,
386
+ "loss": 1.1132,
387
+ "step": 120
388
+ },
389
+ {
390
+ "epoch": 34.86,
391
+ "eval_accuracy": 0.5434782608695652,
392
+ "eval_loss": 1.1513336896896362,
393
+ "eval_runtime": 0.2416,
394
+ "eval_samples_per_second": 190.427,
395
+ "eval_steps_per_second": 12.419,
396
+ "step": 122
397
+ },
398
+ {
399
+ "epoch": 36.0,
400
+ "eval_accuracy": 0.5869565217391305,
401
+ "eval_loss": 1.1384443044662476,
402
+ "eval_runtime": 0.2461,
403
+ "eval_samples_per_second": 186.95,
404
+ "eval_steps_per_second": 12.192,
405
+ "step": 126
406
+ },
407
+ {
408
+ "epoch": 36.86,
409
+ "eval_accuracy": 0.6086956521739131,
410
+ "eval_loss": 1.127366542816162,
411
+ "eval_runtime": 0.2446,
412
+ "eval_samples_per_second": 188.091,
413
+ "eval_steps_per_second": 12.267,
414
+ "step": 129
415
+ },
416
+ {
417
+ "epoch": 37.14,
418
+ "learning_rate": 6.875e-06,
419
+ "loss": 1.0642,
420
+ "step": 130
421
+ },
422
+ {
423
+ "epoch": 38.0,
424
+ "eval_accuracy": 0.5869565217391305,
425
+ "eval_loss": 1.1442575454711914,
426
+ "eval_runtime": 0.2586,
427
+ "eval_samples_per_second": 177.903,
428
+ "eval_steps_per_second": 11.602,
429
+ "step": 133
430
+ },
431
+ {
432
+ "epoch": 38.86,
433
+ "eval_accuracy": 0.5,
434
+ "eval_loss": 1.1651057004928589,
435
+ "eval_runtime": 0.2496,
436
+ "eval_samples_per_second": 184.324,
437
+ "eval_steps_per_second": 12.021,
438
+ "step": 136
439
+ },
440
+ {
441
+ "epoch": 40.0,
442
+ "learning_rate": 6.25e-06,
443
+ "loss": 1.0439,
444
+ "step": 140
445
+ },
446
+ {
447
+ "epoch": 40.0,
448
+ "eval_accuracy": 0.5,
449
+ "eval_loss": 1.149288296699524,
450
+ "eval_runtime": 0.2441,
451
+ "eval_samples_per_second": 188.478,
452
+ "eval_steps_per_second": 12.292,
453
+ "step": 140
454
+ },
455
+ {
456
+ "epoch": 40.86,
457
+ "eval_accuracy": 0.5217391304347826,
458
+ "eval_loss": 1.1330839395523071,
459
+ "eval_runtime": 0.2811,
460
+ "eval_samples_per_second": 163.664,
461
+ "eval_steps_per_second": 10.674,
462
+ "step": 143
463
+ },
464
+ {
465
+ "epoch": 42.0,
466
+ "eval_accuracy": 0.5869565217391305,
467
+ "eval_loss": 1.1032251119613647,
468
+ "eval_runtime": 0.2521,
469
+ "eval_samples_per_second": 182.499,
470
+ "eval_steps_per_second": 11.902,
471
+ "step": 147
472
+ },
473
+ {
474
+ "epoch": 42.86,
475
+ "learning_rate": 5.625e-06,
476
+ "loss": 1.0362,
477
+ "step": 150
478
+ },
479
+ {
480
+ "epoch": 42.86,
481
+ "eval_accuracy": 0.6304347826086957,
482
+ "eval_loss": 1.0988132953643799,
483
+ "eval_runtime": 0.2916,
484
+ "eval_samples_per_second": 157.767,
485
+ "eval_steps_per_second": 10.289,
486
+ "step": 150
487
+ },
488
+ {
489
+ "epoch": 44.0,
490
+ "eval_accuracy": 0.5869565217391305,
491
+ "eval_loss": 1.1092532873153687,
492
+ "eval_runtime": 0.2506,
493
+ "eval_samples_per_second": 183.583,
494
+ "eval_steps_per_second": 11.973,
495
+ "step": 154
496
+ },
497
+ {
498
+ "epoch": 44.86,
499
+ "eval_accuracy": 0.5869565217391305,
500
+ "eval_loss": 1.1101136207580566,
501
+ "eval_runtime": 0.22,
502
+ "eval_samples_per_second": 209.044,
503
+ "eval_steps_per_second": 13.633,
504
+ "step": 157
505
+ },
506
+ {
507
+ "epoch": 45.71,
508
+ "learning_rate": 4.9999999999999996e-06,
509
+ "loss": 1.0177,
510
+ "step": 160
511
+ },
512
+ {
513
+ "epoch": 46.0,
514
+ "eval_accuracy": 0.6304347826086957,
515
+ "eval_loss": 1.0903021097183228,
516
+ "eval_runtime": 0.3931,
517
+ "eval_samples_per_second": 117.022,
518
+ "eval_steps_per_second": 7.632,
519
+ "step": 161
520
+ },
521
+ {
522
+ "epoch": 46.86,
523
+ "eval_accuracy": 0.6521739130434783,
524
+ "eval_loss": 1.069130301475525,
525
+ "eval_runtime": 0.4281,
526
+ "eval_samples_per_second": 107.447,
527
+ "eval_steps_per_second": 7.007,
528
+ "step": 164
529
+ },
530
+ {
531
+ "epoch": 48.0,
532
+ "eval_accuracy": 0.6739130434782609,
533
+ "eval_loss": 1.0509947538375854,
534
+ "eval_runtime": 0.2701,
535
+ "eval_samples_per_second": 170.332,
536
+ "eval_steps_per_second": 11.109,
537
+ "step": 168
538
+ },
539
+ {
540
+ "epoch": 48.57,
541
+ "learning_rate": 4.3750000000000005e-06,
542
+ "loss": 1.0,
543
+ "step": 170
544
+ },
545
+ {
546
+ "epoch": 48.86,
547
+ "eval_accuracy": 0.6521739130434783,
548
+ "eval_loss": 1.0451492071151733,
549
+ "eval_runtime": 0.2466,
550
+ "eval_samples_per_second": 186.563,
551
+ "eval_steps_per_second": 12.167,
552
+ "step": 171
553
+ },
554
+ {
555
+ "epoch": 50.0,
556
+ "eval_accuracy": 0.6521739130434783,
557
+ "eval_loss": 1.0425117015838623,
558
+ "eval_runtime": 0.2591,
559
+ "eval_samples_per_second": 177.566,
560
+ "eval_steps_per_second": 11.58,
561
+ "step": 175
562
+ },
563
+ {
564
+ "epoch": 50.86,
565
+ "eval_accuracy": 0.6086956521739131,
566
+ "eval_loss": 1.0512455701828003,
567
+ "eval_runtime": 0.2671,
568
+ "eval_samples_per_second": 172.246,
569
+ "eval_steps_per_second": 11.233,
570
+ "step": 178
571
+ },
572
+ {
573
+ "epoch": 51.43,
574
+ "learning_rate": 3.75e-06,
575
+ "loss": 0.9636,
576
+ "step": 180
577
+ },
578
+ {
579
+ "epoch": 52.0,
580
+ "eval_accuracy": 0.6304347826086957,
581
+ "eval_loss": 1.044124722480774,
582
+ "eval_runtime": 0.2541,
583
+ "eval_samples_per_second": 181.062,
584
+ "eval_steps_per_second": 11.808,
585
+ "step": 182
586
+ },
587
+ {
588
+ "epoch": 52.86,
589
+ "eval_accuracy": 0.6521739130434783,
590
+ "eval_loss": 1.0401920080184937,
591
+ "eval_runtime": 0.2676,
592
+ "eval_samples_per_second": 171.917,
593
+ "eval_steps_per_second": 11.212,
594
+ "step": 185
595
+ },
596
+ {
597
+ "epoch": 54.0,
598
+ "eval_accuracy": 0.6521739130434783,
599
+ "eval_loss": 1.0161322355270386,
600
+ "eval_runtime": 0.2421,
601
+ "eval_samples_per_second": 190.039,
602
+ "eval_steps_per_second": 12.394,
603
+ "step": 189
604
+ },
605
+ {
606
+ "epoch": 54.29,
607
+ "learning_rate": 3.125e-06,
608
+ "loss": 0.9744,
609
+ "step": 190
610
+ },
611
+ {
612
+ "epoch": 54.86,
613
+ "eval_accuracy": 0.6521739130434783,
614
+ "eval_loss": 1.0072776079177856,
615
+ "eval_runtime": 0.3861,
616
+ "eval_samples_per_second": 119.144,
617
+ "eval_steps_per_second": 7.77,
618
+ "step": 192
619
+ },
620
+ {
621
+ "epoch": 56.0,
622
+ "eval_accuracy": 0.6521739130434783,
623
+ "eval_loss": 1.0047576427459717,
624
+ "eval_runtime": 0.2431,
625
+ "eval_samples_per_second": 189.257,
626
+ "eval_steps_per_second": 12.343,
627
+ "step": 196
628
+ },
629
+ {
630
+ "epoch": 56.86,
631
+ "eval_accuracy": 0.6521739130434783,
632
+ "eval_loss": 0.9993048310279846,
633
+ "eval_runtime": 0.2251,
634
+ "eval_samples_per_second": 204.398,
635
+ "eval_steps_per_second": 13.33,
636
+ "step": 199
637
+ },
638
+ {
639
+ "epoch": 57.14,
640
+ "learning_rate": 2.4999999999999998e-06,
641
+ "loss": 0.9233,
642
+ "step": 200
643
+ },
644
+ {
645
+ "epoch": 58.0,
646
+ "eval_accuracy": 0.6521739130434783,
647
+ "eval_loss": 0.9939430952072144,
648
+ "eval_runtime": 0.2431,
649
+ "eval_samples_per_second": 189.258,
650
+ "eval_steps_per_second": 12.343,
651
+ "step": 203
652
+ },
653
+ {
654
+ "epoch": 58.86,
655
+ "eval_accuracy": 0.6521739130434783,
656
+ "eval_loss": 0.9938895106315613,
657
+ "eval_runtime": 0.2396,
658
+ "eval_samples_per_second": 192.017,
659
+ "eval_steps_per_second": 12.523,
660
+ "step": 206
661
+ },
662
+ {
663
+ "epoch": 60.0,
664
+ "learning_rate": 1.875e-06,
665
+ "loss": 0.9452,
666
+ "step": 210
667
+ },
668
+ {
669
+ "epoch": 60.0,
670
+ "eval_accuracy": 0.6521739130434783,
671
+ "eval_loss": 0.9975183010101318,
672
+ "eval_runtime": 0.2521,
673
+ "eval_samples_per_second": 182.499,
674
+ "eval_steps_per_second": 11.902,
675
+ "step": 210
676
+ },
677
+ {
678
+ "epoch": 60.86,
679
+ "eval_accuracy": 0.6086956521739131,
680
+ "eval_loss": 0.998058557510376,
681
+ "eval_runtime": 0.2471,
682
+ "eval_samples_per_second": 186.193,
683
+ "eval_steps_per_second": 12.143,
684
+ "step": 213
685
+ },
686
+ {
687
+ "epoch": 62.0,
688
+ "eval_accuracy": 0.6086956521739131,
689
+ "eval_loss": 0.9985237121582031,
690
+ "eval_runtime": 0.2421,
691
+ "eval_samples_per_second": 190.04,
692
+ "eval_steps_per_second": 12.394,
693
+ "step": 217
694
+ },
695
+ {
696
+ "epoch": 62.86,
697
+ "learning_rate": 1.2499999999999999e-06,
698
+ "loss": 0.9183,
699
+ "step": 220
700
+ },
701
+ {
702
+ "epoch": 62.86,
703
+ "eval_accuracy": 0.6086956521739131,
704
+ "eval_loss": 0.9968777298927307,
705
+ "eval_runtime": 0.2391,
706
+ "eval_samples_per_second": 192.425,
707
+ "eval_steps_per_second": 12.549,
708
+ "step": 220
709
+ },
710
+ {
711
+ "epoch": 64.0,
712
+ "eval_accuracy": 0.6304347826086957,
713
+ "eval_loss": 0.99575275182724,
714
+ "eval_runtime": 0.2351,
715
+ "eval_samples_per_second": 195.701,
716
+ "eval_steps_per_second": 12.763,
717
+ "step": 224
718
+ },
719
+ {
720
+ "epoch": 64.86,
721
+ "eval_accuracy": 0.6086956521739131,
722
+ "eval_loss": 0.9928344488143921,
723
+ "eval_runtime": 0.2406,
724
+ "eval_samples_per_second": 191.216,
725
+ "eval_steps_per_second": 12.471,
726
+ "step": 227
727
+ },
728
+ {
729
+ "epoch": 65.71,
730
+ "learning_rate": 6.249999999999999e-07,
731
+ "loss": 0.9449,
732
+ "step": 230
733
+ },
734
+ {
735
+ "epoch": 66.0,
736
+ "eval_accuracy": 0.6086956521739131,
737
+ "eval_loss": 0.9906012415885925,
738
+ "eval_runtime": 0.2456,
739
+ "eval_samples_per_second": 187.316,
740
+ "eval_steps_per_second": 12.216,
741
+ "step": 231
742
+ },
743
+ {
744
+ "epoch": 66.86,
745
+ "eval_accuracy": 0.6304347826086957,
746
+ "eval_loss": 0.9892796874046326,
747
+ "eval_runtime": 0.2501,
748
+ "eval_samples_per_second": 183.959,
749
+ "eval_steps_per_second": 11.997,
750
+ "step": 234
751
+ },
752
+ {
753
+ "epoch": 68.0,
754
+ "eval_accuracy": 0.6521739130434783,
755
+ "eval_loss": 0.9880576133728027,
756
+ "eval_runtime": 0.2471,
757
+ "eval_samples_per_second": 186.193,
758
+ "eval_steps_per_second": 12.143,
759
+ "step": 238
760
+ },
761
+ {
762
+ "epoch": 68.57,
763
+ "learning_rate": 0.0,
764
+ "loss": 0.9154,
765
+ "step": 240
766
+ },
767
+ {
768
+ "epoch": 68.57,
769
+ "eval_accuracy": 0.6521739130434783,
770
+ "eval_loss": 0.9880411028862,
771
+ "eval_runtime": 0.2566,
772
+ "eval_samples_per_second": 179.291,
773
+ "eval_steps_per_second": 11.693,
774
+ "step": 240
775
+ },
776
+ {
777
+ "epoch": 68.57,
778
+ "step": 240,
779
+ "total_flos": 4.754181186964685e+17,
780
+ "train_loss": 1.113492695490519,
781
+ "train_runtime": 374.3307,
782
+ "train_samples_per_second": 45.521,
783
+ "train_steps_per_second": 0.641
784
+ }
785
+ ],
786
+ "logging_steps": 10,
787
+ "max_steps": 240,
788
+ "num_input_tokens_seen": 0,
789
+ "num_train_epochs": 80,
790
+ "save_steps": 500,
791
+ "total_flos": 4.754181186964685e+17,
792
+ "train_batch_size": 16,
793
+ "trial_name": null,
794
+ "trial_params": null
795
+ }