minhah commited on
Commit
cec16d1
·
verified ·
1 Parent(s): 7aecb0d

End of training

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +8 -0
  3. test_results.json +8 -0
  4. trainer_state.json +642 -0
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [MCG-NJU/videomae-base](https://huggingface.co/MCG-NJU/videomae-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.7033
21
  - Accuracy: 0.3481
22
 
23
  ## Model description
 
17
 
18
  This model is a fine-tuned version of [MCG-NJU/videomae-base](https://huggingface.co/MCG-NJU/videomae-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.7031
21
  - Accuracy: 0.3481
22
 
23
  ## Model description
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.09,
3
+ "eval_accuracy": 0.34811715481171546,
4
+ "eval_loss": 1.7031110525131226,
5
+ "eval_runtime": 266.484,
6
+ "eval_samples_per_second": 4.484,
7
+ "eval_steps_per_second": 0.281
8
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.09,
3
+ "eval_accuracy": 0.34811715481171546,
4
+ "eval_loss": 1.7031110525131226,
5
+ "eval_runtime": 266.484,
6
+ "eval_samples_per_second": 4.484,
7
+ "eval_steps_per_second": 0.281
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,642 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.34081346423562414,
3
+ "best_model_checkpoint": "videomae-base-finetuned-elderf1/checkpoint-73",
4
+ "epoch": 9.0875,
5
+ "eval_steps": 500,
6
+ "global_step": 720,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "grad_norm": 4.3531270027160645,
14
+ "learning_rate": 0.0001388888888888889,
15
+ "loss": 1.7775,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.03,
20
+ "grad_norm": 5.537198543548584,
21
+ "learning_rate": 0.0002777777777777778,
22
+ "loss": 1.6877,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.04,
27
+ "grad_norm": 5.151556968688965,
28
+ "learning_rate": 0.0004166666666666667,
29
+ "loss": 1.7997,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.06,
34
+ "grad_norm": 5.214244365692139,
35
+ "learning_rate": 0.0005555555555555556,
36
+ "loss": 1.8051,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.07,
41
+ "grad_norm": 1.6136474609375,
42
+ "learning_rate": 0.0006944444444444445,
43
+ "loss": 1.7065,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.08,
48
+ "grad_norm": 5.423031330108643,
49
+ "learning_rate": 0.0008333333333333334,
50
+ "loss": 1.7922,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.1,
55
+ "grad_norm": 3.5037877559661865,
56
+ "learning_rate": 0.0009722222222222222,
57
+ "loss": 1.7358,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.1,
62
+ "eval_accuracy": 0.34081346423562414,
63
+ "eval_loss": 1.692335844039917,
64
+ "eval_runtime": 379.7283,
65
+ "eval_samples_per_second": 3.755,
66
+ "eval_steps_per_second": 0.237,
67
+ "step": 73
68
+ },
69
+ {
70
+ "epoch": 1.01,
71
+ "grad_norm": 3.8850014209747314,
72
+ "learning_rate": 0.0009876543209876543,
73
+ "loss": 1.7738,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 1.02,
78
+ "grad_norm": 6.418526649475098,
79
+ "learning_rate": 0.0009722222222222222,
80
+ "loss": 1.8192,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 1.04,
85
+ "grad_norm": 6.298271179199219,
86
+ "learning_rate": 0.0009567901234567902,
87
+ "loss": 1.7833,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 1.05,
92
+ "grad_norm": 4.652337551116943,
93
+ "learning_rate": 0.000941358024691358,
94
+ "loss": 1.7534,
95
+ "step": 110
96
+ },
97
+ {
98
+ "epoch": 1.07,
99
+ "grad_norm": 3.369626522064209,
100
+ "learning_rate": 0.000925925925925926,
101
+ "loss": 1.7376,
102
+ "step": 120
103
+ },
104
+ {
105
+ "epoch": 1.08,
106
+ "grad_norm": 2.182107448577881,
107
+ "learning_rate": 0.0009104938271604939,
108
+ "loss": 1.6406,
109
+ "step": 130
110
+ },
111
+ {
112
+ "epoch": 1.09,
113
+ "grad_norm": 3.763148069381714,
114
+ "learning_rate": 0.0008950617283950618,
115
+ "loss": 1.7163,
116
+ "step": 140
117
+ },
118
+ {
119
+ "epoch": 1.1,
120
+ "eval_accuracy": 0.3373071528751753,
121
+ "eval_loss": 1.66623854637146,
122
+ "eval_runtime": 421.0248,
123
+ "eval_samples_per_second": 3.387,
124
+ "eval_steps_per_second": 0.214,
125
+ "step": 146
126
+ },
127
+ {
128
+ "epoch": 2.01,
129
+ "grad_norm": 4.109099864959717,
130
+ "learning_rate": 0.0008796296296296296,
131
+ "loss": 1.6734,
132
+ "step": 150
133
+ },
134
+ {
135
+ "epoch": 2.02,
136
+ "grad_norm": 3.7187507152557373,
137
+ "learning_rate": 0.0008641975308641975,
138
+ "loss": 1.7535,
139
+ "step": 160
140
+ },
141
+ {
142
+ "epoch": 2.03,
143
+ "grad_norm": 2.9596164226531982,
144
+ "learning_rate": 0.0008487654320987654,
145
+ "loss": 1.7282,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 2.05,
150
+ "grad_norm": 5.126859188079834,
151
+ "learning_rate": 0.0008333333333333334,
152
+ "loss": 1.7021,
153
+ "step": 180
154
+ },
155
+ {
156
+ "epoch": 2.06,
157
+ "grad_norm": 2.36877703666687,
158
+ "learning_rate": 0.0008179012345679012,
159
+ "loss": 1.6977,
160
+ "step": 190
161
+ },
162
+ {
163
+ "epoch": 2.08,
164
+ "grad_norm": 4.42868185043335,
165
+ "learning_rate": 0.0008024691358024692,
166
+ "loss": 1.6882,
167
+ "step": 200
168
+ },
169
+ {
170
+ "epoch": 2.09,
171
+ "grad_norm": 3.4869942665100098,
172
+ "learning_rate": 0.0007870370370370372,
173
+ "loss": 1.7018,
174
+ "step": 210
175
+ },
176
+ {
177
+ "epoch": 2.1,
178
+ "eval_accuracy": 0.34081346423562414,
179
+ "eval_loss": 1.6378456354141235,
180
+ "eval_runtime": 423.5487,
181
+ "eval_samples_per_second": 3.367,
182
+ "eval_steps_per_second": 0.212,
183
+ "step": 219
184
+ },
185
+ {
186
+ "epoch": 3.0,
187
+ "grad_norm": 3.7394754886627197,
188
+ "learning_rate": 0.0007716049382716049,
189
+ "loss": 1.7356,
190
+ "step": 220
191
+ },
192
+ {
193
+ "epoch": 3.02,
194
+ "grad_norm": 1.3488916158676147,
195
+ "learning_rate": 0.0007561728395061729,
196
+ "loss": 1.7024,
197
+ "step": 230
198
+ },
199
+ {
200
+ "epoch": 3.03,
201
+ "grad_norm": 3.196521282196045,
202
+ "learning_rate": 0.0007407407407407407,
203
+ "loss": 1.7094,
204
+ "step": 240
205
+ },
206
+ {
207
+ "epoch": 3.04,
208
+ "grad_norm": 3.9984352588653564,
209
+ "learning_rate": 0.0007253086419753087,
210
+ "loss": 1.6481,
211
+ "step": 250
212
+ },
213
+ {
214
+ "epoch": 3.06,
215
+ "grad_norm": 3.6886866092681885,
216
+ "learning_rate": 0.0007098765432098766,
217
+ "loss": 1.652,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 3.07,
222
+ "grad_norm": 2.245149850845337,
223
+ "learning_rate": 0.0006944444444444445,
224
+ "loss": 1.7479,
225
+ "step": 270
226
+ },
227
+ {
228
+ "epoch": 3.08,
229
+ "grad_norm": 4.62326192855835,
230
+ "learning_rate": 0.0006790123456790124,
231
+ "loss": 1.7129,
232
+ "step": 280
233
+ },
234
+ {
235
+ "epoch": 3.1,
236
+ "grad_norm": 4.474867343902588,
237
+ "learning_rate": 0.0006635802469135802,
238
+ "loss": 1.7334,
239
+ "step": 290
240
+ },
241
+ {
242
+ "epoch": 3.1,
243
+ "eval_accuracy": 0.34011220196353437,
244
+ "eval_loss": 1.6562532186508179,
245
+ "eval_runtime": 383.4418,
246
+ "eval_samples_per_second": 3.719,
247
+ "eval_steps_per_second": 0.235,
248
+ "step": 292
249
+ },
250
+ {
251
+ "epoch": 4.01,
252
+ "grad_norm": 1.4795947074890137,
253
+ "learning_rate": 0.0006481481481481481,
254
+ "loss": 1.7427,
255
+ "step": 300
256
+ },
257
+ {
258
+ "epoch": 4.03,
259
+ "grad_norm": 2.173116683959961,
260
+ "learning_rate": 0.0006327160493827161,
261
+ "loss": 1.6894,
262
+ "step": 310
263
+ },
264
+ {
265
+ "epoch": 4.04,
266
+ "grad_norm": 2.731816291809082,
267
+ "learning_rate": 0.0006172839506172839,
268
+ "loss": 1.6818,
269
+ "step": 320
270
+ },
271
+ {
272
+ "epoch": 4.05,
273
+ "grad_norm": 2.120103120803833,
274
+ "learning_rate": 0.0006018518518518519,
275
+ "loss": 1.6782,
276
+ "step": 330
277
+ },
278
+ {
279
+ "epoch": 4.07,
280
+ "grad_norm": 3.7916502952575684,
281
+ "learning_rate": 0.0005864197530864199,
282
+ "loss": 1.7388,
283
+ "step": 340
284
+ },
285
+ {
286
+ "epoch": 4.08,
287
+ "grad_norm": 2.852003812789917,
288
+ "learning_rate": 0.0005709876543209876,
289
+ "loss": 1.7422,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 4.09,
294
+ "grad_norm": 6.300606727600098,
295
+ "learning_rate": 0.0005555555555555556,
296
+ "loss": 1.672,
297
+ "step": 360
298
+ },
299
+ {
300
+ "epoch": 4.1,
301
+ "eval_accuracy": 0.23983169705469845,
302
+ "eval_loss": 1.6567574739456177,
303
+ "eval_runtime": 310.9306,
304
+ "eval_samples_per_second": 4.586,
305
+ "eval_steps_per_second": 0.289,
306
+ "step": 365
307
+ },
308
+ {
309
+ "epoch": 5.01,
310
+ "grad_norm": 5.469892501831055,
311
+ "learning_rate": 0.0005401234567901234,
312
+ "loss": 1.6958,
313
+ "step": 370
314
+ },
315
+ {
316
+ "epoch": 5.02,
317
+ "grad_norm": 3.4026269912719727,
318
+ "learning_rate": 0.0005246913580246914,
319
+ "loss": 1.7383,
320
+ "step": 380
321
+ },
322
+ {
323
+ "epoch": 5.03,
324
+ "grad_norm": 2.7335870265960693,
325
+ "learning_rate": 0.0005092592592592593,
326
+ "loss": 1.6763,
327
+ "step": 390
328
+ },
329
+ {
330
+ "epoch": 5.05,
331
+ "grad_norm": 4.079995155334473,
332
+ "learning_rate": 0.0004938271604938272,
333
+ "loss": 1.693,
334
+ "step": 400
335
+ },
336
+ {
337
+ "epoch": 5.06,
338
+ "grad_norm": 2.8691930770874023,
339
+ "learning_rate": 0.0004783950617283951,
340
+ "loss": 1.7293,
341
+ "step": 410
342
+ },
343
+ {
344
+ "epoch": 5.08,
345
+ "grad_norm": 2.4010772705078125,
346
+ "learning_rate": 0.000462962962962963,
347
+ "loss": 1.6812,
348
+ "step": 420
349
+ },
350
+ {
351
+ "epoch": 5.09,
352
+ "grad_norm": 4.233634948730469,
353
+ "learning_rate": 0.0004475308641975309,
354
+ "loss": 1.7095,
355
+ "step": 430
356
+ },
357
+ {
358
+ "epoch": 5.1,
359
+ "eval_accuracy": 0.3387096774193548,
360
+ "eval_loss": 1.6313395500183105,
361
+ "eval_runtime": 306.7441,
362
+ "eval_samples_per_second": 4.649,
363
+ "eval_steps_per_second": 0.293,
364
+ "step": 438
365
+ },
366
+ {
367
+ "epoch": 6.0,
368
+ "grad_norm": 2.844386100769043,
369
+ "learning_rate": 0.00043209876543209873,
370
+ "loss": 1.6502,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 6.02,
375
+ "grad_norm": 3.7761685848236084,
376
+ "learning_rate": 0.0004166666666666667,
377
+ "loss": 1.6892,
378
+ "step": 450
379
+ },
380
+ {
381
+ "epoch": 6.03,
382
+ "grad_norm": 2.8903658390045166,
383
+ "learning_rate": 0.0004012345679012346,
384
+ "loss": 1.6006,
385
+ "step": 460
386
+ },
387
+ {
388
+ "epoch": 6.04,
389
+ "grad_norm": 2.548739194869995,
390
+ "learning_rate": 0.00038580246913580245,
391
+ "loss": 1.7113,
392
+ "step": 470
393
+ },
394
+ {
395
+ "epoch": 6.06,
396
+ "grad_norm": 3.1980948448181152,
397
+ "learning_rate": 0.00037037037037037035,
398
+ "loss": 1.7259,
399
+ "step": 480
400
+ },
401
+ {
402
+ "epoch": 6.07,
403
+ "grad_norm": 3.119049549102783,
404
+ "learning_rate": 0.0003549382716049383,
405
+ "loss": 1.6634,
406
+ "step": 490
407
+ },
408
+ {
409
+ "epoch": 6.09,
410
+ "grad_norm": 3.465067148208618,
411
+ "learning_rate": 0.0003395061728395062,
412
+ "loss": 1.71,
413
+ "step": 500
414
+ },
415
+ {
416
+ "epoch": 6.1,
417
+ "grad_norm": 1.507797122001648,
418
+ "learning_rate": 0.00032407407407407406,
419
+ "loss": 1.7119,
420
+ "step": 510
421
+ },
422
+ {
423
+ "epoch": 6.1,
424
+ "eval_accuracy": 0.34081346423562414,
425
+ "eval_loss": 1.6309115886688232,
426
+ "eval_runtime": 295.3653,
427
+ "eval_samples_per_second": 4.828,
428
+ "eval_steps_per_second": 0.305,
429
+ "step": 511
430
+ },
431
+ {
432
+ "epoch": 7.01,
433
+ "grad_norm": 1.939512848854065,
434
+ "learning_rate": 0.00030864197530864197,
435
+ "loss": 1.6486,
436
+ "step": 520
437
+ },
438
+ {
439
+ "epoch": 7.03,
440
+ "grad_norm": 2.4698586463928223,
441
+ "learning_rate": 0.00029320987654320993,
442
+ "loss": 1.6754,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 7.04,
447
+ "grad_norm": 1.9678858518600464,
448
+ "learning_rate": 0.0002777777777777778,
449
+ "loss": 1.6025,
450
+ "step": 540
451
+ },
452
+ {
453
+ "epoch": 7.05,
454
+ "grad_norm": 3.6356217861175537,
455
+ "learning_rate": 0.0002623456790123457,
456
+ "loss": 1.6792,
457
+ "step": 550
458
+ },
459
+ {
460
+ "epoch": 7.07,
461
+ "grad_norm": 2.781039237976074,
462
+ "learning_rate": 0.0002469135802469136,
463
+ "loss": 1.7165,
464
+ "step": 560
465
+ },
466
+ {
467
+ "epoch": 7.08,
468
+ "grad_norm": 4.021714687347412,
469
+ "learning_rate": 0.0002314814814814815,
470
+ "loss": 1.6836,
471
+ "step": 570
472
+ },
473
+ {
474
+ "epoch": 7.1,
475
+ "grad_norm": 4.392849922180176,
476
+ "learning_rate": 0.00021604938271604937,
477
+ "loss": 1.6981,
478
+ "step": 580
479
+ },
480
+ {
481
+ "epoch": 7.1,
482
+ "eval_accuracy": 0.3288920056100982,
483
+ "eval_loss": 1.6518133878707886,
484
+ "eval_runtime": 319.5699,
485
+ "eval_samples_per_second": 4.462,
486
+ "eval_steps_per_second": 0.282,
487
+ "step": 584
488
+ },
489
+ {
490
+ "epoch": 8.01,
491
+ "grad_norm": 2.291691303253174,
492
+ "learning_rate": 0.0002006172839506173,
493
+ "loss": 1.6548,
494
+ "step": 590
495
+ },
496
+ {
497
+ "epoch": 8.02,
498
+ "grad_norm": 4.06191873550415,
499
+ "learning_rate": 0.00018518518518518518,
500
+ "loss": 1.5957,
501
+ "step": 600
502
+ },
503
+ {
504
+ "epoch": 8.04,
505
+ "grad_norm": 3.735381603240967,
506
+ "learning_rate": 0.0001697530864197531,
507
+ "loss": 1.6757,
508
+ "step": 610
509
+ },
510
+ {
511
+ "epoch": 8.05,
512
+ "grad_norm": 3.7696151733398438,
513
+ "learning_rate": 0.00015432098765432098,
514
+ "loss": 1.6349,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 8.06,
519
+ "grad_norm": 2.210860013961792,
520
+ "learning_rate": 0.0001388888888888889,
521
+ "loss": 1.7561,
522
+ "step": 630
523
+ },
524
+ {
525
+ "epoch": 8.08,
526
+ "grad_norm": 2.812994956970215,
527
+ "learning_rate": 0.0001234567901234568,
528
+ "loss": 1.6331,
529
+ "step": 640
530
+ },
531
+ {
532
+ "epoch": 8.09,
533
+ "grad_norm": 3.606325626373291,
534
+ "learning_rate": 0.00010802469135802468,
535
+ "loss": 1.7066,
536
+ "step": 650
537
+ },
538
+ {
539
+ "epoch": 8.1,
540
+ "eval_accuracy": 0.33099579242636745,
541
+ "eval_loss": 1.6313475370407104,
542
+ "eval_runtime": 304.3275,
543
+ "eval_samples_per_second": 4.686,
544
+ "eval_steps_per_second": 0.296,
545
+ "step": 657
546
+ },
547
+ {
548
+ "epoch": 9.0,
549
+ "grad_norm": 2.993828296661377,
550
+ "learning_rate": 9.259259259259259e-05,
551
+ "loss": 1.6485,
552
+ "step": 660
553
+ },
554
+ {
555
+ "epoch": 9.02,
556
+ "grad_norm": 1.9592925310134888,
557
+ "learning_rate": 7.716049382716049e-05,
558
+ "loss": 1.6776,
559
+ "step": 670
560
+ },
561
+ {
562
+ "epoch": 9.03,
563
+ "grad_norm": 3.105025291442871,
564
+ "learning_rate": 6.17283950617284e-05,
565
+ "loss": 1.6466,
566
+ "step": 680
567
+ },
568
+ {
569
+ "epoch": 9.05,
570
+ "grad_norm": 3.643643856048584,
571
+ "learning_rate": 4.6296296296296294e-05,
572
+ "loss": 1.6544,
573
+ "step": 690
574
+ },
575
+ {
576
+ "epoch": 9.06,
577
+ "grad_norm": 2.5237057209014893,
578
+ "learning_rate": 3.08641975308642e-05,
579
+ "loss": 1.6616,
580
+ "step": 700
581
+ },
582
+ {
583
+ "epoch": 9.07,
584
+ "grad_norm": 3.834527015686035,
585
+ "learning_rate": 1.54320987654321e-05,
586
+ "loss": 1.6271,
587
+ "step": 710
588
+ },
589
+ {
590
+ "epoch": 9.09,
591
+ "grad_norm": 1.777999997138977,
592
+ "learning_rate": 0.0,
593
+ "loss": 1.6476,
594
+ "step": 720
595
+ },
596
+ {
597
+ "epoch": 9.09,
598
+ "eval_accuracy": 0.3288920056100982,
599
+ "eval_loss": 1.6337770223617554,
600
+ "eval_runtime": 340.2408,
601
+ "eval_samples_per_second": 4.191,
602
+ "eval_steps_per_second": 0.265,
603
+ "step": 720
604
+ },
605
+ {
606
+ "epoch": 9.09,
607
+ "step": 720,
608
+ "total_flos": 1.4231811343419113e+19,
609
+ "train_loss": 1.6995894723468357,
610
+ "train_runtime": 6776.3985,
611
+ "train_samples_per_second": 1.7,
612
+ "train_steps_per_second": 0.106
613
+ },
614
+ {
615
+ "epoch": 9.09,
616
+ "eval_accuracy": 0.34811715481171546,
617
+ "eval_loss": 1.703281283378601,
618
+ "eval_runtime": 256.932,
619
+ "eval_samples_per_second": 4.651,
620
+ "eval_steps_per_second": 0.292,
621
+ "step": 720
622
+ },
623
+ {
624
+ "epoch": 9.09,
625
+ "eval_accuracy": 0.34811715481171546,
626
+ "eval_loss": 1.7031110525131226,
627
+ "eval_runtime": 266.484,
628
+ "eval_samples_per_second": 4.484,
629
+ "eval_steps_per_second": 0.281,
630
+ "step": 720
631
+ }
632
+ ],
633
+ "logging_steps": 10,
634
+ "max_steps": 720,
635
+ "num_input_tokens_seen": 0,
636
+ "num_train_epochs": 9223372036854775807,
637
+ "save_steps": 500,
638
+ "total_flos": 1.4231811343419113e+19,
639
+ "train_batch_size": 16,
640
+ "trial_name": null,
641
+ "trial_params": null
642
+ }