JackWong0911 commited on
Commit
396a608
·
verified ·
1 Parent(s): 551f54f

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.8387096774193549,
4
- "eval_loss": 0.6151520609855652,
5
- "eval_runtime": 18.1208,
6
- "eval_samples_per_second": 8.554,
7
- "eval_steps_per_second": 8.554
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8709677419354839,
4
+ "eval_loss": 0.41407766938209534,
5
+ "eval_runtime": 44.3003,
6
+ "eval_samples_per_second": 3.499,
7
+ "eval_steps_per_second": 3.499
8
  }
runs/Feb16_05-22-13_de4bb4676102/events.out.tfevents.1708062338.de4bb4676102.1456.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a7b4a65f5dd858eb91f7803678b3b4274f86bfbc4b49ee343f2bdcca74bfac5
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1bd5be46b9dac4107ffc16862dc51585a976f51b7049e441720e82a08c0e61
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.8387096774193549,
4
- "eval_loss": 0.6151520609855652,
5
- "eval_runtime": 18.1208,
6
- "eval_samples_per_second": 8.554,
7
- "eval_steps_per_second": 8.554
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8709677419354839,
4
+ "eval_loss": 0.41407766938209534,
5
+ "eval_runtime": 44.3003,
6
+ "eval_samples_per_second": 3.499,
7
+ "eval_steps_per_second": 3.499
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8571428571428571,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-900",
4
  "epoch": 3.25,
5
  "eval_steps": 500,
6
  "global_step": 1200,
@@ -11,784 +11,784 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666667e-06,
14
- "loss": 2.5348,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 8.333333333333334e-06,
20
- "loss": 2.2499,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 1.25e-05,
26
- "loss": 2.5052,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.03,
31
  "learning_rate": 1.6666666666666667e-05,
32
- "loss": 2.6626,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 2.0833333333333336e-05,
38
- "loss": 2.2731,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.05,
43
  "learning_rate": 2.5e-05,
44
- "loss": 2.3693,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.06,
49
  "learning_rate": 2.916666666666667e-05,
50
- "loss": 2.2157,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.3333333333333335e-05,
56
- "loss": 2.1835,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.07,
61
  "learning_rate": 3.7500000000000003e-05,
62
- "loss": 2.023,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.08,
67
  "learning_rate": 4.166666666666667e-05,
68
- "loss": 2.1581,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.09,
73
  "learning_rate": 4.5833333333333334e-05,
74
- "loss": 2.1551,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.1,
79
  "learning_rate": 5e-05,
80
- "loss": 1.8589,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.11,
85
  "learning_rate": 4.9537037037037035e-05,
86
- "loss": 2.6667,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.12,
91
  "learning_rate": 4.9074074074074075e-05,
92
- "loss": 2.0886,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.12,
97
  "learning_rate": 4.8611111111111115e-05,
98
- "loss": 2.2096,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.13,
103
  "learning_rate": 4.814814814814815e-05,
104
- "loss": 2.0123,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.14,
109
  "learning_rate": 4.768518518518519e-05,
110
- "loss": 1.8907,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.15,
115
  "learning_rate": 4.722222222222222e-05,
116
- "loss": 1.7564,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.16,
121
  "learning_rate": 4.675925925925926e-05,
122
- "loss": 1.7499,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.17,
127
  "learning_rate": 4.62962962962963e-05,
128
- "loss": 1.9893,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.17,
133
  "learning_rate": 4.5833333333333334e-05,
134
- "loss": 1.9123,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.18,
139
  "learning_rate": 4.5370370370370374e-05,
140
- "loss": 2.0462,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.19,
145
  "learning_rate": 4.490740740740741e-05,
146
- "loss": 1.8366,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.2,
151
  "learning_rate": 4.4444444444444447e-05,
152
- "loss": 2.006,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.21,
157
  "learning_rate": 4.3981481481481486e-05,
158
- "loss": 1.5225,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.22,
163
  "learning_rate": 4.351851851851852e-05,
164
- "loss": 1.4465,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.23,
169
  "learning_rate": 4.305555555555556e-05,
170
- "loss": 1.9501,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.23,
175
  "learning_rate": 4.259259259259259e-05,
176
- "loss": 1.5535,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.24,
181
  "learning_rate": 4.212962962962963e-05,
182
- "loss": 1.5687,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.25,
187
  "learning_rate": 4.166666666666667e-05,
188
- "loss": 1.4673,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.25,
193
- "eval_accuracy": 0.35714285714285715,
194
- "eval_loss": 1.834161639213562,
195
- "eval_runtime": 8.1144,
196
- "eval_samples_per_second": 8.627,
197
- "eval_steps_per_second": 8.627,
198
  "step": 300
199
  },
200
  {
201
  "epoch": 1.01,
202
  "learning_rate": 4.1203703703703705e-05,
203
- "loss": 1.9569,
204
  "step": 310
205
  },
206
  {
207
  "epoch": 1.02,
208
  "learning_rate": 4.074074074074074e-05,
209
- "loss": 1.6084,
210
  "step": 320
211
  },
212
  {
213
  "epoch": 1.02,
214
  "learning_rate": 4.027777777777778e-05,
215
- "loss": 0.9802,
216
  "step": 330
217
  },
218
  {
219
  "epoch": 1.03,
220
  "learning_rate": 3.981481481481482e-05,
221
- "loss": 1.5647,
222
  "step": 340
223
  },
224
  {
225
  "epoch": 1.04,
226
  "learning_rate": 3.935185185185186e-05,
227
- "loss": 1.1866,
228
  "step": 350
229
  },
230
  {
231
  "epoch": 1.05,
232
  "learning_rate": 3.888888888888889e-05,
233
- "loss": 0.6345,
234
  "step": 360
235
  },
236
  {
237
  "epoch": 1.06,
238
  "learning_rate": 3.8425925925925924e-05,
239
- "loss": 0.9199,
240
  "step": 370
241
  },
242
  {
243
  "epoch": 1.07,
244
  "learning_rate": 3.7962962962962964e-05,
245
- "loss": 1.0264,
246
  "step": 380
247
  },
248
  {
249
  "epoch": 1.07,
250
  "learning_rate": 3.7500000000000003e-05,
251
- "loss": 1.2867,
252
  "step": 390
253
  },
254
  {
255
  "epoch": 1.08,
256
  "learning_rate": 3.7037037037037037e-05,
257
- "loss": 0.8381,
258
  "step": 400
259
  },
260
  {
261
  "epoch": 1.09,
262
  "learning_rate": 3.6574074074074076e-05,
263
- "loss": 1.6984,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.1,
268
  "learning_rate": 3.611111111111111e-05,
269
- "loss": 0.994,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.11,
274
  "learning_rate": 3.564814814814815e-05,
275
- "loss": 1.5705,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.12,
280
  "learning_rate": 3.518518518518519e-05,
281
- "loss": 0.1452,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.12,
286
  "learning_rate": 3.472222222222222e-05,
287
- "loss": 0.6541,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.13,
292
  "learning_rate": 3.425925925925926e-05,
293
- "loss": 1.0954,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.14,
298
  "learning_rate": 3.3796296296296295e-05,
299
- "loss": 1.6305,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.15,
304
  "learning_rate": 3.3333333333333335e-05,
305
- "loss": 2.5766,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.16,
310
  "learning_rate": 3.2870370370370375e-05,
311
- "loss": 1.4136,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.17,
316
  "learning_rate": 3.240740740740741e-05,
317
- "loss": 0.3009,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.18,
322
  "learning_rate": 3.194444444444444e-05,
323
- "loss": 1.3079,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.18,
328
  "learning_rate": 3.148148148148148e-05,
329
- "loss": 1.6593,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.19,
334
  "learning_rate": 3.101851851851852e-05,
335
- "loss": 0.8459,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.2,
340
  "learning_rate": 3.055555555555556e-05,
341
- "loss": 0.609,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.21,
346
  "learning_rate": 3.0092592592592593e-05,
347
- "loss": 0.8575,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.22,
352
  "learning_rate": 2.962962962962963e-05,
353
- "loss": 0.425,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.23,
358
  "learning_rate": 2.916666666666667e-05,
359
- "loss": 1.4892,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.23,
364
  "learning_rate": 2.8703703703703706e-05,
365
- "loss": 0.9041,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.24,
370
  "learning_rate": 2.824074074074074e-05,
371
- "loss": 0.5981,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.25,
376
  "learning_rate": 2.777777777777778e-05,
377
- "loss": 1.3231,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.25,
382
- "eval_accuracy": 0.5571428571428572,
383
- "eval_loss": 1.3841789960861206,
384
- "eval_runtime": 8.1929,
385
- "eval_samples_per_second": 8.544,
386
- "eval_steps_per_second": 8.544,
387
  "step": 600
388
  },
389
  {
390
  "epoch": 2.01,
391
  "learning_rate": 2.7314814814814816e-05,
392
- "loss": 0.6209,
393
  "step": 610
394
  },
395
  {
396
  "epoch": 2.02,
397
  "learning_rate": 2.6851851851851855e-05,
398
- "loss": 2.3085,
399
  "step": 620
400
  },
401
  {
402
  "epoch": 2.02,
403
  "learning_rate": 2.6388888888888892e-05,
404
- "loss": 1.2361,
405
  "step": 630
406
  },
407
  {
408
  "epoch": 2.03,
409
  "learning_rate": 2.5925925925925925e-05,
410
- "loss": 0.4469,
411
  "step": 640
412
  },
413
  {
414
  "epoch": 2.04,
415
  "learning_rate": 2.5462962962962965e-05,
416
- "loss": 0.7314,
417
  "step": 650
418
  },
419
  {
420
  "epoch": 2.05,
421
  "learning_rate": 2.5e-05,
422
- "loss": 0.8052,
423
  "step": 660
424
  },
425
  {
426
  "epoch": 2.06,
427
  "learning_rate": 2.4537037037037038e-05,
428
- "loss": 0.8913,
429
  "step": 670
430
  },
431
  {
432
  "epoch": 2.07,
433
  "learning_rate": 2.4074074074074074e-05,
434
- "loss": 0.501,
435
  "step": 680
436
  },
437
  {
438
  "epoch": 2.08,
439
  "learning_rate": 2.361111111111111e-05,
440
- "loss": 0.2084,
441
  "step": 690
442
  },
443
  {
444
  "epoch": 2.08,
445
  "learning_rate": 2.314814814814815e-05,
446
- "loss": 1.2433,
447
  "step": 700
448
  },
449
  {
450
  "epoch": 2.09,
451
  "learning_rate": 2.2685185185185187e-05,
452
- "loss": 0.7629,
453
  "step": 710
454
  },
455
  {
456
  "epoch": 2.1,
457
  "learning_rate": 2.2222222222222223e-05,
458
- "loss": 0.6061,
459
  "step": 720
460
  },
461
  {
462
  "epoch": 2.11,
463
  "learning_rate": 2.175925925925926e-05,
464
- "loss": 0.087,
465
  "step": 730
466
  },
467
  {
468
  "epoch": 2.12,
469
  "learning_rate": 2.1296296296296296e-05,
470
- "loss": 1.1573,
471
  "step": 740
472
  },
473
  {
474
  "epoch": 2.12,
475
  "learning_rate": 2.0833333333333336e-05,
476
- "loss": 0.3543,
477
  "step": 750
478
  },
479
  {
480
  "epoch": 2.13,
481
  "learning_rate": 2.037037037037037e-05,
482
- "loss": 0.1171,
483
  "step": 760
484
  },
485
  {
486
  "epoch": 2.14,
487
  "learning_rate": 1.990740740740741e-05,
488
- "loss": 0.043,
489
  "step": 770
490
  },
491
  {
492
  "epoch": 2.15,
493
  "learning_rate": 1.9444444444444445e-05,
494
- "loss": 0.3266,
495
  "step": 780
496
  },
497
  {
498
  "epoch": 2.16,
499
  "learning_rate": 1.8981481481481482e-05,
500
- "loss": 1.0509,
501
  "step": 790
502
  },
503
  {
504
  "epoch": 2.17,
505
  "learning_rate": 1.8518518518518518e-05,
506
- "loss": 0.0125,
507
  "step": 800
508
  },
509
  {
510
  "epoch": 2.17,
511
  "learning_rate": 1.8055555555555555e-05,
512
- "loss": 0.534,
513
  "step": 810
514
  },
515
  {
516
  "epoch": 2.18,
517
  "learning_rate": 1.7592592592592595e-05,
518
- "loss": 1.2978,
519
  "step": 820
520
  },
521
  {
522
  "epoch": 2.19,
523
  "learning_rate": 1.712962962962963e-05,
524
- "loss": 0.205,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.2,
529
  "learning_rate": 1.6666666666666667e-05,
530
- "loss": 0.2479,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.21,
535
  "learning_rate": 1.6203703703703704e-05,
536
- "loss": 0.2374,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.22,
541
  "learning_rate": 1.574074074074074e-05,
542
- "loss": 0.4977,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.23,
547
  "learning_rate": 1.527777777777778e-05,
548
- "loss": 0.164,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.23,
553
  "learning_rate": 1.4814814814814815e-05,
554
- "loss": 0.5244,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.24,
559
  "learning_rate": 1.4351851851851853e-05,
560
- "loss": 0.7385,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.25,
565
  "learning_rate": 1.388888888888889e-05,
566
- "loss": 0.1856,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.25,
571
- "eval_accuracy": 0.8571428571428571,
572
- "eval_loss": 0.4796161651611328,
573
- "eval_runtime": 8.0524,
574
- "eval_samples_per_second": 8.693,
575
- "eval_steps_per_second": 8.693,
576
  "step": 900
577
  },
578
  {
579
  "epoch": 3.01,
580
  "learning_rate": 1.3425925925925928e-05,
581
- "loss": 0.0568,
582
  "step": 910
583
  },
584
  {
585
  "epoch": 3.02,
586
  "learning_rate": 1.2962962962962962e-05,
587
- "loss": 0.264,
588
  "step": 920
589
  },
590
  {
591
  "epoch": 3.02,
592
  "learning_rate": 1.25e-05,
593
- "loss": 0.2365,
594
  "step": 930
595
  },
596
  {
597
  "epoch": 3.03,
598
  "learning_rate": 1.2037037037037037e-05,
599
- "loss": 0.0268,
600
  "step": 940
601
  },
602
  {
603
  "epoch": 3.04,
604
  "learning_rate": 1.1574074074074075e-05,
605
- "loss": 0.2831,
606
  "step": 950
607
  },
608
  {
609
  "epoch": 3.05,
610
  "learning_rate": 1.1111111111111112e-05,
611
- "loss": 0.3282,
612
  "step": 960
613
  },
614
  {
615
  "epoch": 3.06,
616
  "learning_rate": 1.0648148148148148e-05,
617
- "loss": 0.547,
618
  "step": 970
619
  },
620
  {
621
  "epoch": 3.07,
622
  "learning_rate": 1.0185185185185185e-05,
623
- "loss": 0.3342,
624
  "step": 980
625
  },
626
  {
627
  "epoch": 3.08,
628
  "learning_rate": 9.722222222222223e-06,
629
- "loss": 0.3915,
630
  "step": 990
631
  },
632
  {
633
  "epoch": 3.08,
634
  "learning_rate": 9.259259259259259e-06,
635
- "loss": 0.8406,
636
  "step": 1000
637
  },
638
  {
639
  "epoch": 3.09,
640
  "learning_rate": 8.796296296296297e-06,
641
- "loss": 0.1584,
642
  "step": 1010
643
  },
644
  {
645
  "epoch": 3.1,
646
  "learning_rate": 8.333333333333334e-06,
647
- "loss": 0.0057,
648
  "step": 1020
649
  },
650
  {
651
  "epoch": 3.11,
652
  "learning_rate": 7.87037037037037e-06,
653
- "loss": 0.0056,
654
  "step": 1030
655
  },
656
  {
657
  "epoch": 3.12,
658
  "learning_rate": 7.4074074074074075e-06,
659
- "loss": 0.0188,
660
  "step": 1040
661
  },
662
  {
663
  "epoch": 3.12,
664
  "learning_rate": 6.944444444444445e-06,
665
- "loss": 0.3416,
666
  "step": 1050
667
  },
668
  {
669
  "epoch": 3.13,
670
  "learning_rate": 6.481481481481481e-06,
671
- "loss": 0.0079,
672
  "step": 1060
673
  },
674
  {
675
  "epoch": 3.14,
676
  "learning_rate": 6.0185185185185185e-06,
677
- "loss": 0.0132,
678
  "step": 1070
679
  },
680
  {
681
  "epoch": 3.15,
682
  "learning_rate": 5.555555555555556e-06,
683
- "loss": 0.3391,
684
  "step": 1080
685
  },
686
  {
687
  "epoch": 3.16,
688
  "learning_rate": 5.092592592592592e-06,
689
- "loss": 0.702,
690
  "step": 1090
691
  },
692
  {
693
  "epoch": 3.17,
694
  "learning_rate": 4.6296296296296296e-06,
695
- "loss": 0.2409,
696
  "step": 1100
697
  },
698
  {
699
  "epoch": 3.17,
700
  "learning_rate": 4.166666666666667e-06,
701
- "loss": 0.0807,
702
  "step": 1110
703
  },
704
  {
705
  "epoch": 3.18,
706
  "learning_rate": 3.7037037037037037e-06,
707
- "loss": 0.0162,
708
  "step": 1120
709
  },
710
  {
711
  "epoch": 3.19,
712
  "learning_rate": 3.2407407407407406e-06,
713
- "loss": 0.4397,
714
  "step": 1130
715
  },
716
  {
717
  "epoch": 3.2,
718
  "learning_rate": 2.777777777777778e-06,
719
- "loss": 0.0262,
720
  "step": 1140
721
  },
722
  {
723
  "epoch": 3.21,
724
  "learning_rate": 2.3148148148148148e-06,
725
- "loss": 0.0113,
726
  "step": 1150
727
  },
728
  {
729
  "epoch": 3.22,
730
  "learning_rate": 1.8518518518518519e-06,
731
- "loss": 0.0421,
732
  "step": 1160
733
  },
734
  {
735
  "epoch": 3.23,
736
  "learning_rate": 1.388888888888889e-06,
737
- "loss": 0.0142,
738
  "step": 1170
739
  },
740
  {
741
  "epoch": 3.23,
742
  "learning_rate": 9.259259259259259e-07,
743
- "loss": 0.0231,
744
  "step": 1180
745
  },
746
  {
747
  "epoch": 3.24,
748
  "learning_rate": 4.6296296296296297e-07,
749
- "loss": 0.0173,
750
  "step": 1190
751
  },
752
  {
753
  "epoch": 3.25,
754
  "learning_rate": 0.0,
755
- "loss": 0.3141,
756
  "step": 1200
757
  },
758
  {
759
  "epoch": 3.25,
760
- "eval_accuracy": 0.8142857142857143,
761
- "eval_loss": 0.49507665634155273,
762
- "eval_runtime": 8.1862,
763
- "eval_samples_per_second": 8.551,
764
- "eval_steps_per_second": 8.551,
765
  "step": 1200
766
  },
767
  {
768
  "epoch": 3.25,
769
  "step": 1200,
770
  "total_flos": 1.495384188125184e+18,
771
- "train_loss": 0.9936102718735734,
772
- "train_runtime": 297.6864,
773
- "train_samples_per_second": 4.031,
774
- "train_steps_per_second": 4.031
775
  },
776
  {
777
  "epoch": 3.25,
778
- "eval_accuracy": 0.8387096774193549,
779
- "eval_loss": 0.6151520609855652,
780
- "eval_runtime": 18.5662,
781
- "eval_samples_per_second": 8.348,
782
- "eval_steps_per_second": 8.348,
783
  "step": 1200
784
  },
785
  {
786
  "epoch": 3.25,
787
- "eval_accuracy": 0.8387096774193549,
788
- "eval_loss": 0.6151520609855652,
789
- "eval_runtime": 18.1208,
790
- "eval_samples_per_second": 8.554,
791
- "eval_steps_per_second": 8.554,
792
  "step": 1200
793
  }
794
  ],
 
1
  {
2
+ "best_metric": 0.8857142857142857,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-1200",
4
  "epoch": 3.25,
5
  "eval_steps": 500,
6
  "global_step": 1200,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 4.166666666666667e-06,
14
+ "loss": 2.329,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.02,
19
  "learning_rate": 8.333333333333334e-06,
20
+ "loss": 2.2466,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.03,
25
  "learning_rate": 1.25e-05,
26
+ "loss": 2.2363,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.03,
31
  "learning_rate": 1.6666666666666667e-05,
32
+ "loss": 2.328,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 2.0833333333333336e-05,
38
+ "loss": 2.1135,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.05,
43
  "learning_rate": 2.5e-05,
44
+ "loss": 2.2845,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.06,
49
  "learning_rate": 2.916666666666667e-05,
50
+ "loss": 2.2225,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.07,
55
  "learning_rate": 3.3333333333333335e-05,
56
+ "loss": 2.163,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.07,
61
  "learning_rate": 3.7500000000000003e-05,
62
+ "loss": 2.1187,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.08,
67
  "learning_rate": 4.166666666666667e-05,
68
+ "loss": 2.2381,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.09,
73
  "learning_rate": 4.5833333333333334e-05,
74
+ "loss": 2.1272,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.1,
79
  "learning_rate": 5e-05,
80
+ "loss": 1.8531,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.11,
85
  "learning_rate": 4.9537037037037035e-05,
86
+ "loss": 2.5397,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.12,
91
  "learning_rate": 4.9074074074074075e-05,
92
+ "loss": 2.1318,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.12,
97
  "learning_rate": 4.8611111111111115e-05,
98
+ "loss": 2.3615,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.13,
103
  "learning_rate": 4.814814814814815e-05,
104
+ "loss": 2.0472,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.14,
109
  "learning_rate": 4.768518518518519e-05,
110
+ "loss": 2.0705,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.15,
115
  "learning_rate": 4.722222222222222e-05,
116
+ "loss": 1.8354,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.16,
121
  "learning_rate": 4.675925925925926e-05,
122
+ "loss": 2.1276,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.17,
127
  "learning_rate": 4.62962962962963e-05,
128
+ "loss": 2.1269,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.17,
133
  "learning_rate": 4.5833333333333334e-05,
134
+ "loss": 1.9699,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.18,
139
  "learning_rate": 4.5370370370370374e-05,
140
+ "loss": 2.2254,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 0.19,
145
  "learning_rate": 4.490740740740741e-05,
146
+ "loss": 2.1619,
147
  "step": 230
148
  },
149
  {
150
  "epoch": 0.2,
151
  "learning_rate": 4.4444444444444447e-05,
152
+ "loss": 2.0637,
153
  "step": 240
154
  },
155
  {
156
  "epoch": 0.21,
157
  "learning_rate": 4.3981481481481486e-05,
158
+ "loss": 1.5704,
159
  "step": 250
160
  },
161
  {
162
  "epoch": 0.22,
163
  "learning_rate": 4.351851851851852e-05,
164
+ "loss": 1.6747,
165
  "step": 260
166
  },
167
  {
168
  "epoch": 0.23,
169
  "learning_rate": 4.305555555555556e-05,
170
+ "loss": 2.4152,
171
  "step": 270
172
  },
173
  {
174
  "epoch": 0.23,
175
  "learning_rate": 4.259259259259259e-05,
176
+ "loss": 1.3747,
177
  "step": 280
178
  },
179
  {
180
  "epoch": 0.24,
181
  "learning_rate": 4.212962962962963e-05,
182
+ "loss": 1.5412,
183
  "step": 290
184
  },
185
  {
186
  "epoch": 0.25,
187
  "learning_rate": 4.166666666666667e-05,
188
+ "loss": 1.7031,
189
  "step": 300
190
  },
191
  {
192
  "epoch": 0.25,
193
+ "eval_accuracy": 0.15714285714285714,
194
+ "eval_loss": 2.6308274269104004,
195
+ "eval_runtime": 19.445,
196
+ "eval_samples_per_second": 3.6,
197
+ "eval_steps_per_second": 3.6,
198
  "step": 300
199
  },
200
  {
201
  "epoch": 1.01,
202
  "learning_rate": 4.1203703703703705e-05,
203
+ "loss": 2.6236,
204
  "step": 310
205
  },
206
  {
207
  "epoch": 1.02,
208
  "learning_rate": 4.074074074074074e-05,
209
+ "loss": 1.4325,
210
  "step": 320
211
  },
212
  {
213
  "epoch": 1.02,
214
  "learning_rate": 4.027777777777778e-05,
215
+ "loss": 1.1836,
216
  "step": 330
217
  },
218
  {
219
  "epoch": 1.03,
220
  "learning_rate": 3.981481481481482e-05,
221
+ "loss": 1.4357,
222
  "step": 340
223
  },
224
  {
225
  "epoch": 1.04,
226
  "learning_rate": 3.935185185185186e-05,
227
+ "loss": 1.4766,
228
  "step": 350
229
  },
230
  {
231
  "epoch": 1.05,
232
  "learning_rate": 3.888888888888889e-05,
233
+ "loss": 0.9234,
234
  "step": 360
235
  },
236
  {
237
  "epoch": 1.06,
238
  "learning_rate": 3.8425925925925924e-05,
239
+ "loss": 0.9579,
240
  "step": 370
241
  },
242
  {
243
  "epoch": 1.07,
244
  "learning_rate": 3.7962962962962964e-05,
245
+ "loss": 1.9765,
246
  "step": 380
247
  },
248
  {
249
  "epoch": 1.07,
250
  "learning_rate": 3.7500000000000003e-05,
251
+ "loss": 1.3833,
252
  "step": 390
253
  },
254
  {
255
  "epoch": 1.08,
256
  "learning_rate": 3.7037037037037037e-05,
257
+ "loss": 1.1666,
258
  "step": 400
259
  },
260
  {
261
  "epoch": 1.09,
262
  "learning_rate": 3.6574074074074076e-05,
263
+ "loss": 1.5377,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.1,
268
  "learning_rate": 3.611111111111111e-05,
269
+ "loss": 0.9717,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.11,
274
  "learning_rate": 3.564814814814815e-05,
275
+ "loss": 1.9949,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.12,
280
  "learning_rate": 3.518518518518519e-05,
281
+ "loss": 0.8711,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 1.12,
286
  "learning_rate": 3.472222222222222e-05,
287
+ "loss": 1.5926,
288
  "step": 450
289
  },
290
  {
291
  "epoch": 1.13,
292
  "learning_rate": 3.425925925925926e-05,
293
+ "loss": 0.774,
294
  "step": 460
295
  },
296
  {
297
  "epoch": 1.14,
298
  "learning_rate": 3.3796296296296295e-05,
299
+ "loss": 0.9463,
300
  "step": 470
301
  },
302
  {
303
  "epoch": 1.15,
304
  "learning_rate": 3.3333333333333335e-05,
305
+ "loss": 1.7684,
306
  "step": 480
307
  },
308
  {
309
  "epoch": 1.16,
310
  "learning_rate": 3.2870370370370375e-05,
311
+ "loss": 1.159,
312
  "step": 490
313
  },
314
  {
315
  "epoch": 1.17,
316
  "learning_rate": 3.240740740740741e-05,
317
+ "loss": 0.3306,
318
  "step": 500
319
  },
320
  {
321
  "epoch": 1.18,
322
  "learning_rate": 3.194444444444444e-05,
323
+ "loss": 1.6143,
324
  "step": 510
325
  },
326
  {
327
  "epoch": 1.18,
328
  "learning_rate": 3.148148148148148e-05,
329
+ "loss": 1.3537,
330
  "step": 520
331
  },
332
  {
333
  "epoch": 1.19,
334
  "learning_rate": 3.101851851851852e-05,
335
+ "loss": 1.1058,
336
  "step": 530
337
  },
338
  {
339
  "epoch": 1.2,
340
  "learning_rate": 3.055555555555556e-05,
341
+ "loss": 1.2422,
342
  "step": 540
343
  },
344
  {
345
  "epoch": 1.21,
346
  "learning_rate": 3.0092592592592593e-05,
347
+ "loss": 1.121,
348
  "step": 550
349
  },
350
  {
351
  "epoch": 1.22,
352
  "learning_rate": 2.962962962962963e-05,
353
+ "loss": 0.3986,
354
  "step": 560
355
  },
356
  {
357
  "epoch": 1.23,
358
  "learning_rate": 2.916666666666667e-05,
359
+ "loss": 0.9468,
360
  "step": 570
361
  },
362
  {
363
  "epoch": 1.23,
364
  "learning_rate": 2.8703703703703706e-05,
365
+ "loss": 0.665,
366
  "step": 580
367
  },
368
  {
369
  "epoch": 1.24,
370
  "learning_rate": 2.824074074074074e-05,
371
+ "loss": 1.8967,
372
  "step": 590
373
  },
374
  {
375
  "epoch": 1.25,
376
  "learning_rate": 2.777777777777778e-05,
377
+ "loss": 1.465,
378
  "step": 600
379
  },
380
  {
381
  "epoch": 1.25,
382
+ "eval_accuracy": 0.5,
383
+ "eval_loss": 1.529031753540039,
384
+ "eval_runtime": 20.4162,
385
+ "eval_samples_per_second": 3.429,
386
+ "eval_steps_per_second": 3.429,
387
  "step": 600
388
  },
389
  {
390
  "epoch": 2.01,
391
  "learning_rate": 2.7314814814814816e-05,
392
+ "loss": 0.6396,
393
  "step": 610
394
  },
395
  {
396
  "epoch": 2.02,
397
  "learning_rate": 2.6851851851851855e-05,
398
+ "loss": 2.567,
399
  "step": 620
400
  },
401
  {
402
  "epoch": 2.02,
403
  "learning_rate": 2.6388888888888892e-05,
404
+ "loss": 1.4652,
405
  "step": 630
406
  },
407
  {
408
  "epoch": 2.03,
409
  "learning_rate": 2.5925925925925925e-05,
410
+ "loss": 1.3357,
411
  "step": 640
412
  },
413
  {
414
  "epoch": 2.04,
415
  "learning_rate": 2.5462962962962965e-05,
416
+ "loss": 0.6774,
417
  "step": 650
418
  },
419
  {
420
  "epoch": 2.05,
421
  "learning_rate": 2.5e-05,
422
+ "loss": 0.8569,
423
  "step": 660
424
  },
425
  {
426
  "epoch": 2.06,
427
  "learning_rate": 2.4537037037037038e-05,
428
+ "loss": 1.5074,
429
  "step": 670
430
  },
431
  {
432
  "epoch": 2.07,
433
  "learning_rate": 2.4074074074074074e-05,
434
+ "loss": 0.1759,
435
  "step": 680
436
  },
437
  {
438
  "epoch": 2.08,
439
  "learning_rate": 2.361111111111111e-05,
440
+ "loss": 0.1035,
441
  "step": 690
442
  },
443
  {
444
  "epoch": 2.08,
445
  "learning_rate": 2.314814814814815e-05,
446
+ "loss": 1.5577,
447
  "step": 700
448
  },
449
  {
450
  "epoch": 2.09,
451
  "learning_rate": 2.2685185185185187e-05,
452
+ "loss": 1.3958,
453
  "step": 710
454
  },
455
  {
456
  "epoch": 2.1,
457
  "learning_rate": 2.2222222222222223e-05,
458
+ "loss": 0.6997,
459
  "step": 720
460
  },
461
  {
462
  "epoch": 2.11,
463
  "learning_rate": 2.175925925925926e-05,
464
+ "loss": 0.1684,
465
  "step": 730
466
  },
467
  {
468
  "epoch": 2.12,
469
  "learning_rate": 2.1296296296296296e-05,
470
+ "loss": 1.0568,
471
  "step": 740
472
  },
473
  {
474
  "epoch": 2.12,
475
  "learning_rate": 2.0833333333333336e-05,
476
+ "loss": 0.2304,
477
  "step": 750
478
  },
479
  {
480
  "epoch": 2.13,
481
  "learning_rate": 2.037037037037037e-05,
482
+ "loss": 0.3393,
483
  "step": 760
484
  },
485
  {
486
  "epoch": 2.14,
487
  "learning_rate": 1.990740740740741e-05,
488
+ "loss": 0.1672,
489
  "step": 770
490
  },
491
  {
492
  "epoch": 2.15,
493
  "learning_rate": 1.9444444444444445e-05,
494
+ "loss": 0.2065,
495
  "step": 780
496
  },
497
  {
498
  "epoch": 2.16,
499
  "learning_rate": 1.8981481481481482e-05,
500
+ "loss": 0.8764,
501
  "step": 790
502
  },
503
  {
504
  "epoch": 2.17,
505
  "learning_rate": 1.8518518518518518e-05,
506
+ "loss": 0.6443,
507
  "step": 800
508
  },
509
  {
510
  "epoch": 2.17,
511
  "learning_rate": 1.8055555555555555e-05,
512
+ "loss": 0.9863,
513
  "step": 810
514
  },
515
  {
516
  "epoch": 2.18,
517
  "learning_rate": 1.7592592592592595e-05,
518
+ "loss": 1.2478,
519
  "step": 820
520
  },
521
  {
522
  "epoch": 2.19,
523
  "learning_rate": 1.712962962962963e-05,
524
+ "loss": 0.5116,
525
  "step": 830
526
  },
527
  {
528
  "epoch": 2.2,
529
  "learning_rate": 1.6666666666666667e-05,
530
+ "loss": 0.4251,
531
  "step": 840
532
  },
533
  {
534
  "epoch": 2.21,
535
  "learning_rate": 1.6203703703703704e-05,
536
+ "loss": 0.3013,
537
  "step": 850
538
  },
539
  {
540
  "epoch": 2.22,
541
  "learning_rate": 1.574074074074074e-05,
542
+ "loss": 0.5455,
543
  "step": 860
544
  },
545
  {
546
  "epoch": 2.23,
547
  "learning_rate": 1.527777777777778e-05,
548
+ "loss": 0.236,
549
  "step": 870
550
  },
551
  {
552
  "epoch": 2.23,
553
  "learning_rate": 1.4814814814814815e-05,
554
+ "loss": 0.2929,
555
  "step": 880
556
  },
557
  {
558
  "epoch": 2.24,
559
  "learning_rate": 1.4351851851851853e-05,
560
+ "loss": 0.6095,
561
  "step": 890
562
  },
563
  {
564
  "epoch": 2.25,
565
  "learning_rate": 1.388888888888889e-05,
566
+ "loss": 0.1843,
567
  "step": 900
568
  },
569
  {
570
  "epoch": 2.25,
571
+ "eval_accuracy": 0.8714285714285714,
572
+ "eval_loss": 0.41582703590393066,
573
+ "eval_runtime": 19.8934,
574
+ "eval_samples_per_second": 3.519,
575
+ "eval_steps_per_second": 3.519,
576
  "step": 900
577
  },
578
  {
579
  "epoch": 3.01,
580
  "learning_rate": 1.3425925925925928e-05,
581
+ "loss": 0.2904,
582
  "step": 910
583
  },
584
  {
585
  "epoch": 3.02,
586
  "learning_rate": 1.2962962962962962e-05,
587
+ "loss": 0.6225,
588
  "step": 920
589
  },
590
  {
591
  "epoch": 3.02,
592
  "learning_rate": 1.25e-05,
593
+ "loss": 0.1852,
594
  "step": 930
595
  },
596
  {
597
  "epoch": 3.03,
598
  "learning_rate": 1.2037037037037037e-05,
599
+ "loss": 0.0468,
600
  "step": 940
601
  },
602
  {
603
  "epoch": 3.04,
604
  "learning_rate": 1.1574074074074075e-05,
605
+ "loss": 0.3321,
606
  "step": 950
607
  },
608
  {
609
  "epoch": 3.05,
610
  "learning_rate": 1.1111111111111112e-05,
611
+ "loss": 0.3365,
612
  "step": 960
613
  },
614
  {
615
  "epoch": 3.06,
616
  "learning_rate": 1.0648148148148148e-05,
617
+ "loss": 0.5674,
618
  "step": 970
619
  },
620
  {
621
  "epoch": 3.07,
622
  "learning_rate": 1.0185185185185185e-05,
623
+ "loss": 0.6881,
624
  "step": 980
625
  },
626
  {
627
  "epoch": 3.08,
628
  "learning_rate": 9.722222222222223e-06,
629
+ "loss": 0.317,
630
  "step": 990
631
  },
632
  {
633
  "epoch": 3.08,
634
  "learning_rate": 9.259259259259259e-06,
635
+ "loss": 0.6281,
636
  "step": 1000
637
  },
638
  {
639
  "epoch": 3.09,
640
  "learning_rate": 8.796296296296297e-06,
641
+ "loss": 0.4502,
642
  "step": 1010
643
  },
644
  {
645
  "epoch": 3.1,
646
  "learning_rate": 8.333333333333334e-06,
647
+ "loss": 0.0073,
648
  "step": 1020
649
  },
650
  {
651
  "epoch": 3.11,
652
  "learning_rate": 7.87037037037037e-06,
653
+ "loss": 0.0168,
654
  "step": 1030
655
  },
656
  {
657
  "epoch": 3.12,
658
  "learning_rate": 7.4074074074074075e-06,
659
+ "loss": 0.0054,
660
  "step": 1040
661
  },
662
  {
663
  "epoch": 3.12,
664
  "learning_rate": 6.944444444444445e-06,
665
+ "loss": 0.3653,
666
  "step": 1050
667
  },
668
  {
669
  "epoch": 3.13,
670
  "learning_rate": 6.481481481481481e-06,
671
+ "loss": 0.0088,
672
  "step": 1060
673
  },
674
  {
675
  "epoch": 3.14,
676
  "learning_rate": 6.0185185185185185e-06,
677
+ "loss": 0.014,
678
  "step": 1070
679
  },
680
  {
681
  "epoch": 3.15,
682
  "learning_rate": 5.555555555555556e-06,
683
+ "loss": 0.1096,
684
  "step": 1080
685
  },
686
  {
687
  "epoch": 3.16,
688
  "learning_rate": 5.092592592592592e-06,
689
+ "loss": 0.3679,
690
  "step": 1090
691
  },
692
  {
693
  "epoch": 3.17,
694
  "learning_rate": 4.6296296296296296e-06,
695
+ "loss": 0.0451,
696
  "step": 1100
697
  },
698
  {
699
  "epoch": 3.17,
700
  "learning_rate": 4.166666666666667e-06,
701
+ "loss": 0.5222,
702
  "step": 1110
703
  },
704
  {
705
  "epoch": 3.18,
706
  "learning_rate": 3.7037037037037037e-06,
707
+ "loss": 0.0134,
708
  "step": 1120
709
  },
710
  {
711
  "epoch": 3.19,
712
  "learning_rate": 3.2407407407407406e-06,
713
+ "loss": 0.4009,
714
  "step": 1130
715
  },
716
  {
717
  "epoch": 3.2,
718
  "learning_rate": 2.777777777777778e-06,
719
+ "loss": 0.1051,
720
  "step": 1140
721
  },
722
  {
723
  "epoch": 3.21,
724
  "learning_rate": 2.3148148148148148e-06,
725
+ "loss": 0.1059,
726
  "step": 1150
727
  },
728
  {
729
  "epoch": 3.22,
730
  "learning_rate": 1.8518518518518519e-06,
731
+ "loss": 0.5751,
732
  "step": 1160
733
  },
734
  {
735
  "epoch": 3.23,
736
  "learning_rate": 1.388888888888889e-06,
737
+ "loss": 0.0371,
738
  "step": 1170
739
  },
740
  {
741
  "epoch": 3.23,
742
  "learning_rate": 9.259259259259259e-07,
743
+ "loss": 0.039,
744
  "step": 1180
745
  },
746
  {
747
  "epoch": 3.24,
748
  "learning_rate": 4.6296296296296297e-07,
749
+ "loss": 0.1572,
750
  "step": 1190
751
  },
752
  {
753
  "epoch": 3.25,
754
  "learning_rate": 0.0,
755
+ "loss": 0.1232,
756
  "step": 1200
757
  },
758
  {
759
  "epoch": 3.25,
760
+ "eval_accuracy": 0.8857142857142857,
761
+ "eval_loss": 0.37796080112457275,
762
+ "eval_runtime": 19.8608,
763
+ "eval_samples_per_second": 3.525,
764
+ "eval_steps_per_second": 3.525,
765
  "step": 1200
766
  },
767
  {
768
  "epoch": 3.25,
769
  "step": 1200,
770
  "total_flos": 1.495384188125184e+18,
771
+ "train_loss": 1.0834253594931216,
772
+ "train_runtime": 831.8803,
773
+ "train_samples_per_second": 1.443,
774
+ "train_steps_per_second": 1.443
775
  },
776
  {
777
  "epoch": 3.25,
778
+ "eval_accuracy": 0.8709677419354839,
779
+ "eval_loss": 0.41407766938209534,
780
+ "eval_runtime": 52.821,
781
+ "eval_samples_per_second": 2.934,
782
+ "eval_steps_per_second": 2.934,
783
  "step": 1200
784
  },
785
  {
786
  "epoch": 3.25,
787
+ "eval_accuracy": 0.8709677419354839,
788
+ "eval_loss": 0.41407766938209534,
789
+ "eval_runtime": 44.3003,
790
+ "eval_samples_per_second": 3.499,
791
+ "eval_steps_per_second": 3.499,
792
  "step": 1200
793
  }
794
  ],