Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be45cbcd51cc51bb35d1af48b2ffb1f9ae9c10be39d66f905233d4c3a4b737fc
 size 140815952

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab7de5a4f71f4c55592d7dd0bfec683332e4beef3a399148fd0647f05b016669
 size 140815952

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33394cad4e3d9b5e6d592055d9ad87473f3d502e226f52c57afdb2dc98760dc0
 size 71878996

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe90ba59438995207d6b2fd90994253ff0224026840ec367200c2735aa5ca6ac
 size 71878996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31aaaf2256a61489b16bce40ff91b9071167d2d3b8e40d009f16488b9f730ca0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f152b160914c2ede02f6e50de068f39d624bdf7552f9a2b416ec64ec58043054
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5092929601669312,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.0730593607305936,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 16.209,
       "eval_steps_per_second": 4.054,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.09139297271808e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5021965503692627,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.091324200913242,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.209,
       "eval_steps_per_second": 4.054,
       "step": 400
+    },
+    {
+      "epoch": 0.07324200913242009,
+      "grad_norm": 1.2463815212249756,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 0.6997,
+      "step": 401
+    },
+    {
+      "epoch": 0.07342465753424658,
+      "grad_norm": 1.217960000038147,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 0.4999,
+      "step": 402
+    },
+    {
+      "epoch": 0.07360730593607306,
+      "grad_norm": 1.2658041715621948,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 0.4848,
+      "step": 403
+    },
+    {
+      "epoch": 0.07378995433789955,
+      "grad_norm": 1.161754846572876,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.3601,
+      "step": 404
+    },
+    {
+      "epoch": 0.07397260273972603,
+      "grad_norm": 1.1665185689926147,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 0.4717,
+      "step": 405
+    },
+    {
+      "epoch": 0.07415525114155251,
+      "grad_norm": 1.2439416646957397,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 0.4756,
+      "step": 406
+    },
+    {
+      "epoch": 0.074337899543379,
+      "grad_norm": 1.0245717763900757,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 0.4582,
+      "step": 407
+    },
+    {
+      "epoch": 0.07452054794520548,
+      "grad_norm": 1.089290738105774,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 0.5299,
+      "step": 408
+    },
+    {
+      "epoch": 0.07470319634703196,
+      "grad_norm": 1.3025835752487183,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 0.5364,
+      "step": 409
+    },
+    {
+      "epoch": 0.07488584474885844,
+      "grad_norm": 1.1520413160324097,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.4844,
+      "step": 410
+    },
+    {
+      "epoch": 0.07506849315068494,
+      "grad_norm": 1.42751145362854,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 0.6758,
+      "step": 411
+    },
+    {
+      "epoch": 0.07525114155251142,
+      "grad_norm": 1.2706706523895264,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.5892,
+      "step": 412
+    },
+    {
+      "epoch": 0.0754337899543379,
+      "grad_norm": 1.106239676475525,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 0.6417,
+      "step": 413
+    },
+    {
+      "epoch": 0.07561643835616438,
+      "grad_norm": 1.1466580629348755,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 0.5323,
+      "step": 414
+    },
+    {
+      "epoch": 0.07579908675799087,
+      "grad_norm": 1.415773868560791,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 0.6715,
+      "step": 415
+    },
+    {
+      "epoch": 0.07598173515981735,
+      "grad_norm": 1.2140964269638062,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.5337,
+      "step": 416
+    },
+    {
+      "epoch": 0.07616438356164383,
+      "grad_norm": 1.058760643005371,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 0.4444,
+      "step": 417
+    },
+    {
+      "epoch": 0.07634703196347031,
+      "grad_norm": 1.1686381101608276,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 0.5853,
+      "step": 418
+    },
+    {
+      "epoch": 0.07652968036529681,
+      "grad_norm": 1.3012456893920898,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.6194,
+      "step": 419
+    },
+    {
+      "epoch": 0.07671232876712329,
+      "grad_norm": 1.3436243534088135,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.5871,
+      "step": 420
+    },
+    {
+      "epoch": 0.07689497716894977,
+      "grad_norm": 1.2779135704040527,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 0.6079,
+      "step": 421
+    },
+    {
+      "epoch": 0.07707762557077626,
+      "grad_norm": 1.21416175365448,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.5959,
+      "step": 422
+    },
+    {
+      "epoch": 0.07726027397260274,
+      "grad_norm": 1.3231496810913086,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.6456,
+      "step": 423
+    },
+    {
+      "epoch": 0.07744292237442922,
+      "grad_norm": 1.204132318496704,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.5165,
+      "step": 424
+    },
+    {
+      "epoch": 0.0776255707762557,
+      "grad_norm": 1.0210753679275513,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 0.5296,
+      "step": 425
+    },
+    {
+      "epoch": 0.07780821917808219,
+      "grad_norm": 1.1843194961547852,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 0.5952,
+      "step": 426
+    },
+    {
+      "epoch": 0.07799086757990868,
+      "grad_norm": 1.1271318197250366,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 0.4949,
+      "step": 427
+    },
+    {
+      "epoch": 0.07817351598173516,
+      "grad_norm": 1.4363117218017578,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.604,
+      "step": 428
+    },
+    {
+      "epoch": 0.07835616438356165,
+      "grad_norm": 1.2224055528640747,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.584,
+      "step": 429
+    },
+    {
+      "epoch": 0.07853881278538813,
+      "grad_norm": 1.8298218250274658,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.6796,
+      "step": 430
+    },
+    {
+      "epoch": 0.07872146118721461,
+      "grad_norm": 1.4991016387939453,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.6289,
+      "step": 431
+    },
+    {
+      "epoch": 0.0789041095890411,
+      "grad_norm": 1.097782015800476,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 0.4951,
+      "step": 432
+    },
+    {
+      "epoch": 0.07908675799086758,
+      "grad_norm": 1.333260416984558,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 0.6281,
+      "step": 433
+    },
+    {
+      "epoch": 0.07926940639269406,
+      "grad_norm": 1.578722357749939,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.5824,
+      "step": 434
+    },
+    {
+      "epoch": 0.07945205479452055,
+      "grad_norm": 0.9925230741500854,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 0.4832,
+      "step": 435
+    },
+    {
+      "epoch": 0.07963470319634704,
+      "grad_norm": 1.4724770784378052,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.5059,
+      "step": 436
+    },
+    {
+      "epoch": 0.07981735159817352,
+      "grad_norm": 1.2720314264297485,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.5338,
+      "step": 437
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.396422028541565,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 0.4048,
+      "step": 438
+    },
+    {
+      "epoch": 0.08018264840182648,
+      "grad_norm": 1.325544834136963,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 0.4413,
+      "step": 439
+    },
+    {
+      "epoch": 0.08036529680365297,
+      "grad_norm": 1.644364833831787,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.4839,
+      "step": 440
+    },
+    {
+      "epoch": 0.08054794520547945,
+      "grad_norm": 1.1722346544265747,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 0.3067,
+      "step": 441
+    },
+    {
+      "epoch": 0.08073059360730593,
+      "grad_norm": 1.310457706451416,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 0.4886,
+      "step": 442
+    },
+    {
+      "epoch": 0.08091324200913241,
+      "grad_norm": 1.7274094820022583,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 0.484,
+      "step": 443
+    },
+    {
+      "epoch": 0.08109589041095891,
+      "grad_norm": 1.5255731344223022,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.3585,
+      "step": 444
+    },
+    {
+      "epoch": 0.08127853881278539,
+      "grad_norm": 1.4306080341339111,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 0.4051,
+      "step": 445
+    },
+    {
+      "epoch": 0.08146118721461187,
+      "grad_norm": 1.2907383441925049,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.2441,
+      "step": 446
+    },
+    {
+      "epoch": 0.08164383561643836,
+      "grad_norm": 1.5992954969406128,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 0.4898,
+      "step": 447
+    },
+    {
+      "epoch": 0.08182648401826484,
+      "grad_norm": 1.7128164768218994,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 0.5083,
+      "step": 448
+    },
+    {
+      "epoch": 0.08200913242009132,
+      "grad_norm": 1.7184181213378906,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 0.382,
+      "step": 449
+    },
+    {
+      "epoch": 0.0821917808219178,
+      "grad_norm": 2.048771858215332,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.3999,
+      "step": 450
+    },
+    {
+      "epoch": 0.08237442922374429,
+      "grad_norm": 1.1284714937210083,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.5719,
+      "step": 451
+    },
+    {
+      "epoch": 0.08255707762557078,
+      "grad_norm": 1.186042308807373,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5015,
+      "step": 452
+    },
+    {
+      "epoch": 0.08273972602739726,
+      "grad_norm": 1.1713098287582397,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 0.4088,
+      "step": 453
+    },
+    {
+      "epoch": 0.08292237442922375,
+      "grad_norm": 1.1227015256881714,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 0.4691,
+      "step": 454
+    },
+    {
+      "epoch": 0.08310502283105023,
+      "grad_norm": 1.1555074453353882,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 0.4275,
+      "step": 455
+    },
+    {
+      "epoch": 0.08328767123287671,
+      "grad_norm": 1.7054256200790405,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 0.4563,
+      "step": 456
+    },
+    {
+      "epoch": 0.0834703196347032,
+      "grad_norm": 1.2316186428070068,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 0.5705,
+      "step": 457
+    },
+    {
+      "epoch": 0.08365296803652968,
+      "grad_norm": 1.1939345598220825,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 0.5377,
+      "step": 458
+    },
+    {
+      "epoch": 0.08383561643835616,
+      "grad_norm": 1.361952543258667,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 0.599,
+      "step": 459
+    },
+    {
+      "epoch": 0.08401826484018265,
+      "grad_norm": 1.2690627574920654,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.4931,
+      "step": 460
+    },
+    {
+      "epoch": 0.08420091324200914,
+      "grad_norm": 1.0819649696350098,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 0.4172,
+      "step": 461
+    },
+    {
+      "epoch": 0.08438356164383562,
+      "grad_norm": 1.047044038772583,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 0.468,
+      "step": 462
+    },
+    {
+      "epoch": 0.0845662100456621,
+      "grad_norm": 1.3348311185836792,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 0.5959,
+      "step": 463
+    },
+    {
+      "epoch": 0.08474885844748858,
+      "grad_norm": 1.244368076324463,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.6091,
+      "step": 464
+    },
+    {
+      "epoch": 0.08493150684931507,
+      "grad_norm": 1.2115455865859985,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 0.5653,
+      "step": 465
+    },
+    {
+      "epoch": 0.08511415525114155,
+      "grad_norm": 0.9911224842071533,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.422,
+      "step": 466
+    },
+    {
+      "epoch": 0.08529680365296803,
+      "grad_norm": 1.102059006690979,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.529,
+      "step": 467
+    },
+    {
+      "epoch": 0.08547945205479453,
+      "grad_norm": 1.138898491859436,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.5913,
+      "step": 468
+    },
+    {
+      "epoch": 0.08566210045662101,
+      "grad_norm": 1.2904703617095947,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.6278,
+      "step": 469
+    },
+    {
+      "epoch": 0.08584474885844749,
+      "grad_norm": 0.8520990610122681,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.401,
+      "step": 470
+    },
+    {
+      "epoch": 0.08602739726027397,
+      "grad_norm": 1.2092121839523315,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 0.5386,
+      "step": 471
+    },
+    {
+      "epoch": 0.08621004566210046,
+      "grad_norm": 1.0581966638565063,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.4301,
+      "step": 472
+    },
+    {
+      "epoch": 0.08639269406392694,
+      "grad_norm": 1.1138097047805786,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.552,
+      "step": 473
+    },
+    {
+      "epoch": 0.08657534246575342,
+      "grad_norm": 1.2832486629486084,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.544,
+      "step": 474
+    },
+    {
+      "epoch": 0.0867579908675799,
+      "grad_norm": 1.1421263217926025,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 0.4701,
+      "step": 475
+    },
+    {
+      "epoch": 0.08694063926940639,
+      "grad_norm": 1.7712730169296265,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.4951,
+      "step": 476
+    },
+    {
+      "epoch": 0.08712328767123288,
+      "grad_norm": 1.3618603944778442,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.7162,
+      "step": 477
+    },
+    {
+      "epoch": 0.08730593607305936,
+      "grad_norm": 1.3067758083343506,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 0.5436,
+      "step": 478
+    },
+    {
+      "epoch": 0.08748858447488585,
+      "grad_norm": 1.1023305654525757,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 0.5493,
+      "step": 479
+    },
+    {
+      "epoch": 0.08767123287671233,
+      "grad_norm": 1.245082974433899,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.604,
+      "step": 480
+    },
+    {
+      "epoch": 0.08785388127853881,
+      "grad_norm": 1.2294963598251343,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.5481,
+      "step": 481
+    },
+    {
+      "epoch": 0.0880365296803653,
+      "grad_norm": 1.3997677564620972,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.4414,
+      "step": 482
+    },
+    {
+      "epoch": 0.08821917808219178,
+      "grad_norm": 1.1400816440582275,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.4481,
+      "step": 483
+    },
+    {
+      "epoch": 0.08840182648401826,
+      "grad_norm": 1.1948268413543701,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.456,
+      "step": 484
+    },
+    {
+      "epoch": 0.08858447488584476,
+      "grad_norm": 1.2014074325561523,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.4554,
+      "step": 485
+    },
+    {
+      "epoch": 0.08876712328767124,
+      "grad_norm": 1.2776867151260376,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 0.4105,
+      "step": 486
+    },
+    {
+      "epoch": 0.08894977168949772,
+      "grad_norm": 1.259320616722107,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 0.4406,
+      "step": 487
+    },
+    {
+      "epoch": 0.0891324200913242,
+      "grad_norm": 1.1561946868896484,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.4482,
+      "step": 488
+    },
+    {
+      "epoch": 0.08931506849315068,
+      "grad_norm": 1.4489840269088745,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 0.4719,
+      "step": 489
+    },
+    {
+      "epoch": 0.08949771689497717,
+      "grad_norm": 1.1808651685714722,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.337,
+      "step": 490
+    },
+    {
+      "epoch": 0.08968036529680365,
+      "grad_norm": 1.3887622356414795,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 0.4287,
+      "step": 491
+    },
+    {
+      "epoch": 0.08986301369863013,
+      "grad_norm": 1.3295773267745972,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.4468,
+      "step": 492
+    },
+    {
+      "epoch": 0.09004566210045663,
+      "grad_norm": 1.7511096000671387,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 0.4839,
+      "step": 493
+    },
+    {
+      "epoch": 0.09022831050228311,
+      "grad_norm": 1.6349002122879028,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.3704,
+      "step": 494
+    },
+    {
+      "epoch": 0.09041095890410959,
+      "grad_norm": 1.719805121421814,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 0.4897,
+      "step": 495
+    },
+    {
+      "epoch": 0.09059360730593607,
+      "grad_norm": 1.7145909070968628,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 0.3541,
+      "step": 496
+    },
+    {
+      "epoch": 0.09077625570776256,
+      "grad_norm": 1.6819974184036255,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 0.3164,
+      "step": 497
+    },
+    {
+      "epoch": 0.09095890410958904,
+      "grad_norm": 2.2097480297088623,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 0.3985,
+      "step": 498
+    },
+    {
+      "epoch": 0.09114155251141552,
+      "grad_norm": 1.636614441871643,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 0.3184,
+      "step": 499
+    },
+    {
+      "epoch": 0.091324200913242,
+      "grad_norm": 2.1838996410369873,
+      "learning_rate": 0.0,
+      "loss": 0.4692,
+      "step": 500
+    },
+    {
+      "epoch": 0.091324200913242,
+      "eval_loss": 0.5021965503692627,
+      "eval_runtime": 574.0387,
+      "eval_samples_per_second": 16.063,
+      "eval_steps_per_second": 4.017,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.8642412158976e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null