Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b085157f0473441fb9977d3df11a7a4e193b6bdf47d705257fba06b83141453a
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3df5ef1d2ce5b02479ceb8e3ff2c26303b53b7924fe7487ea356dcac19c2031
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:034873755ff7773119caf8c281745405cc671d8e01312b19148a01f4c30f2e83
 size 147360212

 version https://git-lfs.github.com/spec/v1
+oid sha256:f88b8faa4dc76b21ec9d652300c7f1c59b14f5c7e5ac47c3b13c0b8d662d7ce3
 size 147360212

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c42e7102d7712bedf568055f461ee69fd9419d0183ab66940d3e4fb3204d09fe
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8951b704db56af5e70fa3834be5689c67fd8794fc01b847766f858aee8282b5f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6768385171890259,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.07969318125217911,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 31.851,
       "eval_steps_per_second": 7.965,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.3302341221692211e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6748951077461243,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.09961647656522389,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.851,
       "eval_steps_per_second": 7.965,
       "step": 400
+    },
+    {
+      "epoch": 0.07989241420530956,
+      "grad_norm": 0.19331474602222443,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 0.5627,
+      "step": 401
+    },
+    {
+      "epoch": 0.08009164715844,
+      "grad_norm": 0.24819611012935638,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 0.7396,
+      "step": 402
+    },
+    {
+      "epoch": 0.08029088011157046,
+      "grad_norm": 0.2520431578159332,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 0.6631,
+      "step": 403
+    },
+    {
+      "epoch": 0.0804901130647009,
+      "grad_norm": 0.21120384335517883,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6727,
+      "step": 404
+    },
+    {
+      "epoch": 0.08068934601783134,
+      "grad_norm": 0.20902977883815765,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 0.6242,
+      "step": 405
+    },
+    {
+      "epoch": 0.0808885789709618,
+      "grad_norm": 0.18309302628040314,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 0.5031,
+      "step": 406
+    },
+    {
+      "epoch": 0.08108781192409224,
+      "grad_norm": 0.21523945033550262,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 0.6427,
+      "step": 407
+    },
+    {
+      "epoch": 0.08128704487722269,
+      "grad_norm": 0.22867245972156525,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 0.6857,
+      "step": 408
+    },
+    {
+      "epoch": 0.08148627783035314,
+      "grad_norm": 0.2331218272447586,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 0.6583,
+      "step": 409
+    },
+    {
+      "epoch": 0.08168551078348359,
+      "grad_norm": 0.2502421736717224,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.7734,
+      "step": 410
+    },
+    {
+      "epoch": 0.08188474373661403,
+      "grad_norm": 0.22778432071208954,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 0.6216,
+      "step": 411
+    },
+    {
+      "epoch": 0.08208397668974449,
+      "grad_norm": 0.20863741636276245,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.5912,
+      "step": 412
+    },
+    {
+      "epoch": 0.08228320964287493,
+      "grad_norm": 0.22159984707832336,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 0.6247,
+      "step": 413
+    },
+    {
+      "epoch": 0.08248244259600537,
+      "grad_norm": 0.2666146457195282,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 0.748,
+      "step": 414
+    },
+    {
+      "epoch": 0.08268167554913583,
+      "grad_norm": 0.25675085186958313,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 0.6865,
+      "step": 415
+    },
+    {
+      "epoch": 0.08288090850226627,
+      "grad_norm": 0.24603363871574402,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.6717,
+      "step": 416
+    },
+    {
+      "epoch": 0.08308014145539672,
+      "grad_norm": 0.31044501066207886,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 0.662,
+      "step": 417
+    },
+    {
+      "epoch": 0.08327937440852717,
+      "grad_norm": 0.22573980689048767,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 0.6391,
+      "step": 418
+    },
+    {
+      "epoch": 0.08347860736165762,
+      "grad_norm": 0.22419226169586182,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.6233,
+      "step": 419
+    },
+    {
+      "epoch": 0.08367784031478806,
+      "grad_norm": 0.2326180338859558,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.7135,
+      "step": 420
+    },
+    {
+      "epoch": 0.08387707326791852,
+      "grad_norm": 0.2318958342075348,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 0.6337,
+      "step": 421
+    },
+    {
+      "epoch": 0.08407630622104896,
+      "grad_norm": 0.24011459946632385,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.6822,
+      "step": 422
+    },
+    {
+      "epoch": 0.0842755391741794,
+      "grad_norm": 0.25653573870658875,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.6508,
+      "step": 423
+    },
+    {
+      "epoch": 0.08447477212730986,
+      "grad_norm": 0.22322338819503784,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.6078,
+      "step": 424
+    },
+    {
+      "epoch": 0.0846740050804403,
+      "grad_norm": 0.2577781677246094,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 0.683,
+      "step": 425
+    },
+    {
+      "epoch": 0.08487323803357075,
+      "grad_norm": 0.22077085077762604,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 0.6902,
+      "step": 426
+    },
+    {
+      "epoch": 0.0850724709867012,
+      "grad_norm": 0.2472268044948578,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 0.6837,
+      "step": 427
+    },
+    {
+      "epoch": 0.08527170393983165,
+      "grad_norm": 0.22269569337368011,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.6202,
+      "step": 428
+    },
+    {
+      "epoch": 0.08547093689296209,
+      "grad_norm": 0.21259906888008118,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.6024,
+      "step": 429
+    },
+    {
+      "epoch": 0.08567016984609255,
+      "grad_norm": 0.24221090972423553,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.6671,
+      "step": 430
+    },
+    {
+      "epoch": 0.08586940279922299,
+      "grad_norm": 0.24661476910114288,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.658,
+      "step": 431
+    },
+    {
+      "epoch": 0.08606863575235343,
+      "grad_norm": 0.2605218291282654,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 0.63,
+      "step": 432
+    },
+    {
+      "epoch": 0.08626786870548389,
+      "grad_norm": 0.2187577337026596,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 0.5845,
+      "step": 433
+    },
+    {
+      "epoch": 0.08646710165861433,
+      "grad_norm": 0.2221374809741974,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.5466,
+      "step": 434
+    },
+    {
+      "epoch": 0.08666633461174478,
+      "grad_norm": 0.2580986022949219,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 0.6753,
+      "step": 435
+    },
+    {
+      "epoch": 0.08686556756487523,
+      "grad_norm": 0.2541551887989044,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.655,
+      "step": 436
+    },
+    {
+      "epoch": 0.08706480051800568,
+      "grad_norm": 0.31278523802757263,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.7489,
+      "step": 437
+    },
+    {
+      "epoch": 0.08726403347113612,
+      "grad_norm": 0.2648221552371979,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 0.6957,
+      "step": 438
+    },
+    {
+      "epoch": 0.08746326642426658,
+      "grad_norm": 0.2753226161003113,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 0.6294,
+      "step": 439
+    },
+    {
+      "epoch": 0.08766249937739702,
+      "grad_norm": 0.26904359459877014,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.7311,
+      "step": 440
+    },
+    {
+      "epoch": 0.08786173233052746,
+      "grad_norm": 0.26399528980255127,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 0.6948,
+      "step": 441
+    },
+    {
+      "epoch": 0.08806096528365792,
+      "grad_norm": 0.2822622060775757,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 0.6786,
+      "step": 442
+    },
+    {
+      "epoch": 0.08826019823678836,
+      "grad_norm": 0.265007346868515,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 0.7124,
+      "step": 443
+    },
+    {
+      "epoch": 0.08845943118991881,
+      "grad_norm": 0.257730633020401,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.6245,
+      "step": 444
+    },
+    {
+      "epoch": 0.08865866414304927,
+      "grad_norm": 0.30108872056007385,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 0.7926,
+      "step": 445
+    },
+    {
+      "epoch": 0.08885789709617971,
+      "grad_norm": 0.3035549819469452,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.8031,
+      "step": 446
+    },
+    {
+      "epoch": 0.08905713004931015,
+      "grad_norm": 0.2673462927341461,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 0.6643,
+      "step": 447
+    },
+    {
+      "epoch": 0.08925636300244061,
+      "grad_norm": 0.3156924545764923,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 0.7175,
+      "step": 448
+    },
+    {
+      "epoch": 0.08945559595557105,
+      "grad_norm": 0.29648149013519287,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 0.7006,
+      "step": 449
+    },
+    {
+      "epoch": 0.0896548289087015,
+      "grad_norm": 0.304087370634079,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.6956,
+      "step": 450
+    },
+    {
+      "epoch": 0.08985406186183195,
+      "grad_norm": 0.18861311674118042,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.5796,
+      "step": 451
+    },
+    {
+      "epoch": 0.0900532948149624,
+      "grad_norm": 0.20030918717384338,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5735,
+      "step": 452
+    },
+    {
+      "epoch": 0.09025252776809284,
+      "grad_norm": 0.20259323716163635,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 0.5841,
+      "step": 453
+    },
+    {
+      "epoch": 0.0904517607212233,
+      "grad_norm": 0.22177210450172424,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 0.6743,
+      "step": 454
+    },
+    {
+      "epoch": 0.09065099367435374,
+      "grad_norm": 0.20536138117313385,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 0.6299,
+      "step": 455
+    },
+    {
+      "epoch": 0.09085022662748418,
+      "grad_norm": 0.21932297945022583,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 0.7102,
+      "step": 456
+    },
+    {
+      "epoch": 0.09104945958061464,
+      "grad_norm": 0.1917608678340912,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 0.5939,
+      "step": 457
+    },
+    {
+      "epoch": 0.09124869253374508,
+      "grad_norm": 0.21632328629493713,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 0.6413,
+      "step": 458
+    },
+    {
+      "epoch": 0.09144792548687553,
+      "grad_norm": 0.23702724277973175,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 0.6321,
+      "step": 459
+    },
+    {
+      "epoch": 0.09164715844000598,
+      "grad_norm": 0.2379865050315857,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.6566,
+      "step": 460
+    },
+    {
+      "epoch": 0.09184639139313643,
+      "grad_norm": 0.23803111910820007,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 0.6265,
+      "step": 461
+    },
+    {
+      "epoch": 0.09204562434626687,
+      "grad_norm": 0.23217985033988953,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 0.7032,
+      "step": 462
+    },
+    {
+      "epoch": 0.09224485729939733,
+      "grad_norm": 0.2249220609664917,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 0.608,
+      "step": 463
+    },
+    {
+      "epoch": 0.09244409025252777,
+      "grad_norm": 0.21133165061473846,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.6084,
+      "step": 464
+    },
+    {
+      "epoch": 0.09264332320565821,
+      "grad_norm": 0.22552044689655304,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 0.6371,
+      "step": 465
+    },
+    {
+      "epoch": 0.09284255615878867,
+      "grad_norm": 0.2244846075773239,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.654,
+      "step": 466
+    },
+    {
+      "epoch": 0.09304178911191911,
+      "grad_norm": 0.24191239476203918,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.7616,
+      "step": 467
+    },
+    {
+      "epoch": 0.09324102206504956,
+      "grad_norm": 0.22811263799667358,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.6378,
+      "step": 468
+    },
+    {
+      "epoch": 0.09344025501818001,
+      "grad_norm": 0.22340433299541473,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.596,
+      "step": 469
+    },
+    {
+      "epoch": 0.09363948797131046,
+      "grad_norm": 0.24107715487480164,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.6633,
+      "step": 470
+    },
+    {
+      "epoch": 0.0938387209244409,
+      "grad_norm": 0.24450042843818665,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 0.706,
+      "step": 471
+    },
+    {
+      "epoch": 0.09403795387757136,
+      "grad_norm": 0.24326194822788239,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.6342,
+      "step": 472
+    },
+    {
+      "epoch": 0.0942371868307018,
+      "grad_norm": 0.25381046533584595,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.6603,
+      "step": 473
+    },
+    {
+      "epoch": 0.09443641978383224,
+      "grad_norm": 0.24382087588310242,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.638,
+      "step": 474
+    },
+    {
+      "epoch": 0.0946356527369627,
+      "grad_norm": 0.27060163021087646,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 0.7178,
+      "step": 475
+    },
+    {
+      "epoch": 0.09483488569009314,
+      "grad_norm": 0.24051162600517273,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.5669,
+      "step": 476
+    },
+    {
+      "epoch": 0.09503411864322359,
+      "grad_norm": 0.2567567527294159,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.6944,
+      "step": 477
+    },
+    {
+      "epoch": 0.09523335159635404,
+      "grad_norm": 0.2464078813791275,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 0.7134,
+      "step": 478
+    },
+    {
+      "epoch": 0.09543258454948449,
+      "grad_norm": 0.2505365014076233,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 0.7042,
+      "step": 479
+    },
+    {
+      "epoch": 0.09563181750261493,
+      "grad_norm": 0.253229022026062,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.6158,
+      "step": 480
+    },
+    {
+      "epoch": 0.09583105045574539,
+      "grad_norm": 0.23370255529880524,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.6129,
+      "step": 481
+    },
+    {
+      "epoch": 0.09603028340887583,
+      "grad_norm": 0.23140022158622742,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.5864,
+      "step": 482
+    },
+    {
+      "epoch": 0.09622951636200627,
+      "grad_norm": 0.2647562026977539,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.6768,
+      "step": 483
+    },
+    {
+      "epoch": 0.09642874931513673,
+      "grad_norm": 0.2301555573940277,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.6862,
+      "step": 484
+    },
+    {
+      "epoch": 0.09662798226826717,
+      "grad_norm": 0.25709620118141174,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.6935,
+      "step": 485
+    },
+    {
+      "epoch": 0.09682721522139762,
+      "grad_norm": 0.27146580815315247,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 0.7363,
+      "step": 486
+    },
+    {
+      "epoch": 0.09702644817452807,
+      "grad_norm": 0.25563889741897583,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 0.6381,
+      "step": 487
+    },
+    {
+      "epoch": 0.09722568112765852,
+      "grad_norm": 0.2607404291629791,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.6381,
+      "step": 488
+    },
+    {
+      "epoch": 0.09742491408078896,
+      "grad_norm": 0.25664660334587097,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 0.7206,
+      "step": 489
+    },
+    {
+      "epoch": 0.0976241470339194,
+      "grad_norm": 0.26443812251091003,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.6878,
+      "step": 490
+    },
+    {
+      "epoch": 0.09782337998704986,
+      "grad_norm": 0.24642571806907654,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 0.6992,
+      "step": 491
+    },
+    {
+      "epoch": 0.0980226129401803,
+      "grad_norm": 0.25187572836875916,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.7137,
+      "step": 492
+    },
+    {
+      "epoch": 0.09822184589331075,
+      "grad_norm": 0.3068501651287079,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 0.7202,
+      "step": 493
+    },
+    {
+      "epoch": 0.0984210788464412,
+      "grad_norm": 0.2589758634567261,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.6706,
+      "step": 494
+    },
+    {
+      "epoch": 0.09862031179957165,
+      "grad_norm": 0.2692635953426361,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 0.6816,
+      "step": 495
+    },
+    {
+      "epoch": 0.09881954475270209,
+      "grad_norm": 0.2632291615009308,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 0.6855,
+      "step": 496
+    },
+    {
+      "epoch": 0.09901877770583255,
+      "grad_norm": 0.27418622374534607,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 0.7329,
+      "step": 497
+    },
+    {
+      "epoch": 0.09921801065896299,
+      "grad_norm": 0.2903919816017151,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 0.7565,
+      "step": 498
+    },
+    {
+      "epoch": 0.09941724361209343,
+      "grad_norm": 0.3054603636264801,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 0.7183,
+      "step": 499
+    },
+    {
+      "epoch": 0.09961647656522389,
+      "grad_norm": 0.30139267444610596,
+      "learning_rate": 0.0,
+      "loss": 0.6969,
+      "step": 500
+    },
+    {
+      "epoch": 0.09961647656522389,
+      "eval_loss": 0.6748951077461243,
+      "eval_runtime": 265.2168,
+      "eval_samples_per_second": 31.876,
+      "eval_steps_per_second": 7.971,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6627926527115264e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null