Training in progress, step 425, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +179 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:134499e57d45a7fd5ce0fc745d89b7d5235a5c637b45907532bfc81ea64c4fb3
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9ece51e98aa5e08bb825978d3489e0352183f84e442b36189d9c7c900ccfa7b
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07ca44ce0f454b04e9a008cc43f4a349cc7cacf9880fc83effba51973d8c4e74
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:307dcb2034acb404762de1201cda3ef739d641f9fded961a58856b8c457cf964
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:859771627dc023f016f01621bc5c786af762f9d6b951abdbadba930557ca3e1a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f714b289dd09c25736d2eb5fc6375b2e8df7d87d36c1242bd537b49bf96a11fa
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca2768ac4ca9856b6b7269d283904106ae00a0ce974111c991dbbe02a93bf930
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f0fa4917cbab6353dc9863ee5bc208db299bf865d0e1def1c60860a29287ca8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.053612470626831,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.9422850412249706,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,181 @@
       "eval_samples_per_second": 9.993,
       "eval_steps_per_second": 2.502,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3045,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.884979311961702e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.053612470626831,
   "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 1.0011778563015312,
   "eval_steps": 100,
+  "global_step": 425,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.993,
       "eval_steps_per_second": 2.502,
       "step": 400
+    },
+    {
+      "epoch": 0.944640753828033,
+      "grad_norm": 2.428661346435547,
+      "learning_rate": 8.639676646793382e-07,
+      "loss": 2.9634,
+      "step": 401
+    },
+    {
+      "epoch": 0.9469964664310954,
+      "grad_norm": 2.951463460922241,
+      "learning_rate": 7.936574269178377e-07,
+      "loss": 3.2676,
+      "step": 402
+    },
+    {
+      "epoch": 0.9493521790341578,
+      "grad_norm": 2.77046537399292,
+      "learning_rate": 7.263079859864297e-07,
+      "loss": 3.3325,
+      "step": 403
+    },
+    {
+      "epoch": 0.9517078916372202,
+      "grad_norm": 2.883030652999878,
+      "learning_rate": 6.61923394371039e-07,
+      "loss": 3.2712,
+      "step": 404
+    },
+    {
+      "epoch": 0.9540636042402827,
+      "grad_norm": 2.923128843307495,
+      "learning_rate": 6.005075261595494e-07,
+      "loss": 3.4014,
+      "step": 405
+    },
+    {
+      "epoch": 0.9564193168433451,
+      "grad_norm": 2.9878504276275635,
+      "learning_rate": 5.42064076808646e-07,
+      "loss": 3.7119,
+      "step": 406
+    },
+    {
+      "epoch": 0.9587750294464076,
+      "grad_norm": 3.2617733478546143,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 3.5583,
+      "step": 407
+    },
+    {
+      "epoch": 0.9611307420494699,
+      "grad_norm": 3.0995283126831055,
+      "learning_rate": 4.341083220360864e-07,
+      "loss": 3.511,
+      "step": 408
+    },
+    {
+      "epoch": 0.9634864546525324,
+      "grad_norm": 3.3939716815948486,
+      "learning_rate": 3.846025124245145e-07,
+      "loss": 3.6265,
+      "step": 409
+    },
+    {
+      "epoch": 0.9658421672555948,
+      "grad_norm": 3.365983486175537,
+      "learning_rate": 3.380821129028489e-07,
+      "loss": 3.8739,
+      "step": 410
+    },
+    {
+      "epoch": 0.9681978798586572,
+      "grad_norm": 3.7531888484954834,
+      "learning_rate": 2.945499226519322e-07,
+      "loss": 3.5749,
+      "step": 411
+    },
+    {
+      "epoch": 0.9705535924617197,
+      "grad_norm": 4.2459940910339355,
+      "learning_rate": 2.5400856104894067e-07,
+      "loss": 4.4775,
+      "step": 412
+    },
+    {
+      "epoch": 0.9729093050647821,
+      "grad_norm": 4.396928310394287,
+      "learning_rate": 2.1646046750978254e-07,
+      "loss": 4.078,
+      "step": 413
+    },
+    {
+      "epoch": 0.9752650176678446,
+      "grad_norm": 4.993990421295166,
+      "learning_rate": 1.819079013423153e-07,
+      "loss": 5.1984,
+      "step": 414
+    },
+    {
+      "epoch": 0.9776207302709069,
+      "grad_norm": 8.399889945983887,
+      "learning_rate": 1.503529416103988e-07,
+      "loss": 4.4625,
+      "step": 415
+    },
+    {
+      "epoch": 0.9799764428739693,
+      "grad_norm": 5.020269870758057,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 4.6616,
+      "step": 416
+    },
+    {
+      "epoch": 0.9823321554770318,
+      "grad_norm": 6.619906902313232,
+      "learning_rate": 9.624325574890125e-08,
+      "loss": 5.0221,
+      "step": 417
+    },
+    {
+      "epoch": 0.9846878680800942,
+      "grad_norm": 6.48217248916626,
+      "learning_rate": 7.369178545542088e-08,
+      "loss": 4.5943,
+      "step": 418
+    },
+    {
+      "epoch": 0.9870435806831567,
+      "grad_norm": 7.419217109680176,
+      "learning_rate": 5.4144433073771707e-08,
+      "loss": 5.5227,
+      "step": 419
+    },
+    {
+      "epoch": 0.9893992932862191,
+      "grad_norm": 8.479371070861816,
+      "learning_rate": 3.760237478849793e-08,
+      "loss": 5.264,
+      "step": 420
+    },
+    {
+      "epoch": 0.9917550058892816,
+      "grad_norm": 18.458097457885742,
+      "learning_rate": 2.4066605952444142e-08,
+      "loss": 5.3962,
+      "step": 421
+    },
+    {
+      "epoch": 0.9941107184923439,
+      "grad_norm": 10.58730697631836,
+      "learning_rate": 1.3537941026914303e-08,
+      "loss": 4.2135,
+      "step": 422
+    },
+    {
+      "epoch": 0.9964664310954063,
+      "grad_norm": 12.496702194213867,
+      "learning_rate": 6.017013532627624e-09,
+      "loss": 4.5984,
+      "step": 423
+    },
+    {
+      "epoch": 0.9988221436984688,
+      "grad_norm": 18.563827514648438,
+      "learning_rate": 1.5042760116212861e-09,
+      "loss": 4.4128,
+      "step": 424
+    },
+    {
+      "epoch": 1.0011778563015312,
+      "grad_norm": 32.98548889160156,
+      "learning_rate": 0.0,
+      "loss": 5.637,
+      "step": 425
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.258827541809725e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null