Training in progress, step 9000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f28de0041a3996b92bdb106301466f87586414e5ae7e2fd505f291b51c856ec9
 size 2373352

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d2fecf77d5e89bebc7e52b1e7ad4c3700095ba5ed6bf915b63994136730606c
 size 2373352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1c22ce2b4809c251cd311d01b0d15a3af6840e923cc4f5931c495ec63e7fcfe
 size 4899962

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5fa1150d4dd02cbaddb889caf0142f62f01de73f9465e86ec66568c57020a43
 size 4899962

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:270b6287a73e2067396ab6a3e2393f5969fb8398ad020b771c6503e2e2615a33
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:6430fbc179aa58d1d62e103957529b88997bd94d6e982250b44b521c1bed6dbe
 size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30dcb9447a9b271d3d135b50e56f91526062051264deca7285bd511e97fe15b3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ac049b3a3b9b8a4c04401dcee0ce067c5d564756ad040ddd31682eb4723f5c4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.740056037902832,
   "best_model_checkpoint": "miner_id_24/checkpoint-8000",
-  "epoch": 2.5779058306164226,
   "eval_steps": 500,
-  "global_step": 8500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
       "eval_samples_per_second": 56.958,
       "eval_steps_per_second": 14.245,
       "step": 8500
     }
   ],
   "logging_steps": 50,
@@ -1355,7 +1433,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1364,12 +1442,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9372318253056000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.740056037902832,
   "best_model_checkpoint": "miner_id_24/checkpoint-8000",
+  "epoch": 2.729547350064448,
   "eval_steps": 500,
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 56.958,
       "eval_steps_per_second": 14.245,
       "step": 8500
+    },
+    {
+      "epoch": 2.593069982561225,
+      "grad_norm": 0.09178922325372696,
+      "learning_rate": 1.2966973838057032e-06,
+      "loss": 11.7532,
+      "step": 8550
+    },
+    {
+      "epoch": 2.6082341345060276,
+      "grad_norm": 0.0969797670841217,
+      "learning_rate": 1.0250034170112066e-06,
+      "loss": 11.7431,
+      "step": 8600
+    },
+    {
+      "epoch": 2.62339828645083,
+      "grad_norm": 0.09565451741218567,
+      "learning_rate": 7.850739606764662e-07,
+      "loss": 11.7516,
+      "step": 8650
+    },
+    {
+      "epoch": 2.638562438395633,
+      "grad_norm": 0.09562574326992035,
+      "learning_rate": 5.769837511778591e-07,
+      "loss": 11.7592,
+      "step": 8700
+    },
+    {
+      "epoch": 2.653726590340435,
+      "grad_norm": 0.10010024160146713,
+      "learning_rate": 4.0079760718522074e-07,
+      "loss": 11.7424,
+      "step": 8750
+    },
+    {
+      "epoch": 2.668890742285238,
+      "grad_norm": 0.09608300030231476,
+      "learning_rate": 2.5657040947133024e-07,
+      "loss": 11.7489,
+      "step": 8800
+    },
+    {
+      "epoch": 2.68405489423004,
+      "grad_norm": 0.0922103077173233,
+      "learning_rate": 1.443470838168276e-07,
+      "loss": 11.7536,
+      "step": 8850
+    },
+    {
+      "epoch": 2.699219046174843,
+      "grad_norm": 0.08855465799570084,
+      "learning_rate": 6.416258701624544e-08,
+      "loss": 11.7358,
+      "step": 8900
+    },
+    {
+      "epoch": 2.714383198119645,
+      "grad_norm": 0.09823824465274811,
+      "learning_rate": 1.6041895989147846e-08,
+      "loss": 11.7648,
+      "step": 8950
+    },
+    {
+      "epoch": 2.729547350064448,
+      "grad_norm": 0.09125322103500366,
+      "learning_rate": 0.0,
+      "loss": 11.7279,
+      "step": 9000
+    },
+    {
+      "epoch": 2.729547350064448,
+      "eval_loss": 11.740180015563965,
+      "eval_runtime": 82.7885,
+      "eval_samples_per_second": 67.087,
+      "eval_steps_per_second": 16.778,
+      "step": 9000
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9923902488576000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null