Training in progress, step 9000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9a09bf824d1bff7f721989176c12ec92b02b40ca78e44d7c8e4c738314072ac
 size 2373352

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb70ce200be730fd5499defcbfcb51225d5644fc671500c2d1e76726ae7a7b69
 size 2373352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e83caef5497bb67bd04b39079e16ace1f78ec4498cbd170d4a7b16c79af73738
 size 4899962

 version https://git-lfs.github.com/spec/v1
+oid sha256:880fe8b8b6ca7aa262d4bc8e11156fdeb25e208eed751939d85c62702edcc8a6
 size 4899962

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8646e34fc890e21269375270a6e0ce2a32bcc2b1ad55fb196f6b1c226c2916f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a517b9ab4a35704cbd42e43ce8e92db5b3f21b720728caa748dca82d0f6c7c9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04dd85931f65b5c9f16df9d76eecddafd1b76e4a002da7e064432627ec5efeee
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:183dda8021864ec2543194942c7e9cf691d24dcbf84a0d87f80ed861dca5e467
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.543472290039062,
   "best_model_checkpoint": "miner_id_24/checkpoint-8500",
-  "epoch": 2.443406395975566,
   "eval_steps": 500,
-  "global_step": 8500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
       "eval_samples_per_second": 66.152,
       "eval_steps_per_second": 16.541,
       "step": 8500
     }
   ],
   "logging_steps": 50,
@@ -1355,7 +1433,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1364,12 +1442,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9372593700864000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.543472290039062,
   "best_model_checkpoint": "miner_id_24/checkpoint-8500",
+  "epoch": 2.5871361839741285,
   "eval_steps": 500,
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 66.152,
       "eval_steps_per_second": 16.541,
       "step": 8500
+    },
+    {
+      "epoch": 2.457779374775422,
+      "grad_norm": 0.19527162611484528,
+      "learning_rate": 1.3155813262883107e-06,
+      "loss": 11.552,
+      "step": 8550
+    },
+    {
+      "epoch": 2.4721523535752783,
+      "grad_norm": 0.20165836811065674,
+      "learning_rate": 1.0399306512395251e-06,
+      "loss": 11.5652,
+      "step": 8600
+    },
+    {
+      "epoch": 2.486525332375135,
+      "grad_norm": 0.20157189667224884,
+      "learning_rate": 7.965070766086479e-07,
+      "loss": 11.5603,
+      "step": 8650
+    },
+    {
+      "epoch": 2.500898311174991,
+      "grad_norm": 0.2076597958803177,
+      "learning_rate": 5.853864271658863e-07,
+      "loss": 11.5602,
+      "step": 8700
+    },
+    {
+      "epoch": 2.515271289974847,
+      "grad_norm": 0.2005164921283722,
+      "learning_rate": 4.066344655422871e-07,
+      "loss": 11.5624,
+      "step": 8750
+    },
+    {
+      "epoch": 2.5296442687747036,
+      "grad_norm": 0.1953943967819214,
+      "learning_rate": 2.603068717451846e-07,
+      "loss": 11.5487,
+      "step": 8800
+    },
+    {
+      "epoch": 2.5440172475745597,
+      "grad_norm": 0.22445988655090332,
+      "learning_rate": 1.4644922581416003e-07,
+      "loss": 11.5831,
+      "step": 8850
+    },
+    {
+      "epoch": 2.5583902263744163,
+      "grad_norm": 0.2075551301240921,
+      "learning_rate": 6.509699362327815e-08,
+      "loss": 11.5445,
+      "step": 8900
+    },
+    {
+      "epoch": 2.5727632051742724,
+      "grad_norm": 0.19453804194927216,
+      "learning_rate": 1.6275515833650003e-08,
+      "loss": 11.5572,
+      "step": 8950
+    },
+    {
+      "epoch": 2.5871361839741285,
+      "grad_norm": 0.2139836996793747,
+      "learning_rate": 0.0,
+      "loss": 11.5535,
+      "step": 9000
+    },
+    {
+      "epoch": 2.5871361839741285,
+      "eval_loss": 11.543502807617188,
+      "eval_runtime": 128.3762,
+      "eval_samples_per_second": 45.639,
+      "eval_steps_per_second": 11.412,
+      "step": 9000
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9923902488576000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null