Training in progress, step 1350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01965d98a92c0f64f8a0d029a10632f08d0287219af676b8048158b0661d0a62
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f792ca2f39253114b59eb717807e2ff96b18c41da2f5d430b8418f21938975d
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d5a8a0dcbd9606342ddfa403836caac031ee714e121062f218d4eebfd185e1
 size 168149074

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bb6d2fc8b2efd6822aad78c3e4fd174d2075ff491b7a302ce16e6e2b9a7176c
 size 168149074

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8e13ad7428c5509076c15c21d35ce0b2bffea5947cd099daf2af0afb123a71d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:86519396222f045b85ec9776e7db27686ba4130a0d8956349c7b70ae36555704
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:beb0b0f4cb227409c25efa2d36f03edfc3a0032e3296f1707945c3a0c5611cc5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a866955cff9370cd3957339d6bf23f5ca8494fc491b0c5ef9330a9273b5d4460
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
-  "epoch": 2.6044492674986435,
   "eval_steps": 150,
-  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -919,6 +919,119 @@
       "eval_samples_per_second": 14.437,
       "eval_steps_per_second": 1.821,
       "step": 1200
     }
   ],
   "logging_steps": 10,
@@ -933,7 +1046,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -942,12 +1055,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6825629945102336e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
+  "epoch": 2.930005425935974,
   "eval_steps": 150,
+  "global_step": 1350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.437,
       "eval_steps_per_second": 1.821,
       "step": 1200
+    },
+    {
+      "epoch": 2.6261530113944653,
+      "grad_norm": 23.57729721069336,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 1.3837,
+      "step": 1210
+    },
+    {
+      "epoch": 2.6478567552902876,
+      "grad_norm": 35.865020751953125,
+      "learning_rate": 2.67658249108603e-06,
+      "loss": 1.562,
+      "step": 1220
+    },
+    {
+      "epoch": 2.6695604991861095,
+      "grad_norm": 19.588651657104492,
+      "learning_rate": 2.4942045588130504e-06,
+      "loss": 1.7947,
+      "step": 1230
+    },
+    {
+      "epoch": 2.691264243081932,
+      "grad_norm": 25.088882446289062,
+      "learning_rate": 2.317696896481024e-06,
+      "loss": 1.7935,
+      "step": 1240
+    },
+    {
+      "epoch": 2.7129679869777537,
+      "grad_norm": 31.45013427734375,
+      "learning_rate": 2.1471423574861643e-06,
+      "loss": 1.8259,
+      "step": 1250
+    },
+    {
+      "epoch": 2.7346717308735755,
+      "grad_norm": 27.531116485595703,
+      "learning_rate": 1.982621000804979e-06,
+      "loss": 1.4111,
+      "step": 1260
+    },
+    {
+      "epoch": 2.756375474769398,
+      "grad_norm": 42.346099853515625,
+      "learning_rate": 1.8242100534143065e-06,
+      "loss": 1.4248,
+      "step": 1270
+    },
+    {
+      "epoch": 2.7780792186652197,
+      "grad_norm": 25.887807846069336,
+      "learning_rate": 1.6719838740406313e-06,
+      "loss": 1.9046,
+      "step": 1280
+    },
+    {
+      "epoch": 2.799782962561042,
+      "grad_norm": 22.550439834594727,
+      "learning_rate": 1.5260139182558363e-06,
+      "loss": 1.7181,
+      "step": 1290
+    },
+    {
+      "epoch": 2.821486706456864,
+      "grad_norm": 24.849620819091797,
+      "learning_rate": 1.3863687049356465e-06,
+      "loss": 1.5168,
+      "step": 1300
+    },
+    {
+      "epoch": 2.8431904503526857,
+      "grad_norm": 25.409860610961914,
+      "learning_rate": 1.25311378409661e-06,
+      "loss": 1.4321,
+      "step": 1310
+    },
+    {
+      "epoch": 2.864894194248508,
+      "grad_norm": 36.389671325683594,
+      "learning_rate": 1.1263117061266677e-06,
+      "loss": 1.3841,
+      "step": 1320
+    },
+    {
+      "epoch": 2.88659793814433,
+      "grad_norm": 24.459070205688477,
+      "learning_rate": 1.006021992423738e-06,
+      "loss": 1.9626,
+      "step": 1330
+    },
+    {
+      "epoch": 2.908301682040152,
+      "grad_norm": 20.42031478881836,
+      "learning_rate": 8.923011074561405e-07,
+      "loss": 1.6266,
+      "step": 1340
+    },
+    {
+      "epoch": 2.930005425935974,
+      "grad_norm": 25.92626190185547,
+      "learning_rate": 7.852024322579649e-07,
+      "loss": 1.6606,
+      "step": 1350
+    },
+    {
+      "epoch": 2.930005425935974,
+      "eval_loss": 0.6615604758262634,
+      "eval_runtime": 53.8295,
+      "eval_samples_per_second": 14.434,
+      "eval_steps_per_second": 1.821,
+      "step": 1350
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.8928833688240128e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null