Training in progress, step 1200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +117 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33b2440d2694321ee888d51ec5666ec9e866384a438fe8a89aab4937822170b1
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:01965d98a92c0f64f8a0d029a10632f08d0287219af676b8048158b0661d0a62
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d50e471a33e6d700094dc5bd8ce2bca4a2efdc1b1d33b74f03e583750f796133
 size 168149074

 version https://git-lfs.github.com/spec/v1
+oid sha256:61d5a8a0dcbd9606342ddfa403836caac031ee714e121062f218d4eebfd185e1
 size 168149074

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ba78f70e8450cf0ec82402ec5f695006e3cd4c321e54ba57e606421f97e4957
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8e13ad7428c5509076c15c21d35ce0b2bffea5947cd099daf2af0afb123a71d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eec984887369339384df18a86847622608ab8c12ab961dc4703f600d8c64c21c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:beb0b0f4cb227409c25efa2d36f03edfc3a0032e3296f1707945c3a0c5611cc5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
-  "epoch": 2.278893109061313,
   "eval_steps": 150,
-  "global_step": 1050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -806,6 +806,119 @@
       "eval_samples_per_second": 14.423,
       "eval_steps_per_second": 1.819,
       "step": 1050
     }
   ],
   "logging_steps": 10,
@@ -820,7 +933,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -834,7 +947,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4722426201964544e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.636073112487793,
   "best_model_checkpoint": "miner_id_24/checkpoint-900",
+  "epoch": 2.6044492674986435,
   "eval_steps": 150,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.423,
       "eval_steps_per_second": 1.819,
       "step": 1050
+    },
+    {
+      "epoch": 2.3005968529571352,
+      "grad_norm": 31.06637191772461,
+      "learning_rate": 6.315198152822273e-06,
+      "loss": 1.4709,
+      "step": 1060
+    },
+    {
+      "epoch": 2.322300596852957,
+      "grad_norm": 41.439178466796875,
+      "learning_rate": 6.052278966485492e-06,
+      "loss": 1.5022,
+      "step": 1070
+    },
+    {
+      "epoch": 2.3440043407487794,
+      "grad_norm": 23.359983444213867,
+      "learning_rate": 5.793559875861938e-06,
+      "loss": 1.9443,
+      "step": 1080
+    },
+    {
+      "epoch": 2.3657080846446013,
+      "grad_norm": 20.295778274536133,
+      "learning_rate": 5.539162324718075e-06,
+      "loss": 1.739,
+      "step": 1090
+    },
+    {
+      "epoch": 2.387411828540423,
+      "grad_norm": 25.940486907958984,
+      "learning_rate": 5.289205728272587e-06,
+      "loss": 1.5355,
+      "step": 1100
+    },
+    {
+      "epoch": 2.4091155724362454,
+      "grad_norm": 28.764272689819336,
+      "learning_rate": 5.043807417142436e-06,
+      "loss": 1.4645,
+      "step": 1110
+    },
+    {
+      "epoch": 2.4308193163320673,
+      "grad_norm": 36.14052963256836,
+      "learning_rate": 4.8030825822673816e-06,
+      "loss": 1.3555,
+      "step": 1120
+    },
+    {
+      "epoch": 2.452523060227889,
+      "grad_norm": 20.15415382385254,
+      "learning_rate": 4.567144220838923e-06,
+      "loss": 1.9016,
+      "step": 1130
+    },
+    {
+      "epoch": 2.4742268041237114,
+      "grad_norm": 22.350454330444336,
+      "learning_rate": 4.336103083258942e-06,
+      "loss": 1.7998,
+      "step": 1140
+    },
+    {
+      "epoch": 2.4959305480195333,
+      "grad_norm": 22.9199161529541,
+      "learning_rate": 4.110067621153041e-06,
+      "loss": 1.5755,
+      "step": 1150
+    },
+    {
+      "epoch": 2.517634291915355,
+      "grad_norm": 27.70863151550293,
+      "learning_rate": 3.889143936462915e-06,
+      "loss": 1.3297,
+      "step": 1160
+    },
+    {
+      "epoch": 2.5393380358111775,
+      "grad_norm": 41.40476608276367,
+      "learning_rate": 3.673435731641692e-06,
+      "loss": 1.3004,
+      "step": 1170
+    },
+    {
+      "epoch": 2.5610417797069993,
+      "grad_norm": 21.756010055541992,
+      "learning_rate": 3.4630442609755666e-06,
+      "loss": 2.0685,
+      "step": 1180
+    },
+    {
+      "epoch": 2.5827455236028216,
+      "grad_norm": 29.703012466430664,
+      "learning_rate": 3.2580682830546667e-06,
+      "loss": 1.8172,
+      "step": 1190
+    },
+    {
+      "epoch": 2.6044492674986435,
+      "grad_norm": 21.751575469970703,
+      "learning_rate": 3.0586040144153436e-06,
+      "loss": 1.5008,
+      "step": 1200
+    },
+    {
+      "epoch": 2.6044492674986435,
+      "eval_loss": 0.6650447845458984,
+      "eval_runtime": 53.8186,
+      "eval_samples_per_second": 14.437,
+      "eval_steps_per_second": 1.821,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.6825629945102336e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null