Training in progress, step 165, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6032562c43e7e21b5a44bf9262906f327bfffcd58e015820f3cb3e009bd12d11
 size 402688040

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ddcf9748c79fff78249bba1ee4b570f3633c1aa46f5ab5735b7a501bb863255
 size 402688040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51499b6dc75851a1b091c1efb3dcf05d15643740bc7c8abec43aa4ad2b2326ef
 size 805522170

 version https://git-lfs.github.com/spec/v1
+oid sha256:56016a899d0700e7aff181f9c447418b3c2d5e533084a4c15a70b61cee97871d
 size 805522170

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2f660c1248913ac76374bd9cfad95b84950114c39187aff5d1b1e8e9173a23d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:906065d8e6b69ff61c304cb0507e0c6b8206a4680766e0e5f5eba3c70a70e680
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be51b766ad06c765428a9ee8b7604e68c326479a48a54f27b30ed94a31bed710
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a9687a1b8f656eb1ba7c54db2096d9fb4dcefbc465460811f45afd99ce73ed4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3527563214302063,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.7272727272727275,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,111 @@
       "eval_samples_per_second": 16.729,
       "eval_steps_per_second": 8.455,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1217,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2924198177472512e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.3527563214302063,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.0,
   "eval_steps": 50,
+  "global_step": 165,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.729,
       "eval_steps_per_second": 8.455,
       "step": 150
+    },
+    {
+      "epoch": 2.7454545454545456,
+      "grad_norm": 1.4934179782867432,
+      "learning_rate": 2.282587464572594e-06,
+      "loss": 1.104,
+      "step": 151
+    },
+    {
+      "epoch": 2.7636363636363637,
+      "grad_norm": 1.4305020570755005,
+      "learning_rate": 1.9702322308350674e-06,
+      "loss": 1.1139,
+      "step": 152
+    },
+    {
+      "epoch": 2.7818181818181817,
+      "grad_norm": 1.3299729824066162,
+      "learning_rate": 1.6804223604318825e-06,
+      "loss": 0.7675,
+      "step": 153
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 1.3233647346496582,
+      "learning_rate": 1.413293891264722e-06,
+      "loss": 0.8955,
+      "step": 154
+    },
+    {
+      "epoch": 2.8181818181818183,
+      "grad_norm": 1.3566880226135254,
+      "learning_rate": 1.1689722144956671e-06,
+      "loss": 0.787,
+      "step": 155
+    },
+    {
+      "epoch": 2.8363636363636364,
+      "grad_norm": 1.4912869930267334,
+      "learning_rate": 9.475720156880419e-07,
+      "loss": 0.9158,
+      "step": 156
+    },
+    {
+      "epoch": 2.8545454545454545,
+      "grad_norm": 1.391627550125122,
+      "learning_rate": 7.491972209725806e-07,
+      "loss": 0.8527,
+      "step": 157
+    },
+    {
+      "epoch": 2.8727272727272726,
+      "grad_norm": 1.6862071752548218,
+      "learning_rate": 5.739409482640956e-07,
+      "loss": 0.8168,
+      "step": 158
+    },
+    {
+      "epoch": 2.8909090909090907,
+      "grad_norm": 1.505070686340332,
+      "learning_rate": 4.2188546355153013e-07,
+      "loss": 0.7495,
+      "step": 159
+    },
+    {
+      "epoch": 2.909090909090909,
+      "grad_norm": 1.4706422090530396,
+      "learning_rate": 2.9310214228202013e-07,
+      "loss": 0.6194,
+      "step": 160
+    },
+    {
+      "epoch": 2.9272727272727272,
+      "grad_norm": 1.5374929904937744,
+      "learning_rate": 1.8765143585693922e-07,
+      "loss": 0.7473,
+      "step": 161
+    },
+    {
+      "epoch": 2.9454545454545453,
+      "grad_norm": 1.621105432510376,
+      "learning_rate": 1.0558284325578038e-07,
+      "loss": 0.6099,
+      "step": 162
+    },
+    {
+      "epoch": 2.963636363636364,
+      "grad_norm": 1.0957081317901611,
+      "learning_rate": 4.6934887801164396e-08,
+      "loss": 1.0203,
+      "step": 163
+    },
+    {
+      "epoch": 2.981818181818182,
+      "grad_norm": 1.2702456712722778,
+      "learning_rate": 1.173509907579362e-08,
+      "loss": 0.7746,
+      "step": 164
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.6230897903442383,
+      "learning_rate": 0.0,
+      "loss": 0.6382,
+      "step": 165
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.41527251943424e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null