cilooor committed
Commit 111faab · verified · 1 parent: 63b1c34

Training in progress, step 169, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de527414683c8b0bed0a78c4f0285fc12cf70935b48b0d7f6ad89125b80d8cd2
+oid sha256:bb205bf96bc1e449692c3c3cf7a04e77e8774eef99517674be52d2b58dc00d3a
 size 156926880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5c623cdad42407d532f2cd2b0e539c53401f7a0e948bedb05230e9fb8a66875
+oid sha256:de18f2c6fd17d0915b212e95220c9e0ab2f31466db846c413f841ab1844abaec
 size 79968772
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a5e41997852137eff5e8bdd6b2b44e24799f8f0f4f9c34656630f501c7c37a1
+oid sha256:47b4bd283c9337c504f1be1381e1d6ff8b5e0796f19065ccf9b8185a17347f88
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e77d360cd203b2fed24862c9206a3e5f5157856fbd9bf2643f1d3eb87c6e566
+oid sha256:ce7d2e8219e1343da35c24f4c17225d06d859aa5c80ad461abea4a9219c97b31
 size 1064
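
The four checkpoint files above are tracked with Git LFS, so this commit only rewrites their small pointer files (spec version, sha256 oid, byte size); the actual blobs live in LFS storage. As a minimal illustration (not part of the repository), the Python sketch below checks a locally downloaded file against its pointer text; the pointer contents and path are copied from the adapter diff above.

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Check a downloaded file against its Git LFS pointer (oid sha256 + size)."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    # Hash the file in 1 MiB chunks and compare against the pointer's oid.
    sha = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example using the new adapter pointer from this commit:
pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:bb205bf96bc1e449692c3c3cf7a04e77e8774eef99517674be52d2b58dc00d3a
size 156926880
"""
print(verify_lfs_pointer(pointer, "last-checkpoint/adapter_model.safetensors"))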
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 0.04182600975036621,
 "best_model_checkpoint": "miner_id_24/checkpoint-150",
- "epoch": 2.6696230598669626,
+ "epoch": 3.011086474501109,
 "eval_steps": 50,
- "global_step": 150,
+ "global_step": 169,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1089,6 +1089,139 @@
 "eval_samples_per_second": 28.094,
 "eval_steps_per_second": 7.098,
 "step": 150
+ },
+ {
+ "epoch": 2.6873614190687363,
+ "grad_norm": 0.46853962540626526,
+ "learning_rate": 2.85663688337436e-06,
+ "loss": 0.0156,
+ "step": 151
+ },
+ {
+ "epoch": 2.70509977827051,
+ "grad_norm": 0.45606735348701477,
+ "learning_rate": 2.551858691034086e-06,
+ "loss": 0.0086,
+ "step": 152
+ },
+ {
+ "epoch": 2.7228381374722836,
+ "grad_norm": 0.39163070917129517,
+ "learning_rate": 2.26365503861976e-06,
+ "loss": 0.0061,
+ "step": 153
+ },
+ {
+ "epoch": 2.740576496674058,
+ "grad_norm": 0.22401034832000732,
+ "learning_rate": 1.992173140817682e-06,
+ "loss": 0.0048,
+ "step": 154
+ },
+ {
+ "epoch": 2.7583148558758315,
+ "grad_norm": 0.3343549370765686,
+ "learning_rate": 1.737551670826774e-06,
+ "loss": 0.0111,
+ "step": 155
+ },
+ {
+ "epoch": 2.776053215077605,
+ "grad_norm": 0.33819764852523804,
+ "learning_rate": 1.49992068952417e-06,
+ "loss": 0.0101,
+ "step": 156
+ },
+ {
+ "epoch": 2.7937915742793793,
+ "grad_norm": 0.3976474404335022,
+ "learning_rate": 1.27940157903004e-06,
+ "loss": 0.0145,
+ "step": 157
+ },
+ {
+ "epoch": 2.811529933481153,
+ "grad_norm": 0.3741917014122009,
+ "learning_rate": 1.0761069807054472e-06,
+ "loss": 0.0116,
+ "step": 158
+ },
+ {
+ "epoch": 2.8292682926829267,
+ "grad_norm": 0.6680158972740173,
+ "learning_rate": 8.901407376150799e-07,
+ "loss": 0.0174,
+ "step": 159
+ },
+ {
+ "epoch": 2.847006651884701,
+ "grad_norm": 0.24637554585933685,
+ "learning_rate": 7.215978414840828e-07,
+ "loss": 0.0068,
+ "step": 160
+ },
+ {
+ "epoch": 2.8647450110864745,
+ "grad_norm": 0.5326627492904663,
+ "learning_rate": 5.705643841762314e-07,
+ "loss": 0.0114,
+ "step": 161
+ },
+ {
+ "epoch": 2.882483370288248,
+ "grad_norm": 0.42392855882644653,
+ "learning_rate": 4.371175137181088e-07,
+ "loss": 0.0097,
+ "step": 162
+ },
+ {
+ "epoch": 2.9002217294900223,
+ "grad_norm": 0.5340198874473572,
+ "learning_rate": 3.213253948918315e-07,
+ "loss": 0.016,
+ "step": 163
+ },
+ {
+ "epoch": 2.917960088691796,
+ "grad_norm": 1.3395206928253174,
+ "learning_rate": 2.232471744164116e-07,
+ "loss": 0.0197,
+ "step": 164
+ },
+ {
+ "epoch": 2.9356984478935697,
+ "grad_norm": 0.18678541481494904,
+ "learning_rate": 1.4293295073557144e-07,
+ "loss": 0.0046,
+ "step": 165
+ },
+ {
+ "epoch": 2.953436807095344,
+ "grad_norm": 0.3585246801376343,
+ "learning_rate": 8.042374842740341e-08,
+ "loss": 0.0088,
+ "step": 166
+ },
+ {
+ "epoch": 2.9711751662971175,
+ "grad_norm": 0.2283589392900467,
+ "learning_rate": 3.575149724897308e-08,
+ "loss": 0.0052,
+ "step": 167
+ },
+ {
+ "epoch": 2.988913525498891,
+ "grad_norm": 0.37096989154815674,
+ "learning_rate": 8.939015826586738e-09,
+ "loss": 0.007,
+ "step": 168
+ },
+ {
+ "epoch": 3.011086474501109,
+ "grad_norm": 1.0814924240112305,
+ "learning_rate": 0.0,
+ "loss": 0.0184,
+ "step": 169
 }
 ],
 "logging_steps": 1,
@@ -1112,12 +1245,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 5.96048917561344e+16,
+ "total_flos": 6.715484471191142e+16,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null