Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43622af7b1ec25a6f952ee3726e3e5e38406e748e35fa12d4f77eb20544fabc2
 size 2373352

 version https://git-lfs.github.com/spec/v1
+oid sha256:9650e028731d9445cf08bfb66b127d9b3b39f98030444db569af32cc47e898e1
 size 2373352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1953da5f89b74be510ccb0b32ce3dc9ac7eea42d5ca4d589aaf5ed23224a77f8
 size 4830714

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbc54dff7d9ace2ba78e053120df48fe43a8f5fa24f5a688658addb3e262d0f3
 size 4830714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:468daf8cd40f0c374a543a1453608b3561f902620df2ec662332a521825274d8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1643ec1ee24e4f04159f14fed8b63e218aa8bdba8a2cec54e1f95b7f795d82bf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba4af3b1b4fa156d60adeec70df709d1741ac2f3147c676ab2805007313fc707
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.847994804382324,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.01052299273913501,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 36.275,
       "eval_steps_per_second": 9.072,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 41317171200000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.84705638885498,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.014030656985513347,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 36.275,
       "eval_steps_per_second": 9.072,
       "step": 150
+    },
+    {
+      "epoch": 0.010593146024062576,
+      "grad_norm": 0.4529383182525635,
+      "learning_rate": 2.6047368421052634e-05,
+      "loss": 11.8733,
+      "step": 151
+    },
+    {
+      "epoch": 0.010663299308990144,
+      "grad_norm": 0.4352326989173889,
+      "learning_rate": 2.5515789473684213e-05,
+      "loss": 11.8735,
+      "step": 152
+    },
+    {
+      "epoch": 0.01073345259391771,
+      "grad_norm": 0.4672123193740845,
+      "learning_rate": 2.4984210526315788e-05,
+      "loss": 11.8567,
+      "step": 153
+    },
+    {
+      "epoch": 0.010803605878845277,
+      "grad_norm": 0.42695605754852295,
+      "learning_rate": 2.445263157894737e-05,
+      "loss": 11.882,
+      "step": 154
+    },
+    {
+      "epoch": 0.010873759163772844,
+      "grad_norm": 0.5232683420181274,
+      "learning_rate": 2.3921052631578946e-05,
+      "loss": 11.8818,
+      "step": 155
+    },
+    {
+      "epoch": 0.01094391244870041,
+      "grad_norm": 0.49288409948349,
+      "learning_rate": 2.3389473684210528e-05,
+      "loss": 11.8722,
+      "step": 156
+    },
+    {
+      "epoch": 0.011014065733627978,
+      "grad_norm": 0.4101928770542145,
+      "learning_rate": 2.2857894736842106e-05,
+      "loss": 11.8923,
+      "step": 157
+    },
+    {
+      "epoch": 0.011084219018555544,
+      "grad_norm": 0.3824304938316345,
+      "learning_rate": 2.2326315789473685e-05,
+      "loss": 11.8699,
+      "step": 158
+    },
+    {
+      "epoch": 0.011154372303483111,
+      "grad_norm": 0.45842933654785156,
+      "learning_rate": 2.1794736842105264e-05,
+      "loss": 11.8689,
+      "step": 159
+    },
+    {
+      "epoch": 0.011224525588410677,
+      "grad_norm": 0.45681577920913696,
+      "learning_rate": 2.1263157894736842e-05,
+      "loss": 11.8635,
+      "step": 160
+    },
+    {
+      "epoch": 0.011294678873338244,
+      "grad_norm": 0.5055872201919556,
+      "learning_rate": 2.073157894736842e-05,
+      "loss": 11.8719,
+      "step": 161
+    },
+    {
+      "epoch": 0.01136483215826581,
+      "grad_norm": 0.3668268322944641,
+      "learning_rate": 2.0200000000000003e-05,
+      "loss": 11.8668,
+      "step": 162
+    },
+    {
+      "epoch": 0.011434985443193378,
+      "grad_norm": 0.4307781159877777,
+      "learning_rate": 1.966842105263158e-05,
+      "loss": 11.8479,
+      "step": 163
+    },
+    {
+      "epoch": 0.011505138728120944,
+      "grad_norm": 1.7248305082321167,
+      "learning_rate": 1.913684210526316e-05,
+      "loss": 11.8683,
+      "step": 164
+    },
+    {
+      "epoch": 0.011575292013048511,
+      "grad_norm": 0.6698246002197266,
+      "learning_rate": 1.8605263157894736e-05,
+      "loss": 11.8733,
+      "step": 165
+    },
+    {
+      "epoch": 0.011645445297976077,
+      "grad_norm": 0.6852075457572937,
+      "learning_rate": 1.8073684210526318e-05,
+      "loss": 11.866,
+      "step": 166
+    },
+    {
+      "epoch": 0.011715598582903644,
+      "grad_norm": 0.6911703944206238,
+      "learning_rate": 1.7542105263157897e-05,
+      "loss": 11.9028,
+      "step": 167
+    },
+    {
+      "epoch": 0.011785751867831212,
+      "grad_norm": 0.493837833404541,
+      "learning_rate": 1.7010526315789475e-05,
+      "loss": 11.8694,
+      "step": 168
+    },
+    {
+      "epoch": 0.011855905152758778,
+      "grad_norm": 0.3934398293495178,
+      "learning_rate": 1.6478947368421054e-05,
+      "loss": 11.8429,
+      "step": 169
+    },
+    {
+      "epoch": 0.011926058437686345,
+      "grad_norm": 0.48499631881713867,
+      "learning_rate": 1.5947368421052633e-05,
+      "loss": 11.8446,
+      "step": 170
+    },
+    {
+      "epoch": 0.011996211722613911,
+      "grad_norm": 0.5978266000747681,
+      "learning_rate": 1.541578947368421e-05,
+      "loss": 11.8199,
+      "step": 171
+    },
+    {
+      "epoch": 0.012066365007541479,
+      "grad_norm": 0.5004124045372009,
+      "learning_rate": 1.4884210526315788e-05,
+      "loss": 11.8432,
+      "step": 172
+    },
+    {
+      "epoch": 0.012136518292469044,
+      "grad_norm": 0.40780723094940186,
+      "learning_rate": 1.4352631578947369e-05,
+      "loss": 11.8466,
+      "step": 173
+    },
+    {
+      "epoch": 0.012206671577396612,
+      "grad_norm": 0.7493283748626709,
+      "learning_rate": 1.3821052631578949e-05,
+      "loss": 11.8455,
+      "step": 174
+    },
+    {
+      "epoch": 0.012276824862324178,
+      "grad_norm": 0.6349316239356995,
+      "learning_rate": 1.3289473684210526e-05,
+      "loss": 11.8458,
+      "step": 175
+    },
+    {
+      "epoch": 0.012346978147251745,
+      "grad_norm": 0.9960390329360962,
+      "learning_rate": 1.2757894736842106e-05,
+      "loss": 11.8589,
+      "step": 176
+    },
+    {
+      "epoch": 0.012417131432179311,
+      "grad_norm": 0.5007641315460205,
+      "learning_rate": 1.2226315789473685e-05,
+      "loss": 11.842,
+      "step": 177
+    },
+    {
+      "epoch": 0.012487284717106879,
+      "grad_norm": 0.6938870549201965,
+      "learning_rate": 1.1694736842105264e-05,
+      "loss": 11.8694,
+      "step": 178
+    },
+    {
+      "epoch": 0.012557438002034444,
+      "grad_norm": 0.7909740805625916,
+      "learning_rate": 1.1163157894736842e-05,
+      "loss": 11.8301,
+      "step": 179
+    },
+    {
+      "epoch": 0.012627591286962012,
+      "grad_norm": 0.7522603869438171,
+      "learning_rate": 1.0631578947368421e-05,
+      "loss": 11.8559,
+      "step": 180
+    },
+    {
+      "epoch": 0.01269774457188958,
+      "grad_norm": 0.6552623510360718,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 11.8547,
+      "step": 181
+    },
+    {
+      "epoch": 0.012767897856817145,
+      "grad_norm": 0.5103181004524231,
+      "learning_rate": 9.56842105263158e-06,
+      "loss": 11.819,
+      "step": 182
+    },
+    {
+      "epoch": 0.012838051141744713,
+      "grad_norm": 0.6029077172279358,
+      "learning_rate": 9.036842105263159e-06,
+      "loss": 11.876,
+      "step": 183
+    },
+    {
+      "epoch": 0.012908204426672279,
+      "grad_norm": 0.9292001724243164,
+      "learning_rate": 8.505263157894738e-06,
+      "loss": 11.8402,
+      "step": 184
+    },
+    {
+      "epoch": 0.012978357711599846,
+      "grad_norm": 0.5196678638458252,
+      "learning_rate": 7.973684210526316e-06,
+      "loss": 11.8279,
+      "step": 185
+    },
+    {
+      "epoch": 0.013048510996527412,
+      "grad_norm": 0.5138827562332153,
+      "learning_rate": 7.442105263157894e-06,
+      "loss": 11.868,
+      "step": 186
+    },
+    {
+      "epoch": 0.01311866428145498,
+      "grad_norm": 0.8440822958946228,
+      "learning_rate": 6.9105263157894745e-06,
+      "loss": 11.8365,
+      "step": 187
+    },
+    {
+      "epoch": 0.013188817566382545,
+      "grad_norm": 0.632576584815979,
+      "learning_rate": 6.378947368421053e-06,
+      "loss": 11.8567,
+      "step": 188
+    },
+    {
+      "epoch": 0.013258970851310113,
+      "grad_norm": 0.6650000214576721,
+      "learning_rate": 5.847368421052632e-06,
+      "loss": 11.8198,
+      "step": 189
+    },
+    {
+      "epoch": 0.013329124136237679,
+      "grad_norm": 0.5772100687026978,
+      "learning_rate": 5.315789473684211e-06,
+      "loss": 11.8298,
+      "step": 190
+    },
+    {
+      "epoch": 0.013399277421165246,
+      "grad_norm": 0.8075653314590454,
+      "learning_rate": 4.78421052631579e-06,
+      "loss": 11.8056,
+      "step": 191
+    },
+    {
+      "epoch": 0.013469430706092812,
+      "grad_norm": 0.9557323455810547,
+      "learning_rate": 4.252631578947369e-06,
+      "loss": 11.8465,
+      "step": 192
+    },
+    {
+      "epoch": 0.01353958399102038,
+      "grad_norm": 0.663483738899231,
+      "learning_rate": 3.721052631578947e-06,
+      "loss": 11.8338,
+      "step": 193
+    },
+    {
+      "epoch": 0.013609737275947947,
+      "grad_norm": 0.9484142661094666,
+      "learning_rate": 3.1894736842105266e-06,
+      "loss": 11.8005,
+      "step": 194
+    },
+    {
+      "epoch": 0.013679890560875513,
+      "grad_norm": 0.6537492275238037,
+      "learning_rate": 2.6578947368421053e-06,
+      "loss": 11.8352,
+      "step": 195
+    },
+    {
+      "epoch": 0.01375004384580308,
+      "grad_norm": 0.7138619422912598,
+      "learning_rate": 2.1263157894736844e-06,
+      "loss": 11.7953,
+      "step": 196
+    },
+    {
+      "epoch": 0.013820197130730646,
+      "grad_norm": 0.8647388815879822,
+      "learning_rate": 1.5947368421052633e-06,
+      "loss": 11.8648,
+      "step": 197
+    },
+    {
+      "epoch": 0.013890350415658214,
+      "grad_norm": 1.743220329284668,
+      "learning_rate": 1.0631578947368422e-06,
+      "loss": 11.755,
+      "step": 198
+    },
+    {
+      "epoch": 0.01396050370058578,
+      "grad_norm": 1.5254229307174683,
+      "learning_rate": 5.315789473684211e-07,
+      "loss": 11.7763,
+      "step": 199
+    },
+    {
+      "epoch": 0.014030656985513347,
+      "grad_norm": 2.5268492698669434,
+      "learning_rate": 0.0,
+      "loss": 11.8194,
+      "step": 200
+    },
+    {
+      "epoch": 0.014030656985513347,
+      "eval_loss": 11.84705638885498,
+      "eval_runtime": 165.5105,
+      "eval_samples_per_second": 36.264,
+      "eval_steps_per_second": 9.069,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 55089561600000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null