Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:729425801a16d9eb3cedd8ba8dcad9d1468f3d229ac72825d5f8196e51ceaeed
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:b85cec5e4fd2dd855174100333daa22ce2817cc9a4febc08144088d63de6f0f0
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:010af75d831f82c300e7e41ed6398d510b91a5e41614620a6bba0c142959ad62
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cdce61cec7f1dcb8c178fbd4d12ce0e0817369063f4efa6bb3907db2a1bff99
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06f6e6d5f4581cfac81193952f9945589db5e0f5ac9c07959c292ad2bd16af59
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c0f81402f40376f1c4102d53f897a7113b1bfa0c6a056684eeef9abbf2faf98
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d703f9adb617aa1ec13556a7b7482c741f765121a5a04f3cafdcfbce6ed485ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.4503767490386963,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.11286681715575621,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 14.867,
       "eval_steps_per_second": 3.717,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.437792434153062e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.432666063308716,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1504890895410083,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.867,
       "eval_steps_per_second": 3.717,
       "step": 150
+    },
+    {
+      "epoch": 0.11361926260346125,
+      "grad_norm": 3.661033868789673,
+      "learning_rate": 2.6253684210526317e-05,
+      "loss": 5.0616,
+      "step": 151
+    },
+    {
+      "epoch": 0.1143717080511663,
+      "grad_norm": 5.994472026824951,
+      "learning_rate": 2.5717894736842106e-05,
+      "loss": 4.6401,
+      "step": 152
+    },
+    {
+      "epoch": 0.11512415349887133,
+      "grad_norm": 5.022431373596191,
+      "learning_rate": 2.518210526315789e-05,
+      "loss": 4.6046,
+      "step": 153
+    },
+    {
+      "epoch": 0.11587659894657637,
+      "grad_norm": 4.494561195373535,
+      "learning_rate": 2.4646315789473683e-05,
+      "loss": 4.717,
+      "step": 154
+    },
+    {
+      "epoch": 0.11662904439428141,
+      "grad_norm": 5.689757347106934,
+      "learning_rate": 2.411052631578947e-05,
+      "loss": 5.0209,
+      "step": 155
+    },
+    {
+      "epoch": 0.11738148984198646,
+      "grad_norm": 4.620606422424316,
+      "learning_rate": 2.357473684210526e-05,
+      "loss": 4.0454,
+      "step": 156
+    },
+    {
+      "epoch": 0.1181339352896915,
+      "grad_norm": 6.991167068481445,
+      "learning_rate": 2.3038947368421052e-05,
+      "loss": 4.7982,
+      "step": 157
+    },
+    {
+      "epoch": 0.11888638073739653,
+      "grad_norm": 5.083990097045898,
+      "learning_rate": 2.250315789473684e-05,
+      "loss": 4.4286,
+      "step": 158
+    },
+    {
+      "epoch": 0.11963882618510158,
+      "grad_norm": 6.157924175262451,
+      "learning_rate": 2.196736842105263e-05,
+      "loss": 4.6027,
+      "step": 159
+    },
+    {
+      "epoch": 0.12039127163280662,
+      "grad_norm": 8.231710433959961,
+      "learning_rate": 2.1431578947368418e-05,
+      "loss": 5.3449,
+      "step": 160
+    },
+    {
+      "epoch": 0.12114371708051166,
+      "grad_norm": 7.961388111114502,
+      "learning_rate": 2.089578947368421e-05,
+      "loss": 5.2107,
+      "step": 161
+    },
+    {
+      "epoch": 0.12189616252821671,
+      "grad_norm": 6.96645450592041,
+      "learning_rate": 2.036e-05,
+      "loss": 5.1824,
+      "step": 162
+    },
+    {
+      "epoch": 0.12264860797592174,
+      "grad_norm": 7.0527801513671875,
+      "learning_rate": 1.9824210526315787e-05,
+      "loss": 4.5329,
+      "step": 163
+    },
+    {
+      "epoch": 0.12340105342362678,
+      "grad_norm": 6.659213066101074,
+      "learning_rate": 1.928842105263158e-05,
+      "loss": 4.9405,
+      "step": 164
+    },
+    {
+      "epoch": 0.12415349887133183,
+      "grad_norm": 6.317037582397461,
+      "learning_rate": 1.8752631578947367e-05,
+      "loss": 4.8909,
+      "step": 165
+    },
+    {
+      "epoch": 0.12490594431903687,
+      "grad_norm": 7.180385589599609,
+      "learning_rate": 1.8216842105263156e-05,
+      "loss": 5.1754,
+      "step": 166
+    },
+    {
+      "epoch": 0.1256583897667419,
+      "grad_norm": 8.178675651550293,
+      "learning_rate": 1.7681052631578948e-05,
+      "loss": 5.2554,
+      "step": 167
+    },
+    {
+      "epoch": 0.12641083521444696,
+      "grad_norm": 7.352338790893555,
+      "learning_rate": 1.7145263157894736e-05,
+      "loss": 5.179,
+      "step": 168
+    },
+    {
+      "epoch": 0.127163280662152,
+      "grad_norm": 7.15298318862915,
+      "learning_rate": 1.6609473684210525e-05,
+      "loss": 4.9093,
+      "step": 169
+    },
+    {
+      "epoch": 0.12791572610985705,
+      "grad_norm": 6.234389781951904,
+      "learning_rate": 1.6073684210526313e-05,
+      "loss": 4.3581,
+      "step": 170
+    },
+    {
+      "epoch": 0.12866817155756208,
+      "grad_norm": 7.177828311920166,
+      "learning_rate": 1.5537894736842105e-05,
+      "loss": 4.8511,
+      "step": 171
+    },
+    {
+      "epoch": 0.1294206170052671,
+      "grad_norm": 6.240314483642578,
+      "learning_rate": 1.5002105263157892e-05,
+      "loss": 4.6268,
+      "step": 172
+    },
+    {
+      "epoch": 0.13017306245297217,
+      "grad_norm": 6.857619762420654,
+      "learning_rate": 1.4466315789473684e-05,
+      "loss": 3.9235,
+      "step": 173
+    },
+    {
+      "epoch": 0.1309255079006772,
+      "grad_norm": 6.206757068634033,
+      "learning_rate": 1.3930526315789474e-05,
+      "loss": 4.8705,
+      "step": 174
+    },
+    {
+      "epoch": 0.13167795334838225,
+      "grad_norm": 6.866716384887695,
+      "learning_rate": 1.3394736842105261e-05,
+      "loss": 4.7763,
+      "step": 175
+    },
+    {
+      "epoch": 0.13243039879608728,
+      "grad_norm": 7.294867515563965,
+      "learning_rate": 1.2858947368421053e-05,
+      "loss": 4.6713,
+      "step": 176
+    },
+    {
+      "epoch": 0.13318284424379231,
+      "grad_norm": 7.363504886627197,
+      "learning_rate": 1.2323157894736842e-05,
+      "loss": 4.5199,
+      "step": 177
+    },
+    {
+      "epoch": 0.13393528969149737,
+      "grad_norm": 7.403514385223389,
+      "learning_rate": 1.178736842105263e-05,
+      "loss": 4.5396,
+      "step": 178
+    },
+    {
+      "epoch": 0.1346877351392024,
+      "grad_norm": 9.64759635925293,
+      "learning_rate": 1.125157894736842e-05,
+      "loss": 5.123,
+      "step": 179
+    },
+    {
+      "epoch": 0.13544018058690746,
+      "grad_norm": 8.454669952392578,
+      "learning_rate": 1.0715789473684209e-05,
+      "loss": 4.6262,
+      "step": 180
+    },
+    {
+      "epoch": 0.1361926260346125,
+      "grad_norm": 7.289380073547363,
+      "learning_rate": 1.018e-05,
+      "loss": 4.1746,
+      "step": 181
+    },
+    {
+      "epoch": 0.13694507148231752,
+      "grad_norm": 14.602645874023438,
+      "learning_rate": 9.64421052631579e-06,
+      "loss": 5.1446,
+      "step": 182
+    },
+    {
+      "epoch": 0.13769751693002258,
+      "grad_norm": 10.938192367553711,
+      "learning_rate": 9.108421052631578e-06,
+      "loss": 5.2975,
+      "step": 183
+    },
+    {
+      "epoch": 0.1384499623777276,
+      "grad_norm": 9.536102294921875,
+      "learning_rate": 8.572631578947368e-06,
+      "loss": 4.973,
+      "step": 184
+    },
+    {
+      "epoch": 0.13920240782543267,
+      "grad_norm": 9.649733543395996,
+      "learning_rate": 8.036842105263157e-06,
+      "loss": 4.9525,
+      "step": 185
+    },
+    {
+      "epoch": 0.1399548532731377,
+      "grad_norm": 7.923221111297607,
+      "learning_rate": 7.501052631578946e-06,
+      "loss": 4.8443,
+      "step": 186
+    },
+    {
+      "epoch": 0.14070729872084273,
+      "grad_norm": 9.543553352355957,
+      "learning_rate": 6.965263157894737e-06,
+      "loss": 5.2648,
+      "step": 187
+    },
+    {
+      "epoch": 0.14145974416854779,
+      "grad_norm": 9.844090461730957,
+      "learning_rate": 6.4294736842105265e-06,
+      "loss": 5.2391,
+      "step": 188
+    },
+    {
+      "epoch": 0.14221218961625282,
+      "grad_norm": 12.343792915344238,
+      "learning_rate": 5.893684210526315e-06,
+      "loss": 5.8381,
+      "step": 189
+    },
+    {
+      "epoch": 0.14296463506395787,
+      "grad_norm": 10.430888175964355,
+      "learning_rate": 5.3578947368421044e-06,
+      "loss": 5.2111,
+      "step": 190
+    },
+    {
+      "epoch": 0.1437170805116629,
+      "grad_norm": 9.29223346710205,
+      "learning_rate": 4.822105263157895e-06,
+      "loss": 5.2066,
+      "step": 191
+    },
+    {
+      "epoch": 0.14446952595936793,
+      "grad_norm": 12.151300430297852,
+      "learning_rate": 4.286315789473684e-06,
+      "loss": 5.1922,
+      "step": 192
+    },
+    {
+      "epoch": 0.145221971407073,
+      "grad_norm": 9.725712776184082,
+      "learning_rate": 3.750526315789473e-06,
+      "loss": 5.3949,
+      "step": 193
+    },
+    {
+      "epoch": 0.14597441685477802,
+      "grad_norm": 13.120623588562012,
+      "learning_rate": 3.2147368421052633e-06,
+      "loss": 5.2486,
+      "step": 194
+    },
+    {
+      "epoch": 0.14672686230248308,
+      "grad_norm": 10.279730796813965,
+      "learning_rate": 2.6789473684210522e-06,
+      "loss": 4.5108,
+      "step": 195
+    },
+    {
+      "epoch": 0.1474793077501881,
+      "grad_norm": 12.112651824951172,
+      "learning_rate": 2.143157894736842e-06,
+      "loss": 4.847,
+      "step": 196
+    },
+    {
+      "epoch": 0.14823175319789314,
+      "grad_norm": 10.086380958557129,
+      "learning_rate": 1.6073684210526316e-06,
+      "loss": 4.9699,
+      "step": 197
+    },
+    {
+      "epoch": 0.1489841986455982,
+      "grad_norm": 13.680956840515137,
+      "learning_rate": 1.071578947368421e-06,
+      "loss": 6.0189,
+      "step": 198
+    },
+    {
+      "epoch": 0.14973664409330323,
+      "grad_norm": 15.482779502868652,
+      "learning_rate": 5.357894736842105e-07,
+      "loss": 5.6126,
+      "step": 199
+    },
+    {
+      "epoch": 0.1504890895410083,
+      "grad_norm": 15.324419975280762,
+      "learning_rate": 0.0,
+      "loss": 6.2436,
+      "step": 200
+    },
+    {
+      "epoch": 0.1504890895410083,
+      "eval_loss": 2.432666063308716,
+      "eval_runtime": 37.6225,
+      "eval_samples_per_second": 14.885,
+      "eval_steps_per_second": 3.721,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.262314895612314e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null