Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7bc9b3adfac57da0a68bcf5324df123bb98a639acbaa9f878ac08ea18841346
 size 226530600

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b9481b60263b028ec8d6e576d27b74021df87e631d5a33c5c5fcd5876c03c8f
 size 226530600

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:670ac4d20368725b94d60e46e87c1e2f01d8240953806a04681ef865d71ad9a5
 size 115354708

 version https://git-lfs.github.com/spec/v1
+oid sha256:734ab2cd6dd4512cef7ddae12bf01dc567ac9b6368afaa3fac5210d774abf095
 size 115354708

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ec2d1e74c103ea738681d1cafaf9417a59d2c7cef976d54767c4ee919528361
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbb09a6aa8d206a1ec51579c5b25d6675729dab3a0fc5e72ace229d9bde69b69
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d703f9adb617aa1ec13556a7b7482c741f765121a5a04f3cafdcfbce6ed485ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5793471336364746,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5649717514124294,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 43.989,
       "eval_steps_per_second": 10.997,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0336693143994368e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5769926309585571,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.7532956685499058,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 43.989,
       "eval_steps_per_second": 10.997,
       "step": 150
+    },
+    {
+      "epoch": 0.568738229755179,
+      "grad_norm": 0.19955414533615112,
+      "learning_rate": 2.6253684210526317e-05,
+      "loss": 0.5705,
+      "step": 151
+    },
+    {
+      "epoch": 0.5725047080979284,
+      "grad_norm": 0.24504195153713226,
+      "learning_rate": 2.5717894736842106e-05,
+      "loss": 0.7473,
+      "step": 152
+    },
+    {
+      "epoch": 0.576271186440678,
+      "grad_norm": 0.27941247820854187,
+      "learning_rate": 2.518210526315789e-05,
+      "loss": 0.6658,
+      "step": 153
+    },
+    {
+      "epoch": 0.5800376647834274,
+      "grad_norm": 0.24577289819717407,
+      "learning_rate": 2.4646315789473683e-05,
+      "loss": 0.7071,
+      "step": 154
+    },
+    {
+      "epoch": 0.583804143126177,
+      "grad_norm": 0.2300121784210205,
+      "learning_rate": 2.411052631578947e-05,
+      "loss": 0.6006,
+      "step": 155
+    },
+    {
+      "epoch": 0.5875706214689266,
+      "grad_norm": 0.24530534446239471,
+      "learning_rate": 2.357473684210526e-05,
+      "loss": 0.618,
+      "step": 156
+    },
+    {
+      "epoch": 0.591337099811676,
+      "grad_norm": 0.25975605845451355,
+      "learning_rate": 2.3038947368421052e-05,
+      "loss": 0.6039,
+      "step": 157
+    },
+    {
+      "epoch": 0.5951035781544256,
+      "grad_norm": 0.26106777787208557,
+      "learning_rate": 2.250315789473684e-05,
+      "loss": 0.5157,
+      "step": 158
+    },
+    {
+      "epoch": 0.5988700564971752,
+      "grad_norm": 0.2734393775463104,
+      "learning_rate": 2.196736842105263e-05,
+      "loss": 0.6128,
+      "step": 159
+    },
+    {
+      "epoch": 0.6026365348399246,
+      "grad_norm": 0.2958735227584839,
+      "learning_rate": 2.1431578947368418e-05,
+      "loss": 0.6668,
+      "step": 160
+    },
+    {
+      "epoch": 0.6064030131826742,
+      "grad_norm": 0.32622960209846497,
+      "learning_rate": 2.089578947368421e-05,
+      "loss": 0.73,
+      "step": 161
+    },
+    {
+      "epoch": 0.6101694915254238,
+      "grad_norm": 0.29832321405410767,
+      "learning_rate": 2.036e-05,
+      "loss": 0.648,
+      "step": 162
+    },
+    {
+      "epoch": 0.6139359698681732,
+      "grad_norm": 0.3251093626022339,
+      "learning_rate": 1.9824210526315787e-05,
+      "loss": 0.6642,
+      "step": 163
+    },
+    {
+      "epoch": 0.6177024482109228,
+      "grad_norm": 0.30713552236557007,
+      "learning_rate": 1.928842105263158e-05,
+      "loss": 0.5635,
+      "step": 164
+    },
+    {
+      "epoch": 0.6214689265536724,
+      "grad_norm": 0.36056530475616455,
+      "learning_rate": 1.8752631578947367e-05,
+      "loss": 0.7228,
+      "step": 165
+    },
+    {
+      "epoch": 0.6252354048964218,
+      "grad_norm": 0.3532079756259918,
+      "learning_rate": 1.8216842105263156e-05,
+      "loss": 0.5366,
+      "step": 166
+    },
+    {
+      "epoch": 0.6290018832391714,
+      "grad_norm": 0.3571911156177521,
+      "learning_rate": 1.7681052631578948e-05,
+      "loss": 0.6532,
+      "step": 167
+    },
+    {
+      "epoch": 0.632768361581921,
+      "grad_norm": 0.35912466049194336,
+      "learning_rate": 1.7145263157894736e-05,
+      "loss": 0.5445,
+      "step": 168
+    },
+    {
+      "epoch": 0.6365348399246704,
+      "grad_norm": 0.36368894577026367,
+      "learning_rate": 1.6609473684210525e-05,
+      "loss": 0.5491,
+      "step": 169
+    },
+    {
+      "epoch": 0.64030131826742,
+      "grad_norm": 0.34409135580062866,
+      "learning_rate": 1.6073684210526313e-05,
+      "loss": 0.4394,
+      "step": 170
+    },
+    {
+      "epoch": 0.6440677966101694,
+      "grad_norm": 0.38495466113090515,
+      "learning_rate": 1.5537894736842105e-05,
+      "loss": 0.6512,
+      "step": 171
+    },
+    {
+      "epoch": 0.647834274952919,
+      "grad_norm": 0.3448059558868408,
+      "learning_rate": 1.5002105263157892e-05,
+      "loss": 0.4943,
+      "step": 172
+    },
+    {
+      "epoch": 0.6516007532956686,
+      "grad_norm": 0.36092063784599304,
+      "learning_rate": 1.4466315789473684e-05,
+      "loss": 0.5685,
+      "step": 173
+    },
+    {
+      "epoch": 0.655367231638418,
+      "grad_norm": 0.37616080045700073,
+      "learning_rate": 1.3930526315789474e-05,
+      "loss": 0.5859,
+      "step": 174
+    },
+    {
+      "epoch": 0.6591337099811676,
+      "grad_norm": 0.3933235704898834,
+      "learning_rate": 1.3394736842105261e-05,
+      "loss": 0.5828,
+      "step": 175
+    },
+    {
+      "epoch": 0.6629001883239172,
+      "grad_norm": 0.42634278535842896,
+      "learning_rate": 1.2858947368421053e-05,
+      "loss": 0.605,
+      "step": 176
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.3862863779067993,
+      "learning_rate": 1.2323157894736842e-05,
+      "loss": 0.5609,
+      "step": 177
+    },
+    {
+      "epoch": 0.6704331450094162,
+      "grad_norm": 0.4162931740283966,
+      "learning_rate": 1.178736842105263e-05,
+      "loss": 0.6204,
+      "step": 178
+    },
+    {
+      "epoch": 0.6741996233521658,
+      "grad_norm": 0.36133626103401184,
+      "learning_rate": 1.125157894736842e-05,
+      "loss": 0.455,
+      "step": 179
+    },
+    {
+      "epoch": 0.6779661016949152,
+      "grad_norm": 0.4119521379470825,
+      "learning_rate": 1.0715789473684209e-05,
+      "loss": 0.4619,
+      "step": 180
+    },
+    {
+      "epoch": 0.6817325800376648,
+      "grad_norm": 0.42595550417900085,
+      "learning_rate": 1.018e-05,
+      "loss": 0.5468,
+      "step": 181
+    },
+    {
+      "epoch": 0.6854990583804144,
+      "grad_norm": 0.4045557379722595,
+      "learning_rate": 9.64421052631579e-06,
+      "loss": 0.6256,
+      "step": 182
+    },
+    {
+      "epoch": 0.6892655367231638,
+      "grad_norm": 0.39754652976989746,
+      "learning_rate": 9.108421052631578e-06,
+      "loss": 0.3705,
+      "step": 183
+    },
+    {
+      "epoch": 0.6930320150659134,
+      "grad_norm": 0.40420177578926086,
+      "learning_rate": 8.572631578947368e-06,
+      "loss": 0.5937,
+      "step": 184
+    },
+    {
+      "epoch": 0.696798493408663,
+      "grad_norm": 0.4251862168312073,
+      "learning_rate": 8.036842105263157e-06,
+      "loss": 0.5046,
+      "step": 185
+    },
+    {
+      "epoch": 0.7005649717514124,
+      "grad_norm": 0.48561224341392517,
+      "learning_rate": 7.501052631578946e-06,
+      "loss": 0.5133,
+      "step": 186
+    },
+    {
+      "epoch": 0.704331450094162,
+      "grad_norm": 0.487111359834671,
+      "learning_rate": 6.965263157894737e-06,
+      "loss": 0.587,
+      "step": 187
+    },
+    {
+      "epoch": 0.7080979284369114,
+      "grad_norm": 0.45947694778442383,
+      "learning_rate": 6.4294736842105265e-06,
+      "loss": 0.592,
+      "step": 188
+    },
+    {
+      "epoch": 0.711864406779661,
+      "grad_norm": 0.5125983357429504,
+      "learning_rate": 5.893684210526315e-06,
+      "loss": 0.4986,
+      "step": 189
+    },
+    {
+      "epoch": 0.7156308851224106,
+      "grad_norm": 0.4144290089607239,
+      "learning_rate": 5.3578947368421044e-06,
+      "loss": 0.3933,
+      "step": 190
+    },
+    {
+      "epoch": 0.71939736346516,
+      "grad_norm": 0.502846896648407,
+      "learning_rate": 4.822105263157895e-06,
+      "loss": 0.422,
+      "step": 191
+    },
+    {
+      "epoch": 0.7231638418079096,
+      "grad_norm": 0.47996261715888977,
+      "learning_rate": 4.286315789473684e-06,
+      "loss": 0.5033,
+      "step": 192
+    },
+    {
+      "epoch": 0.7269303201506592,
+      "grad_norm": 0.4302833378314972,
+      "learning_rate": 3.750526315789473e-06,
+      "loss": 0.3569,
+      "step": 193
+    },
+    {
+      "epoch": 0.7306967984934086,
+      "grad_norm": 0.47998151183128357,
+      "learning_rate": 3.2147368421052633e-06,
+      "loss": 0.3189,
+      "step": 194
+    },
+    {
+      "epoch": 0.7344632768361582,
+      "grad_norm": 0.6263414025306702,
+      "learning_rate": 2.6789473684210522e-06,
+      "loss": 0.5123,
+      "step": 195
+    },
+    {
+      "epoch": 0.7382297551789078,
+      "grad_norm": 0.5459735989570618,
+      "learning_rate": 2.143157894736842e-06,
+      "loss": 0.4023,
+      "step": 196
+    },
+    {
+      "epoch": 0.7419962335216572,
+      "grad_norm": 0.6983940601348877,
+      "learning_rate": 1.6073684210526316e-06,
+      "loss": 0.6275,
+      "step": 197
+    },
+    {
+      "epoch": 0.7457627118644068,
+      "grad_norm": 0.667293906211853,
+      "learning_rate": 1.071578947368421e-06,
+      "loss": 0.5608,
+      "step": 198
+    },
+    {
+      "epoch": 0.7495291902071564,
+      "grad_norm": 0.9229950904846191,
+      "learning_rate": 5.357894736842105e-07,
+      "loss": 0.4369,
+      "step": 199
+    },
+    {
+      "epoch": 0.7532956685499058,
+      "grad_norm": 1.6464707851409912,
+      "learning_rate": 0.0,
+      "loss": 0.4719,
+      "step": 200
+    },
+    {
+      "epoch": 0.7532956685499058,
+      "eval_loss": 0.5769926309585571,
+      "eval_runtime": 2.5481,
+      "eval_samples_per_second": 43.954,
+      "eval_steps_per_second": 10.989,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.3771034188578816e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null