Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a7393e5c00e1a4837167da3a32da1c66d6f3fb24f2e4c09f97bfcab0c95d5ed
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5e2ec1850e37da4d606b15b239565dc7d2195889318e95f09ff47a2b56bdf5a
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36904b8d817034cf46f27c894daebf1498070caf461522cba5fe2132c920efb6
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:edbbeb91b9acc36b6128276221b4d4475157041c403f44e86f9d3093627f4241
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:859c61489e5e4fba44b8a27205f5c5a8b5ed7faa38b59ff24fdbfb836a1ea105
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f05ba2b816f0be0c117eced808cdd7142d5fa7e4eab2cdada10ff4f13738be1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef91cf09f7b6a58a39bbfe1ef78ac2fa91c0c15ca1705097a187d272d0433d8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47edf82128c3f034f21204d4b9ce5c76cd3269748ce31061b148a0a389d049d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.389584481716156,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5649717514124294,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 5.794,
       "eval_steps_per_second": 1.448,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.8709111734272e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3771364390850067,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.7532956685499058,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.794,
       "eval_steps_per_second": 1.448,
       "step": 150
+    },
+    {
+      "epoch": 0.568738229755179,
+      "grad_norm": 0.26727038621902466,
+      "learning_rate": 2.5944210526315793e-05,
+      "loss": 0.4036,
+      "step": 151
+    },
+    {
+      "epoch": 0.5725047080979284,
+      "grad_norm": 0.24589797854423523,
+      "learning_rate": 2.5414736842105266e-05,
+      "loss": 0.3443,
+      "step": 152
+    },
+    {
+      "epoch": 0.576271186440678,
+      "grad_norm": 0.3061113953590393,
+      "learning_rate": 2.4885263157894737e-05,
+      "loss": 0.4185,
+      "step": 153
+    },
+    {
+      "epoch": 0.5800376647834274,
+      "grad_norm": 0.290894091129303,
+      "learning_rate": 2.4355789473684214e-05,
+      "loss": 0.4565,
+      "step": 154
+    },
+    {
+      "epoch": 0.583804143126177,
+      "grad_norm": 0.3391144275665283,
+      "learning_rate": 2.3826315789473684e-05,
+      "loss": 0.4667,
+      "step": 155
+    },
+    {
+      "epoch": 0.5875706214689266,
+      "grad_norm": 0.30165934562683105,
+      "learning_rate": 2.3296842105263158e-05,
+      "loss": 0.4308,
+      "step": 156
+    },
+    {
+      "epoch": 0.591337099811676,
+      "grad_norm": 0.2832431495189667,
+      "learning_rate": 2.2767368421052635e-05,
+      "loss": 0.3862,
+      "step": 157
+    },
+    {
+      "epoch": 0.5951035781544256,
+      "grad_norm": 0.28951308131217957,
+      "learning_rate": 2.2237894736842105e-05,
+      "loss": 0.3645,
+      "step": 158
+    },
+    {
+      "epoch": 0.5988700564971752,
+      "grad_norm": 0.33745133876800537,
+      "learning_rate": 2.170842105263158e-05,
+      "loss": 0.4001,
+      "step": 159
+    },
+    {
+      "epoch": 0.6026365348399246,
+      "grad_norm": 0.31061139702796936,
+      "learning_rate": 2.1178947368421053e-05,
+      "loss": 0.3586,
+      "step": 160
+    },
+    {
+      "epoch": 0.6064030131826742,
+      "grad_norm": 0.3397316038608551,
+      "learning_rate": 2.0649473684210527e-05,
+      "loss": 0.4179,
+      "step": 161
+    },
+    {
+      "epoch": 0.6101694915254238,
+      "grad_norm": 0.3633480966091156,
+      "learning_rate": 2.0120000000000004e-05,
+      "loss": 0.458,
+      "step": 162
+    },
+    {
+      "epoch": 0.6139359698681732,
+      "grad_norm": 0.3560391068458557,
+      "learning_rate": 1.9590526315789474e-05,
+      "loss": 0.3905,
+      "step": 163
+    },
+    {
+      "epoch": 0.6177024482109228,
+      "grad_norm": 0.37587136030197144,
+      "learning_rate": 1.9061052631578948e-05,
+      "loss": 0.4476,
+      "step": 164
+    },
+    {
+      "epoch": 0.6214689265536724,
+      "grad_norm": 0.3361068665981293,
+      "learning_rate": 1.8531578947368422e-05,
+      "loss": 0.4138,
+      "step": 165
+    },
+    {
+      "epoch": 0.6252354048964218,
+      "grad_norm": 0.3378278911113739,
+      "learning_rate": 1.8002105263157896e-05,
+      "loss": 0.3866,
+      "step": 166
+    },
+    {
+      "epoch": 0.6290018832391714,
+      "grad_norm": 0.38653016090393066,
+      "learning_rate": 1.747263157894737e-05,
+      "loss": 0.4981,
+      "step": 167
+    },
+    {
+      "epoch": 0.632768361581921,
+      "grad_norm": 0.35619762539863586,
+      "learning_rate": 1.6943157894736843e-05,
+      "loss": 0.3997,
+      "step": 168
+    },
+    {
+      "epoch": 0.6365348399246704,
+      "grad_norm": 0.3622172474861145,
+      "learning_rate": 1.6413684210526317e-05,
+      "loss": 0.4018,
+      "step": 169
+    },
+    {
+      "epoch": 0.64030131826742,
+      "grad_norm": 0.3765234351158142,
+      "learning_rate": 1.588421052631579e-05,
+      "loss": 0.302,
+      "step": 170
+    },
+    {
+      "epoch": 0.6440677966101694,
+      "grad_norm": 0.36978664994239807,
+      "learning_rate": 1.5354736842105264e-05,
+      "loss": 0.3831,
+      "step": 171
+    },
+    {
+      "epoch": 0.647834274952919,
+      "grad_norm": 0.35858282446861267,
+      "learning_rate": 1.4825263157894736e-05,
+      "loss": 0.3773,
+      "step": 172
+    },
+    {
+      "epoch": 0.6516007532956686,
+      "grad_norm": 0.32193654775619507,
+      "learning_rate": 1.4295789473684212e-05,
+      "loss": 0.3053,
+      "step": 173
+    },
+    {
+      "epoch": 0.655367231638418,
+      "grad_norm": 0.37463074922561646,
+      "learning_rate": 1.3766315789473686e-05,
+      "loss": 0.3728,
+      "step": 174
+    },
+    {
+      "epoch": 0.6591337099811676,
+      "grad_norm": 0.3786180019378662,
+      "learning_rate": 1.3236842105263158e-05,
+      "loss": 0.3157,
+      "step": 175
+    },
+    {
+      "epoch": 0.6629001883239172,
+      "grad_norm": 0.3969653248786926,
+      "learning_rate": 1.2707368421052633e-05,
+      "loss": 0.4222,
+      "step": 176
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.38620397448539734,
+      "learning_rate": 1.2177894736842107e-05,
+      "loss": 0.3351,
+      "step": 177
+    },
+    {
+      "epoch": 0.6704331450094162,
+      "grad_norm": 0.3854876756668091,
+      "learning_rate": 1.1648421052631579e-05,
+      "loss": 0.3766,
+      "step": 178
+    },
+    {
+      "epoch": 0.6741996233521658,
+      "grad_norm": 0.45001623034477234,
+      "learning_rate": 1.1118947368421053e-05,
+      "loss": 0.5356,
+      "step": 179
+    },
+    {
+      "epoch": 0.6779661016949152,
+      "grad_norm": 0.5555963516235352,
+      "learning_rate": 1.0589473684210526e-05,
+      "loss": 0.3512,
+      "step": 180
+    },
+    {
+      "epoch": 0.6817325800376648,
+      "grad_norm": 0.385834276676178,
+      "learning_rate": 1.0060000000000002e-05,
+      "loss": 0.3159,
+      "step": 181
+    },
+    {
+      "epoch": 0.6854990583804144,
+      "grad_norm": 0.4277232587337494,
+      "learning_rate": 9.530526315789474e-06,
+      "loss": 0.3922,
+      "step": 182
+    },
+    {
+      "epoch": 0.6892655367231638,
+      "grad_norm": 0.4338568449020386,
+      "learning_rate": 9.001052631578948e-06,
+      "loss": 0.3263,
+      "step": 183
+    },
+    {
+      "epoch": 0.6930320150659134,
+      "grad_norm": 0.4069223701953888,
+      "learning_rate": 8.471578947368422e-06,
+      "loss": 0.3175,
+      "step": 184
+    },
+    {
+      "epoch": 0.696798493408663,
+      "grad_norm": 0.47060683369636536,
+      "learning_rate": 7.942105263157895e-06,
+      "loss": 0.3415,
+      "step": 185
+    },
+    {
+      "epoch": 0.7005649717514124,
+      "grad_norm": 0.43340185284614563,
+      "learning_rate": 7.412631578947368e-06,
+      "loss": 0.3047,
+      "step": 186
+    },
+    {
+      "epoch": 0.704331450094162,
+      "grad_norm": 0.5115451812744141,
+      "learning_rate": 6.883157894736843e-06,
+      "loss": 0.3594,
+      "step": 187
+    },
+    {
+      "epoch": 0.7080979284369114,
+      "grad_norm": 0.4953676164150238,
+      "learning_rate": 6.3536842105263166e-06,
+      "loss": 0.4087,
+      "step": 188
+    },
+    {
+      "epoch": 0.711864406779661,
+      "grad_norm": 0.531154990196228,
+      "learning_rate": 5.8242105263157895e-06,
+      "loss": 0.2525,
+      "step": 189
+    },
+    {
+      "epoch": 0.7156308851224106,
+      "grad_norm": 0.4638945758342743,
+      "learning_rate": 5.294736842105263e-06,
+      "loss": 0.2697,
+      "step": 190
+    },
+    {
+      "epoch": 0.71939736346516,
+      "grad_norm": 0.5047771334648132,
+      "learning_rate": 4.765263157894737e-06,
+      "loss": 0.3704,
+      "step": 191
+    },
+    {
+      "epoch": 0.7231638418079096,
+      "grad_norm": 0.5970916152000427,
+      "learning_rate": 4.235789473684211e-06,
+      "loss": 0.2877,
+      "step": 192
+    },
+    {
+      "epoch": 0.7269303201506592,
+      "grad_norm": 0.49476003646850586,
+      "learning_rate": 3.706315789473684e-06,
+      "loss": 0.3429,
+      "step": 193
+    },
+    {
+      "epoch": 0.7306967984934086,
+      "grad_norm": 0.5486244559288025,
+      "learning_rate": 3.1768421052631583e-06,
+      "loss": 0.3828,
+      "step": 194
+    },
+    {
+      "epoch": 0.7344632768361582,
+      "grad_norm": 0.6069429516792297,
+      "learning_rate": 2.6473684210526316e-06,
+      "loss": 0.2767,
+      "step": 195
+    },
+    {
+      "epoch": 0.7382297551789078,
+      "grad_norm": 0.6400809288024902,
+      "learning_rate": 2.1178947368421054e-06,
+      "loss": 0.3056,
+      "step": 196
+    },
+    {
+      "epoch": 0.7419962335216572,
+      "grad_norm": 0.5533387064933777,
+      "learning_rate": 1.5884210526315791e-06,
+      "loss": 0.2929,
+      "step": 197
+    },
+    {
+      "epoch": 0.7457627118644068,
+      "grad_norm": 0.68741774559021,
+      "learning_rate": 1.0589473684210527e-06,
+      "loss": 0.2843,
+      "step": 198
+    },
+    {
+      "epoch": 0.7495291902071564,
+      "grad_norm": 0.8008204102516174,
+      "learning_rate": 5.294736842105263e-07,
+      "loss": 0.2842,
+      "step": 199
+    },
+    {
+      "epoch": 0.7532956685499058,
+      "grad_norm": 0.8741005659103394,
+      "learning_rate": 0.0,
+      "loss": 0.2125,
+      "step": 200
+    },
+    {
+      "epoch": 0.7532956685499058,
+      "eval_loss": 0.3771364390850067,
+      "eval_runtime": 18.5487,
+      "eval_samples_per_second": 6.038,
+      "eval_steps_per_second": 1.51,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.7886300094464e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null