Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10ad7dd5dab93baff8ab4200fee597bb9ba9de30f07968210705bdbc3353f2f2
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:489c464445b7d71c6bc0958673433d7a55eaa056f057a7c415b817cf452c7a43
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:404bf3aee8ccf09a689ca20f6656d263a3e16de1fcb9505a3db63b676675008a
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d7fe452a53c6e6c6e32ddb798e96027abd9faaa9a643d96e286cd5e53fc77eb
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7577594cb2e923f0986dcd66099d06bc38653ca89937b5665a0cf443b5a214d8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed4fa3443e28c3061ef70b82b3361b3c366300745782c921cc5ff4d66bdea1d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48a1533051e4bee653afc683a4359c329f95831c0354ae8442616cabf80d0caa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5efc880080e0854e5765c7a5ea108f46d0ccbf30b054c9bbb5a2162a1c5babf
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4959875345230103,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.12886597938144329,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 20.257,
       "eval_steps_per_second": 5.085,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.4914993790976e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4909777641296387,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1718213058419244,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.257,
       "eval_steps_per_second": 5.085,
       "step": 150
+    },
+    {
+      "epoch": 0.12972508591065293,
+      "grad_norm": 0.4515070617198944,
+      "learning_rate": 2.5815263157894736e-05,
+      "loss": 1.4754,
+      "step": 151
+    },
+    {
+      "epoch": 0.13058419243986255,
+      "grad_norm": 0.3967503011226654,
+      "learning_rate": 2.528842105263158e-05,
+      "loss": 1.163,
+      "step": 152
+    },
+    {
+      "epoch": 0.13144329896907217,
+      "grad_norm": 0.4591720998287201,
+      "learning_rate": 2.4761578947368418e-05,
+      "loss": 1.3497,
+      "step": 153
+    },
+    {
+      "epoch": 0.1323024054982818,
+      "grad_norm": 0.47611069679260254,
+      "learning_rate": 2.423473684210526e-05,
+      "loss": 1.2411,
+      "step": 154
+    },
+    {
+      "epoch": 0.1331615120274914,
+      "grad_norm": 0.445859432220459,
+      "learning_rate": 2.3707894736842103e-05,
+      "loss": 1.3745,
+      "step": 155
+    },
+    {
+      "epoch": 0.13402061855670103,
+      "grad_norm": 0.4517122507095337,
+      "learning_rate": 2.3181052631578946e-05,
+      "loss": 1.4243,
+      "step": 156
+    },
+    {
+      "epoch": 0.13487972508591065,
+      "grad_norm": 0.48769956827163696,
+      "learning_rate": 2.265421052631579e-05,
+      "loss": 1.4979,
+      "step": 157
+    },
+    {
+      "epoch": 0.13573883161512026,
+      "grad_norm": 0.3892180323600769,
+      "learning_rate": 2.212736842105263e-05,
+      "loss": 1.368,
+      "step": 158
+    },
+    {
+      "epoch": 0.13659793814432988,
+      "grad_norm": 0.4427521228790283,
+      "learning_rate": 2.1600526315789474e-05,
+      "loss": 1.6416,
+      "step": 159
+    },
+    {
+      "epoch": 0.13745704467353953,
+      "grad_norm": 0.41149961948394775,
+      "learning_rate": 2.1073684210526313e-05,
+      "loss": 1.3097,
+      "step": 160
+    },
+    {
+      "epoch": 0.13831615120274915,
+      "grad_norm": 0.44827204942703247,
+      "learning_rate": 2.0546842105263155e-05,
+      "loss": 1.488,
+      "step": 161
+    },
+    {
+      "epoch": 0.13917525773195877,
+      "grad_norm": 0.4299844801425934,
+      "learning_rate": 2.002e-05,
+      "loss": 1.4034,
+      "step": 162
+    },
+    {
+      "epoch": 0.1400343642611684,
+      "grad_norm": 0.4039885997772217,
+      "learning_rate": 1.949315789473684e-05,
+      "loss": 1.3447,
+      "step": 163
+    },
+    {
+      "epoch": 0.140893470790378,
+      "grad_norm": 0.43283891677856445,
+      "learning_rate": 1.8966315789473683e-05,
+      "loss": 1.3755,
+      "step": 164
+    },
+    {
+      "epoch": 0.14175257731958762,
+      "grad_norm": 0.4442145526409149,
+      "learning_rate": 1.8439473684210522e-05,
+      "loss": 1.6471,
+      "step": 165
+    },
+    {
+      "epoch": 0.14261168384879724,
+      "grad_norm": 0.42544904351234436,
+      "learning_rate": 1.791263157894737e-05,
+      "loss": 1.5591,
+      "step": 166
+    },
+    {
+      "epoch": 0.14347079037800686,
+      "grad_norm": 0.4591159522533417,
+      "learning_rate": 1.738578947368421e-05,
+      "loss": 1.4695,
+      "step": 167
+    },
+    {
+      "epoch": 0.14432989690721648,
+      "grad_norm": 0.4475899636745453,
+      "learning_rate": 1.685894736842105e-05,
+      "loss": 1.7011,
+      "step": 168
+    },
+    {
+      "epoch": 0.14518900343642613,
+      "grad_norm": 0.3987146019935608,
+      "learning_rate": 1.6332105263157893e-05,
+      "loss": 1.3711,
+      "step": 169
+    },
+    {
+      "epoch": 0.14604810996563575,
+      "grad_norm": 0.4292147159576416,
+      "learning_rate": 1.5805263157894735e-05,
+      "loss": 1.5364,
+      "step": 170
+    },
+    {
+      "epoch": 0.14690721649484537,
+      "grad_norm": 0.4621577262878418,
+      "learning_rate": 1.5278421052631578e-05,
+      "loss": 1.5781,
+      "step": 171
+    },
+    {
+      "epoch": 0.14776632302405499,
+      "grad_norm": 0.4481081962585449,
+      "learning_rate": 1.4751578947368419e-05,
+      "loss": 1.5406,
+      "step": 172
+    },
+    {
+      "epoch": 0.1486254295532646,
+      "grad_norm": 0.4390103220939636,
+      "learning_rate": 1.4224736842105262e-05,
+      "loss": 1.549,
+      "step": 173
+    },
+    {
+      "epoch": 0.14948453608247422,
+      "grad_norm": 0.41087478399276733,
+      "learning_rate": 1.3697894736842106e-05,
+      "loss": 1.4282,
+      "step": 174
+    },
+    {
+      "epoch": 0.15034364261168384,
+      "grad_norm": 0.4323715269565582,
+      "learning_rate": 1.3171052631578945e-05,
+      "loss": 1.421,
+      "step": 175
+    },
+    {
+      "epoch": 0.15120274914089346,
+      "grad_norm": 0.4265320897102356,
+      "learning_rate": 1.264421052631579e-05,
+      "loss": 1.4488,
+      "step": 176
+    },
+    {
+      "epoch": 0.15206185567010308,
+      "grad_norm": 0.4511622190475464,
+      "learning_rate": 1.211736842105263e-05,
+      "loss": 1.5736,
+      "step": 177
+    },
+    {
+      "epoch": 0.15292096219931273,
+      "grad_norm": 0.417791485786438,
+      "learning_rate": 1.1590526315789473e-05,
+      "loss": 1.4141,
+      "step": 178
+    },
+    {
+      "epoch": 0.15378006872852235,
+      "grad_norm": 0.4172487258911133,
+      "learning_rate": 1.1063684210526316e-05,
+      "loss": 1.3984,
+      "step": 179
+    },
+    {
+      "epoch": 0.15463917525773196,
+      "grad_norm": 0.42654547095298767,
+      "learning_rate": 1.0536842105263156e-05,
+      "loss": 1.4239,
+      "step": 180
+    },
+    {
+      "epoch": 0.15549828178694158,
+      "grad_norm": 0.4155035614967346,
+      "learning_rate": 1.001e-05,
+      "loss": 1.51,
+      "step": 181
+    },
+    {
+      "epoch": 0.1563573883161512,
+      "grad_norm": 0.39767947793006897,
+      "learning_rate": 9.483157894736842e-06,
+      "loss": 1.4256,
+      "step": 182
+    },
+    {
+      "epoch": 0.15721649484536082,
+      "grad_norm": 0.4175090789794922,
+      "learning_rate": 8.956315789473684e-06,
+      "loss": 1.3716,
+      "step": 183
+    },
+    {
+      "epoch": 0.15807560137457044,
+      "grad_norm": 0.44680315256118774,
+      "learning_rate": 8.429473684210525e-06,
+      "loss": 1.5732,
+      "step": 184
+    },
+    {
+      "epoch": 0.15893470790378006,
+      "grad_norm": 0.4153236150741577,
+      "learning_rate": 7.902631578947368e-06,
+      "loss": 1.5334,
+      "step": 185
+    },
+    {
+      "epoch": 0.15979381443298968,
+      "grad_norm": 0.4097844660282135,
+      "learning_rate": 7.3757894736842095e-06,
+      "loss": 1.4617,
+      "step": 186
+    },
+    {
+      "epoch": 0.16065292096219932,
+      "grad_norm": 0.44263848662376404,
+      "learning_rate": 6.848947368421053e-06,
+      "loss": 1.5138,
+      "step": 187
+    },
+    {
+      "epoch": 0.16151202749140894,
+      "grad_norm": 0.3853647708892822,
+      "learning_rate": 6.322105263157895e-06,
+      "loss": 1.3983,
+      "step": 188
+    },
+    {
+      "epoch": 0.16237113402061856,
+      "grad_norm": 0.4370990991592407,
+      "learning_rate": 5.7952631578947365e-06,
+      "loss": 1.4268,
+      "step": 189
+    },
+    {
+      "epoch": 0.16323024054982818,
+      "grad_norm": 0.437597393989563,
+      "learning_rate": 5.268421052631578e-06,
+      "loss": 1.3218,
+      "step": 190
+    },
+    {
+      "epoch": 0.1640893470790378,
+      "grad_norm": 0.49196720123291016,
+      "learning_rate": 4.741578947368421e-06,
+      "loss": 1.5127,
+      "step": 191
+    },
+    {
+      "epoch": 0.16494845360824742,
+      "grad_norm": 0.450184166431427,
+      "learning_rate": 4.2147368421052626e-06,
+      "loss": 1.3564,
+      "step": 192
+    },
+    {
+      "epoch": 0.16580756013745704,
+      "grad_norm": 0.5065258145332336,
+      "learning_rate": 3.6878947368421047e-06,
+      "loss": 1.6407,
+      "step": 193
+    },
+    {
+      "epoch": 0.16666666666666666,
+      "grad_norm": 0.5404831767082214,
+      "learning_rate": 3.1610526315789474e-06,
+      "loss": 1.6031,
+      "step": 194
+    },
+    {
+      "epoch": 0.16752577319587628,
+      "grad_norm": 0.5111208558082581,
+      "learning_rate": 2.634210526315789e-06,
+      "loss": 1.4117,
+      "step": 195
+    },
+    {
+      "epoch": 0.16838487972508592,
+      "grad_norm": 0.628701388835907,
+      "learning_rate": 2.1073684210526313e-06,
+      "loss": 1.563,
+      "step": 196
+    },
+    {
+      "epoch": 0.16924398625429554,
+      "grad_norm": 0.591088056564331,
+      "learning_rate": 1.5805263157894737e-06,
+      "loss": 1.4572,
+      "step": 197
+    },
+    {
+      "epoch": 0.17010309278350516,
+      "grad_norm": 0.6137686371803284,
+      "learning_rate": 1.0536842105263156e-06,
+      "loss": 1.473,
+      "step": 198
+    },
+    {
+      "epoch": 0.17096219931271478,
+      "grad_norm": 0.787673830986023,
+      "learning_rate": 5.268421052631578e-07,
+      "loss": 1.5638,
+      "step": 199
+    },
+    {
+      "epoch": 0.1718213058419244,
+      "grad_norm": 0.8384268283843994,
+      "learning_rate": 0.0,
+      "loss": 1.466,
+      "step": 200
+    },
+    {
+      "epoch": 0.1718213058419244,
+      "eval_loss": 1.4909777641296387,
+      "eval_runtime": 24.2168,
+      "eval_samples_per_second": 20.234,
+      "eval_steps_per_second": 5.079,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.9886658387968e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null