Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d02c6665831f570801834521204d5417ef6480222eb8c5047a172edbbb560c77
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:76ed1c8bf834c1ddffa72c6a4dd7144f4071a274d1b44a66fdc3f4ff4fd2ed49
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a4533b9ed586b01f4eb84b69a7330c82df8cdbb6f92d12a65514f0ef9995a7d
 size 520860

 version https://git-lfs.github.com/spec/v1
+oid sha256:d931569a897686c822497d8e474a6521edbe2db43ecbeb8e873474025c800189
 size 520860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:520ed0f0305e51b9eba187b564a25f613b36cf1399f5af5141d8672ac8c7a0ad
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:163e15ce907d9da1284a6876075df6feaab3d046987352c38c44629120d62edf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22c9dfa784729c93db12d225bfc25c64e7ae0e1e9f4be7b45dc255fae6ea42c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e90d1a9917ccbc9819e77d754e81c093aaa1bcc13b46bc6c7bb4bcae17159bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.51091194152832,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.016938625712833832,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 123.448,
       "eval_steps_per_second": 30.887,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4493817151488.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.495820045471191,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.022584834283778443,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 123.448,
       "eval_steps_per_second": 30.887,
       "step": 150
+    },
+    {
+      "epoch": 0.017051549884252726,
+      "grad_norm": 1.5686991214752197,
+      "learning_rate": 2.586684210526316e-05,
+      "loss": 21.5102,
+      "step": 151
+    },
+    {
+      "epoch": 0.017164474055671616,
+      "grad_norm": 1.539154052734375,
+      "learning_rate": 2.5338947368421054e-05,
+      "loss": 21.5452,
+      "step": 152
+    },
+    {
+      "epoch": 0.01727739822709051,
+      "grad_norm": 2.048976182937622,
+      "learning_rate": 2.4811052631578945e-05,
+      "loss": 21.3931,
+      "step": 153
+    },
+    {
+      "epoch": 0.017390322398509402,
+      "grad_norm": 1.514927864074707,
+      "learning_rate": 2.428315789473684e-05,
+      "loss": 21.5441,
+      "step": 154
+    },
+    {
+      "epoch": 0.017503246569928292,
+      "grad_norm": 2.028735637664795,
+      "learning_rate": 2.3755263157894736e-05,
+      "loss": 21.3943,
+      "step": 155
+    },
+    {
+      "epoch": 0.017616170741347185,
+      "grad_norm": 2.3182790279388428,
+      "learning_rate": 2.322736842105263e-05,
+      "loss": 21.2646,
+      "step": 156
+    },
+    {
+      "epoch": 0.01772909491276608,
+      "grad_norm": 2.041980743408203,
+      "learning_rate": 2.2699473684210526e-05,
+      "loss": 21.3444,
+      "step": 157
+    },
+    {
+      "epoch": 0.01784201908418497,
+      "grad_norm": 1.94465172290802,
+      "learning_rate": 2.217157894736842e-05,
+      "loss": 21.1919,
+      "step": 158
+    },
+    {
+      "epoch": 0.017954943255603862,
+      "grad_norm": 2.494795799255371,
+      "learning_rate": 2.1643684210526316e-05,
+      "loss": 21.1641,
+      "step": 159
+    },
+    {
+      "epoch": 0.018067867427022755,
+      "grad_norm": 1.9524977207183838,
+      "learning_rate": 2.1115789473684208e-05,
+      "loss": 21.1044,
+      "step": 160
+    },
+    {
+      "epoch": 0.018180791598441645,
+      "grad_norm": 1.5885933637619019,
+      "learning_rate": 2.0587894736842106e-05,
+      "loss": 20.8469,
+      "step": 161
+    },
+    {
+      "epoch": 0.01829371576986054,
+      "grad_norm": 1.4998608827590942,
+      "learning_rate": 2.006e-05,
+      "loss": 20.8448,
+      "step": 162
+    },
+    {
+      "epoch": 0.018406639941279432,
+      "grad_norm": 1.567491054534912,
+      "learning_rate": 1.9532105263157893e-05,
+      "loss": 20.8049,
+      "step": 163
+    },
+    {
+      "epoch": 0.01851956411269832,
+      "grad_norm": 1.4419852495193481,
+      "learning_rate": 1.900421052631579e-05,
+      "loss": 21.0469,
+      "step": 164
+    },
+    {
+      "epoch": 0.018632488284117215,
+      "grad_norm": 1.531498670578003,
+      "learning_rate": 1.8476315789473683e-05,
+      "loss": 20.7183,
+      "step": 165
+    },
+    {
+      "epoch": 0.01874541245553611,
+      "grad_norm": 1.39108407497406,
+      "learning_rate": 1.7948421052631578e-05,
+      "loss": 20.9409,
+      "step": 166
+    },
+    {
+      "epoch": 0.018858336626954998,
+      "grad_norm": 1.4878839254379272,
+      "learning_rate": 1.7420526315789473e-05,
+      "loss": 20.8906,
+      "step": 167
+    },
+    {
+      "epoch": 0.01897126079837389,
+      "grad_norm": 1.54227876663208,
+      "learning_rate": 1.6892631578947368e-05,
+      "loss": 20.8181,
+      "step": 168
+    },
+    {
+      "epoch": 0.019084184969792785,
+      "grad_norm": 1.6588259935379028,
+      "learning_rate": 1.6364736842105263e-05,
+      "loss": 20.6031,
+      "step": 169
+    },
+    {
+      "epoch": 0.019197109141211675,
+      "grad_norm": 1.6546351909637451,
+      "learning_rate": 1.5836842105263158e-05,
+      "loss": 20.837,
+      "step": 170
+    },
+    {
+      "epoch": 0.019310033312630568,
+      "grad_norm": 1.4789516925811768,
+      "learning_rate": 1.5308947368421053e-05,
+      "loss": 20.8276,
+      "step": 171
+    },
+    {
+      "epoch": 0.01942295748404946,
+      "grad_norm": 1.5245800018310547,
+      "learning_rate": 1.4781052631578945e-05,
+      "loss": 20.5968,
+      "step": 172
+    },
+    {
+      "epoch": 0.019535881655468355,
+      "grad_norm": 1.5665831565856934,
+      "learning_rate": 1.4253157894736842e-05,
+      "loss": 20.7256,
+      "step": 173
+    },
+    {
+      "epoch": 0.019648805826887245,
+      "grad_norm": 1.4446725845336914,
+      "learning_rate": 1.3725263157894737e-05,
+      "loss": 20.7415,
+      "step": 174
+    },
+    {
+      "epoch": 0.019761729998306138,
+      "grad_norm": 1.7932209968566895,
+      "learning_rate": 1.319736842105263e-05,
+      "loss": 20.6939,
+      "step": 175
+    },
+    {
+      "epoch": 0.01987465416972503,
+      "grad_norm": 1.6645069122314453,
+      "learning_rate": 1.2669473684210527e-05,
+      "loss": 21.1305,
+      "step": 176
+    },
+    {
+      "epoch": 0.01998757834114392,
+      "grad_norm": 1.72091805934906,
+      "learning_rate": 1.214157894736842e-05,
+      "loss": 20.7073,
+      "step": 177
+    },
+    {
+      "epoch": 0.020100502512562814,
+      "grad_norm": 1.6310333013534546,
+      "learning_rate": 1.1613684210526315e-05,
+      "loss": 20.905,
+      "step": 178
+    },
+    {
+      "epoch": 0.020213426683981708,
+      "grad_norm": 1.5467758178710938,
+      "learning_rate": 1.108578947368421e-05,
+      "loss": 20.7847,
+      "step": 179
+    },
+    {
+      "epoch": 0.020326350855400598,
+      "grad_norm": 1.7597756385803223,
+      "learning_rate": 1.0557894736842104e-05,
+      "loss": 21.0582,
+      "step": 180
+    },
+    {
+      "epoch": 0.02043927502681949,
+      "grad_norm": 1.6882752180099487,
+      "learning_rate": 1.003e-05,
+      "loss": 21.0327,
+      "step": 181
+    },
+    {
+      "epoch": 0.020552199198238384,
+      "grad_norm": 1.6082948446273804,
+      "learning_rate": 9.502105263157896e-06,
+      "loss": 20.654,
+      "step": 182
+    },
+    {
+      "epoch": 0.020665123369657274,
+      "grad_norm": 1.5289745330810547,
+      "learning_rate": 8.974210526315789e-06,
+      "loss": 20.9506,
+      "step": 183
+    },
+    {
+      "epoch": 0.020778047541076167,
+      "grad_norm": 1.6904937028884888,
+      "learning_rate": 8.446315789473684e-06,
+      "loss": 20.8622,
+      "step": 184
+    },
+    {
+      "epoch": 0.02089097171249506,
+      "grad_norm": 1.7416986227035522,
+      "learning_rate": 7.918421052631579e-06,
+      "loss": 21.0439,
+      "step": 185
+    },
+    {
+      "epoch": 0.02100389588391395,
+      "grad_norm": 1.66006600856781,
+      "learning_rate": 7.3905263157894725e-06,
+      "loss": 20.7495,
+      "step": 186
+    },
+    {
+      "epoch": 0.021116820055332844,
+      "grad_norm": 1.7359073162078857,
+      "learning_rate": 6.862631578947368e-06,
+      "loss": 21.2057,
+      "step": 187
+    },
+    {
+      "epoch": 0.021229744226751737,
+      "grad_norm": 1.7616182565689087,
+      "learning_rate": 6.3347368421052634e-06,
+      "loss": 21.1366,
+      "step": 188
+    },
+    {
+      "epoch": 0.021342668398170627,
+      "grad_norm": 1.7205109596252441,
+      "learning_rate": 5.806842105263158e-06,
+      "loss": 20.7864,
+      "step": 189
+    },
+    {
+      "epoch": 0.02145559256958952,
+      "grad_norm": 1.669534683227539,
+      "learning_rate": 5.278947368421052e-06,
+      "loss": 20.9223,
+      "step": 190
+    },
+    {
+      "epoch": 0.021568516741008414,
+      "grad_norm": 1.515965461730957,
+      "learning_rate": 4.751052631578948e-06,
+      "loss": 20.8241,
+      "step": 191
+    },
+    {
+      "epoch": 0.021681440912427304,
+      "grad_norm": 1.6119455099105835,
+      "learning_rate": 4.223157894736842e-06,
+      "loss": 21.1209,
+      "step": 192
+    },
+    {
+      "epoch": 0.021794365083846197,
+      "grad_norm": 1.6289201974868774,
+      "learning_rate": 3.6952631578947362e-06,
+      "loss": 20.7822,
+      "step": 193
+    },
+    {
+      "epoch": 0.02190728925526509,
+      "grad_norm": 1.5696207284927368,
+      "learning_rate": 3.1673684210526317e-06,
+      "loss": 20.9989,
+      "step": 194
+    },
+    {
+      "epoch": 0.02202021342668398,
+      "grad_norm": 1.5837267637252808,
+      "learning_rate": 2.639473684210526e-06,
+      "loss": 20.9296,
+      "step": 195
+    },
+    {
+      "epoch": 0.022133137598102873,
+      "grad_norm": 1.6385504007339478,
+      "learning_rate": 2.111578947368421e-06,
+      "loss": 20.8717,
+      "step": 196
+    },
+    {
+      "epoch": 0.022246061769521767,
+      "grad_norm": 1.7605206966400146,
+      "learning_rate": 1.5836842105263159e-06,
+      "loss": 20.9795,
+      "step": 197
+    },
+    {
+      "epoch": 0.02235898594094066,
+      "grad_norm": 1.8636879920959473,
+      "learning_rate": 1.0557894736842105e-06,
+      "loss": 20.8952,
+      "step": 198
+    },
+    {
+      "epoch": 0.02247191011235955,
+      "grad_norm": 1.7859822511672974,
+      "learning_rate": 5.278947368421053e-07,
+      "loss": 20.9202,
+      "step": 199
+    },
+    {
+      "epoch": 0.022584834283778443,
+      "grad_norm": 2.1079020500183105,
+      "learning_rate": 0.0,
+      "loss": 20.7932,
+      "step": 200
+    },
+    {
+      "epoch": 0.022584834283778443,
+      "eval_loss": 10.495820045471191,
+      "eval_runtime": 30.1559,
+      "eval_samples_per_second": 123.657,
+      "eval_steps_per_second": 30.939,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5986779660288.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null