Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f5cbcefe544f62c5d9bff4ffaef3ee7700f9d1d2183909f81173d7fe2d0f974
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2e0b72fe2f0c4321a8d936437e0cf30977a6eb4ff6915ae8ee13101f6d9c1de
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac9ee895f07e084d10df3912d9dc2e2b6c7b6997b77cc46ba4f6ec16bfaee5da
 size 520860

 version https://git-lfs.github.com/spec/v1
+oid sha256:aca4c113273d4f26d7d6540b20edc9c6a3e7c30bbd91996dd1d9533c8bc09ef4
 size 520860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e97bc478fe2ac7504e7e616cdab9b2772ca519cb3f42fed6b89a554bc1348874
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b4f794a92fa2b9f16fda328afadd716e28907db970d48c60a97f93197611bd7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcf03b4d5469a652184718e32be2e70c8f65db8a0bf0774f129b38a7ee64ae50
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2eca4a3affde04f8094d886307f00c561b3e3a77570955ff1177732e06fbab0c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.558324813842773,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.015048154093097914,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 59.498,
       "eval_steps_per_second": 14.882,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8957775052800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.55037784576416,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.020064205457463884,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 59.498,
       "eval_steps_per_second": 14.882,
       "step": 150
+    },
+    {
+      "epoch": 0.015148475120385233,
+      "grad_norm": 0.8424636721611023,
+      "learning_rate": 2.6021578947368423e-05,
+      "loss": 20.936,
+      "step": 151
+    },
+    {
+      "epoch": 0.015248796147672551,
+      "grad_norm": 0.5448256731033325,
+      "learning_rate": 2.5490526315789475e-05,
+      "loss": 21.1742,
+      "step": 152
+    },
+    {
+      "epoch": 0.015349117174959872,
+      "grad_norm": 0.6612237095832825,
+      "learning_rate": 2.4959473684210524e-05,
+      "loss": 21.0709,
+      "step": 153
+    },
+    {
+      "epoch": 0.01544943820224719,
+      "grad_norm": 0.550390899181366,
+      "learning_rate": 2.442842105263158e-05,
+      "loss": 20.8933,
+      "step": 154
+    },
+    {
+      "epoch": 0.015549759229534511,
+      "grad_norm": 0.6066597700119019,
+      "learning_rate": 2.389736842105263e-05,
+      "loss": 20.9679,
+      "step": 155
+    },
+    {
+      "epoch": 0.01565008025682183,
+      "grad_norm": 0.595045804977417,
+      "learning_rate": 2.3366315789473685e-05,
+      "loss": 21.0887,
+      "step": 156
+    },
+    {
+      "epoch": 0.01575040128410915,
+      "grad_norm": 0.6363713145256042,
+      "learning_rate": 2.2835263157894738e-05,
+      "loss": 21.3898,
+      "step": 157
+    },
+    {
+      "epoch": 0.01585072231139647,
+      "grad_norm": 0.5982836484909058,
+      "learning_rate": 2.230421052631579e-05,
+      "loss": 21.1671,
+      "step": 158
+    },
+    {
+      "epoch": 0.015951043338683787,
+      "grad_norm": 0.7963234782218933,
+      "learning_rate": 2.1773157894736843e-05,
+      "loss": 20.9716,
+      "step": 159
+    },
+    {
+      "epoch": 0.016051364365971106,
+      "grad_norm": 0.647216796875,
+      "learning_rate": 2.1242105263157895e-05,
+      "loss": 21.1608,
+      "step": 160
+    },
+    {
+      "epoch": 0.016151685393258428,
+      "grad_norm": 0.5022075772285461,
+      "learning_rate": 2.0711052631578947e-05,
+      "loss": 21.1819,
+      "step": 161
+    },
+    {
+      "epoch": 0.016252006420545747,
+      "grad_norm": 0.5094108581542969,
+      "learning_rate": 2.018e-05,
+      "loss": 21.1568,
+      "step": 162
+    },
+    {
+      "epoch": 0.016352327447833066,
+      "grad_norm": 0.5434950590133667,
+      "learning_rate": 1.9648947368421052e-05,
+      "loss": 20.9586,
+      "step": 163
+    },
+    {
+      "epoch": 0.016452648475120384,
+      "grad_norm": 0.6874385476112366,
+      "learning_rate": 1.9117894736842105e-05,
+      "loss": 21.0375,
+      "step": 164
+    },
+    {
+      "epoch": 0.016552969502407703,
+      "grad_norm": 0.49630945920944214,
+      "learning_rate": 1.8586842105263157e-05,
+      "loss": 21.0352,
+      "step": 165
+    },
+    {
+      "epoch": 0.016653290529695025,
+      "grad_norm": 0.6111531257629395,
+      "learning_rate": 1.805578947368421e-05,
+      "loss": 21.2822,
+      "step": 166
+    },
+    {
+      "epoch": 0.016753611556982344,
+      "grad_norm": 0.5392615795135498,
+      "learning_rate": 1.7524736842105266e-05,
+      "loss": 21.1524,
+      "step": 167
+    },
+    {
+      "epoch": 0.016853932584269662,
+      "grad_norm": 0.5594942569732666,
+      "learning_rate": 1.6993684210526315e-05,
+      "loss": 21.1931,
+      "step": 168
+    },
+    {
+      "epoch": 0.01695425361155698,
+      "grad_norm": 0.5756310224533081,
+      "learning_rate": 1.646263157894737e-05,
+      "loss": 21.2089,
+      "step": 169
+    },
+    {
+      "epoch": 0.017054574638844303,
+      "grad_norm": 0.783043622970581,
+      "learning_rate": 1.593157894736842e-05,
+      "loss": 20.8715,
+      "step": 170
+    },
+    {
+      "epoch": 0.017154895666131622,
+      "grad_norm": 0.4898316562175751,
+      "learning_rate": 1.5400526315789475e-05,
+      "loss": 21.1136,
+      "step": 171
+    },
+    {
+      "epoch": 0.01725521669341894,
+      "grad_norm": 0.6354514360427856,
+      "learning_rate": 1.4869473684210524e-05,
+      "loss": 21.089,
+      "step": 172
+    },
+    {
+      "epoch": 0.01735553772070626,
+      "grad_norm": 0.6032927632331848,
+      "learning_rate": 1.4338421052631579e-05,
+      "loss": 21.0936,
+      "step": 173
+    },
+    {
+      "epoch": 0.017455858747993578,
+      "grad_norm": 0.6571072936058044,
+      "learning_rate": 1.3807368421052633e-05,
+      "loss": 20.9723,
+      "step": 174
+    },
+    {
+      "epoch": 0.0175561797752809,
+      "grad_norm": 0.5904538035392761,
+      "learning_rate": 1.3276315789473684e-05,
+      "loss": 21.287,
+      "step": 175
+    },
+    {
+      "epoch": 0.01765650080256822,
+      "grad_norm": 0.5847862362861633,
+      "learning_rate": 1.2745263157894738e-05,
+      "loss": 21.1966,
+      "step": 176
+    },
+    {
+      "epoch": 0.017756821829855537,
+      "grad_norm": 0.5706862211227417,
+      "learning_rate": 1.221421052631579e-05,
+      "loss": 20.9706,
+      "step": 177
+    },
+    {
+      "epoch": 0.017857142857142856,
+      "grad_norm": 0.5410795211791992,
+      "learning_rate": 1.1683157894736843e-05,
+      "loss": 21.2563,
+      "step": 178
+    },
+    {
+      "epoch": 0.01795746388443018,
+      "grad_norm": 0.5394900441169739,
+      "learning_rate": 1.1152105263157895e-05,
+      "loss": 20.9992,
+      "step": 179
+    },
+    {
+      "epoch": 0.018057784911717497,
+      "grad_norm": 0.45912498235702515,
+      "learning_rate": 1.0621052631578948e-05,
+      "loss": 21.2137,
+      "step": 180
+    },
+    {
+      "epoch": 0.018158105939004816,
+      "grad_norm": 0.5860676765441895,
+      "learning_rate": 1.009e-05,
+      "loss": 21.1744,
+      "step": 181
+    },
+    {
+      "epoch": 0.018258426966292134,
+      "grad_norm": 0.7394751310348511,
+      "learning_rate": 9.558947368421052e-06,
+      "loss": 20.9416,
+      "step": 182
+    },
+    {
+      "epoch": 0.018358747993579453,
+      "grad_norm": 0.6703020334243774,
+      "learning_rate": 9.027894736842105e-06,
+      "loss": 20.9801,
+      "step": 183
+    },
+    {
+      "epoch": 0.018459069020866775,
+      "grad_norm": 0.5259845852851868,
+      "learning_rate": 8.496842105263157e-06,
+      "loss": 21.0672,
+      "step": 184
+    },
+    {
+      "epoch": 0.018559390048154094,
+      "grad_norm": 0.47938376665115356,
+      "learning_rate": 7.96578947368421e-06,
+      "loss": 21.0379,
+      "step": 185
+    },
+    {
+      "epoch": 0.018659711075441412,
+      "grad_norm": 0.6665632128715515,
+      "learning_rate": 7.434736842105262e-06,
+      "loss": 21.3934,
+      "step": 186
+    },
+    {
+      "epoch": 0.01876003210272873,
+      "grad_norm": 0.6356412172317505,
+      "learning_rate": 6.903684210526316e-06,
+      "loss": 21.3995,
+      "step": 187
+    },
+    {
+      "epoch": 0.01886035313001605,
+      "grad_norm": 0.5531170964241028,
+      "learning_rate": 6.372631578947369e-06,
+      "loss": 21.177,
+      "step": 188
+    },
+    {
+      "epoch": 0.018960674157303372,
+      "grad_norm": 0.5264145731925964,
+      "learning_rate": 5.841578947368421e-06,
+      "loss": 21.2467,
+      "step": 189
+    },
+    {
+      "epoch": 0.01906099518459069,
+      "grad_norm": 0.5184823870658875,
+      "learning_rate": 5.310526315789474e-06,
+      "loss": 21.0768,
+      "step": 190
+    },
+    {
+      "epoch": 0.01916131621187801,
+      "grad_norm": 0.601334810256958,
+      "learning_rate": 4.779473684210526e-06,
+      "loss": 21.0318,
+      "step": 191
+    },
+    {
+      "epoch": 0.019261637239165328,
+      "grad_norm": 0.6639525890350342,
+      "learning_rate": 4.248421052631579e-06,
+      "loss": 20.9996,
+      "step": 192
+    },
+    {
+      "epoch": 0.01936195826645265,
+      "grad_norm": 0.47377097606658936,
+      "learning_rate": 3.717368421052631e-06,
+      "loss": 21.0164,
+      "step": 193
+    },
+    {
+      "epoch": 0.01946227929373997,
+      "grad_norm": 0.6908702254295349,
+      "learning_rate": 3.1863157894736844e-06,
+      "loss": 20.9247,
+      "step": 194
+    },
+    {
+      "epoch": 0.019562600321027288,
+      "grad_norm": 0.5369330048561096,
+      "learning_rate": 2.655263157894737e-06,
+      "loss": 21.3861,
+      "step": 195
+    },
+    {
+      "epoch": 0.019662921348314606,
+      "grad_norm": 0.6818935871124268,
+      "learning_rate": 2.1242105263157893e-06,
+      "loss": 21.0936,
+      "step": 196
+    },
+    {
+      "epoch": 0.019763242375601925,
+      "grad_norm": 0.6107151508331299,
+      "learning_rate": 1.5931578947368422e-06,
+      "loss": 21.0526,
+      "step": 197
+    },
+    {
+      "epoch": 0.019863563402889247,
+      "grad_norm": 0.6597663760185242,
+      "learning_rate": 1.0621052631578947e-06,
+      "loss": 21.1064,
+      "step": 198
+    },
+    {
+      "epoch": 0.019963884430176566,
+      "grad_norm": 0.6686668992042542,
+      "learning_rate": 5.310526315789473e-07,
+      "loss": 21.0729,
+      "step": 199
+    },
+    {
+      "epoch": 0.020064205457463884,
+      "grad_norm": 0.7321626543998718,
+      "learning_rate": 0.0,
+      "loss": 21.4709,
+      "step": 200
+    },
+    {
+      "epoch": 0.020064205457463884,
+      "eval_loss": 10.55037784576416,
+      "eval_runtime": 70.588,
+      "eval_samples_per_second": 59.472,
+      "eval_steps_per_second": 14.875,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 11943700070400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null