Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d77ec05daa2b7045a98e3d4031a0ea6d8fcae23cdbbd11138345d867bbefb5c
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:486c76cdea555b0d27f745c322cac30017e473fbc2c44b357a4b13bf105b550d
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb1f3b3637ca6afeaeefb961523725e56c5e036f9252b63864cf0d5f14631ce2
 size 520860

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd3b52bedf94f07ef8838018e3abf99150168455e600696ae67c6c00d350389d
 size 520860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e97bc478fe2ac7504e7e616cdab9b2772ca519cb3f42fed6b89a554bc1348874
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b4f794a92fa2b9f16fda328afadd716e28907db970d48c60a97f93197611bd7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f96196bd3544de2c28f6af356470f327df948539b0e3259c46b8a6786b633fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.564476013183594,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.015048154093097914,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 59.485,
       "eval_steps_per_second": 14.878,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8957775052800.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.556661605834961,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.020064205457463884,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 59.485,
       "eval_steps_per_second": 14.878,
       "step": 150
+    },
+    {
+      "epoch": 0.015148475120385233,
+      "grad_norm": 0.8411390781402588,
+      "learning_rate": 2.599578947368421e-05,
+      "loss": 20.9485,
+      "step": 151
+    },
+    {
+      "epoch": 0.015248796147672551,
+      "grad_norm": 0.5667639970779419,
+      "learning_rate": 2.5465263157894738e-05,
+      "loss": 21.1919,
+      "step": 152
+    },
+    {
+      "epoch": 0.015349117174959872,
+      "grad_norm": 0.6572590470314026,
+      "learning_rate": 2.493473684210526e-05,
+      "loss": 21.0881,
+      "step": 153
+    },
+    {
+      "epoch": 0.01544943820224719,
+      "grad_norm": 0.5560692548751831,
+      "learning_rate": 2.440421052631579e-05,
+      "loss": 20.9091,
+      "step": 154
+    },
+    {
+      "epoch": 0.015549759229534511,
+      "grad_norm": 0.5668249130249023,
+      "learning_rate": 2.3873684210526313e-05,
+      "loss": 20.9834,
+      "step": 155
+    },
+    {
+      "epoch": 0.01565008025682183,
+      "grad_norm": 0.572685718536377,
+      "learning_rate": 2.3343157894736843e-05,
+      "loss": 21.105,
+      "step": 156
+    },
+    {
+      "epoch": 0.01575040128410915,
+      "grad_norm": 0.6388788819313049,
+      "learning_rate": 2.281263157894737e-05,
+      "loss": 21.3976,
+      "step": 157
+    },
+    {
+      "epoch": 0.01585072231139647,
+      "grad_norm": 0.5789194703102112,
+      "learning_rate": 2.2282105263157892e-05,
+      "loss": 21.18,
+      "step": 158
+    },
+    {
+      "epoch": 0.015951043338683787,
+      "grad_norm": 0.7987357974052429,
+      "learning_rate": 2.175157894736842e-05,
+      "loss": 20.9966,
+      "step": 159
+    },
+    {
+      "epoch": 0.016051364365971106,
+      "grad_norm": 0.6647619009017944,
+      "learning_rate": 2.1221052631578944e-05,
+      "loss": 21.1758,
+      "step": 160
+    },
+    {
+      "epoch": 0.016151685393258428,
+      "grad_norm": 0.49766674637794495,
+      "learning_rate": 2.0690526315789474e-05,
+      "loss": 21.1863,
+      "step": 161
+    },
+    {
+      "epoch": 0.016252006420545747,
+      "grad_norm": 0.4853838384151459,
+      "learning_rate": 2.016e-05,
+      "loss": 21.1732,
+      "step": 162
+    },
+    {
+      "epoch": 0.016352327447833066,
+      "grad_norm": 0.5139999985694885,
+      "learning_rate": 1.9629473684210526e-05,
+      "loss": 20.9764,
+      "step": 163
+    },
+    {
+      "epoch": 0.016452648475120384,
+      "grad_norm": 0.658938467502594,
+      "learning_rate": 1.9098947368421053e-05,
+      "loss": 21.0441,
+      "step": 164
+    },
+    {
+      "epoch": 0.016552969502407703,
+      "grad_norm": 0.4852856397628784,
+      "learning_rate": 1.856842105263158e-05,
+      "loss": 21.0486,
+      "step": 165
+    },
+    {
+      "epoch": 0.016653290529695025,
+      "grad_norm": 0.5753272175788879,
+      "learning_rate": 1.8037894736842105e-05,
+      "loss": 21.2886,
+      "step": 166
+    },
+    {
+      "epoch": 0.016753611556982344,
+      "grad_norm": 0.523672342300415,
+      "learning_rate": 1.750736842105263e-05,
+      "loss": 21.1615,
+      "step": 167
+    },
+    {
+      "epoch": 0.016853932584269662,
+      "grad_norm": 0.5252603888511658,
+      "learning_rate": 1.6976842105263157e-05,
+      "loss": 21.2048,
+      "step": 168
+    },
+    {
+      "epoch": 0.01695425361155698,
+      "grad_norm": 0.5506589412689209,
+      "learning_rate": 1.6446315789473684e-05,
+      "loss": 21.2261,
+      "step": 169
+    },
+    {
+      "epoch": 0.017054574638844303,
+      "grad_norm": 0.7821411490440369,
+      "learning_rate": 1.591578947368421e-05,
+      "loss": 20.8842,
+      "step": 170
+    },
+    {
+      "epoch": 0.017154895666131622,
+      "grad_norm": 0.47702276706695557,
+      "learning_rate": 1.5385263157894736e-05,
+      "loss": 21.1209,
+      "step": 171
+    },
+    {
+      "epoch": 0.01725521669341894,
+      "grad_norm": 0.635858416557312,
+      "learning_rate": 1.485473684210526e-05,
+      "loss": 21.0996,
+      "step": 172
+    },
+    {
+      "epoch": 0.01735553772070626,
+      "grad_norm": 0.6043983697891235,
+      "learning_rate": 1.4324210526315789e-05,
+      "loss": 21.1075,
+      "step": 173
+    },
+    {
+      "epoch": 0.017455858747993578,
+      "grad_norm": 0.614186704158783,
+      "learning_rate": 1.3793684210526316e-05,
+      "loss": 20.9789,
+      "step": 174
+    },
+    {
+      "epoch": 0.0175561797752809,
+      "grad_norm": 0.5823302865028381,
+      "learning_rate": 1.3263157894736841e-05,
+      "loss": 21.2963,
+      "step": 175
+    },
+    {
+      "epoch": 0.01765650080256822,
+      "grad_norm": 0.5777328014373779,
+      "learning_rate": 1.2732631578947369e-05,
+      "loss": 21.2055,
+      "step": 176
+    },
+    {
+      "epoch": 0.017756821829855537,
+      "grad_norm": 0.5664768218994141,
+      "learning_rate": 1.2202105263157895e-05,
+      "loss": 20.9908,
+      "step": 177
+    },
+    {
+      "epoch": 0.017857142857142856,
+      "grad_norm": 0.5607063174247742,
+      "learning_rate": 1.1671578947368421e-05,
+      "loss": 21.2709,
+      "step": 178
+    },
+    {
+      "epoch": 0.01795746388443018,
+      "grad_norm": 0.5337129235267639,
+      "learning_rate": 1.1141052631578946e-05,
+      "loss": 21.0117,
+      "step": 179
+    },
+    {
+      "epoch": 0.018057784911717497,
+      "grad_norm": 0.46326208114624023,
+      "learning_rate": 1.0610526315789472e-05,
+      "loss": 21.2222,
+      "step": 180
+    },
+    {
+      "epoch": 0.018158105939004816,
+      "grad_norm": 0.5655015110969543,
+      "learning_rate": 1.008e-05,
+      "loss": 21.186,
+      "step": 181
+    },
+    {
+      "epoch": 0.018258426966292134,
+      "grad_norm": 0.7058250308036804,
+      "learning_rate": 9.549473684210526e-06,
+      "loss": 20.96,
+      "step": 182
+    },
+    {
+      "epoch": 0.018358747993579453,
+      "grad_norm": 0.6302198767662048,
+      "learning_rate": 9.018947368421052e-06,
+      "loss": 20.9938,
+      "step": 183
+    },
+    {
+      "epoch": 0.018459069020866775,
+      "grad_norm": 0.4981619715690613,
+      "learning_rate": 8.488421052631579e-06,
+      "loss": 21.084,
+      "step": 184
+    },
+    {
+      "epoch": 0.018559390048154094,
+      "grad_norm": 0.464811235666275,
+      "learning_rate": 7.957894736842105e-06,
+      "loss": 21.0503,
+      "step": 185
+    },
+    {
+      "epoch": 0.018659711075441412,
+      "grad_norm": 0.6704608798027039,
+      "learning_rate": 7.42736842105263e-06,
+      "loss": 21.4066,
+      "step": 186
+    },
+    {
+      "epoch": 0.01876003210272873,
+      "grad_norm": 0.6225428581237793,
+      "learning_rate": 6.896842105263158e-06,
+      "loss": 21.4143,
+      "step": 187
+    },
+    {
+      "epoch": 0.01886035313001605,
+      "grad_norm": 0.5495597124099731,
+      "learning_rate": 6.3663157894736845e-06,
+      "loss": 21.1925,
+      "step": 188
+    },
+    {
+      "epoch": 0.018960674157303372,
+      "grad_norm": 0.5154562592506409,
+      "learning_rate": 5.835789473684211e-06,
+      "loss": 21.2569,
+      "step": 189
+    },
+    {
+      "epoch": 0.01906099518459069,
+      "grad_norm": 0.5219792127609253,
+      "learning_rate": 5.305263157894736e-06,
+      "loss": 21.0913,
+      "step": 190
+    },
+    {
+      "epoch": 0.01916131621187801,
+      "grad_norm": 0.6066550016403198,
+      "learning_rate": 4.774736842105263e-06,
+      "loss": 21.0564,
+      "step": 191
+    },
+    {
+      "epoch": 0.019261637239165328,
+      "grad_norm": 0.6340304613113403,
+      "learning_rate": 4.244210526315789e-06,
+      "loss": 21.014,
+      "step": 192
+    },
+    {
+      "epoch": 0.01936195826645265,
+      "grad_norm": 0.47706007957458496,
+      "learning_rate": 3.713684210526315e-06,
+      "loss": 21.0343,
+      "step": 193
+    },
+    {
+      "epoch": 0.01946227929373997,
+      "grad_norm": 0.7116408348083496,
+      "learning_rate": 3.1831578947368422e-06,
+      "loss": 20.94,
+      "step": 194
+    },
+    {
+      "epoch": 0.019562600321027288,
+      "grad_norm": 0.5336193442344666,
+      "learning_rate": 2.652631578947368e-06,
+      "loss": 21.4013,
+      "step": 195
+    },
+    {
+      "epoch": 0.019662921348314606,
+      "grad_norm": 0.6600590348243713,
+      "learning_rate": 2.1221052631578947e-06,
+      "loss": 21.1013,
+      "step": 196
+    },
+    {
+      "epoch": 0.019763242375601925,
+      "grad_norm": 0.5917801856994629,
+      "learning_rate": 1.5915789473684211e-06,
+      "loss": 21.0708,
+      "step": 197
+    },
+    {
+      "epoch": 0.019863563402889247,
+      "grad_norm": 0.6681122183799744,
+      "learning_rate": 1.0610526315789473e-06,
+      "loss": 21.1159,
+      "step": 198
+    },
+    {
+      "epoch": 0.019963884430176566,
+      "grad_norm": 0.6584490537643433,
+      "learning_rate": 5.305263157894737e-07,
+      "loss": 21.0919,
+      "step": 199
+    },
+    {
+      "epoch": 0.020064205457463884,
+      "grad_norm": 0.6781143546104431,
+      "learning_rate": 0.0,
+      "loss": 21.4708,
+      "step": 200
+    },
+    {
+      "epoch": 0.020064205457463884,
+      "eval_loss": 10.556661605834961,
+      "eval_runtime": 70.5615,
+      "eval_samples_per_second": 59.494,
+      "eval_steps_per_second": 14.881,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 11943700070400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null