Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6b23489f8a720cc99e9cd000c795de0c1dd0fde8acdbaa15446f3e973eaa1e0
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:c93984d28c4a30b2a4ea5490d7bb7dd5b2ea6edb1fd13b1559a332c60f99a7dc
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fc6f8fa625078417a9080893827f5159aa356af906eaf09ef5a95526ae43f70
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:06dcdaaee1ef9a675b005fa577ab41dedf3aba179f1b06f144edd21328b76b91
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea32a3deb21f59550ff545058e6c052cfd41665e6eb5122140e0848d1e1981f0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1bc1eda3a83805b6948a665f2d0d43d38f9319042ad9a85e1c971894802b677
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f96196bd3544de2c28f6af356470f327df948539b0e3259c46b8a6786b633fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.309267520904541,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.009624639076034648,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.567,
       "eval_steps_per_second": 2.392,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.116529964220416e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.285676956176758,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.012832852101379532,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.567,
       "eval_steps_per_second": 2.392,
       "step": 150
+    },
+    {
+      "epoch": 0.009688803336541546,
+      "grad_norm": 0.5816793441772461,
+      "learning_rate": 2.599578947368421e-05,
+      "loss": 2.272,
+      "step": 151
+    },
+    {
+      "epoch": 0.009752967597048443,
+      "grad_norm": 0.6882290244102478,
+      "learning_rate": 2.5465263157894738e-05,
+      "loss": 2.5153,
+      "step": 152
+    },
+    {
+      "epoch": 0.009817131857555342,
+      "grad_norm": 0.6470658779144287,
+      "learning_rate": 2.493473684210526e-05,
+      "loss": 2.5716,
+      "step": 153
+    },
+    {
+      "epoch": 0.00988129611806224,
+      "grad_norm": 0.6339399218559265,
+      "learning_rate": 2.440421052631579e-05,
+      "loss": 2.4833,
+      "step": 154
+    },
+    {
+      "epoch": 0.009945460378569138,
+      "grad_norm": 0.6225025057792664,
+      "learning_rate": 2.3873684210526313e-05,
+      "loss": 2.4854,
+      "step": 155
+    },
+    {
+      "epoch": 0.010009624639076035,
+      "grad_norm": 0.5143991112709045,
+      "learning_rate": 2.3343157894736843e-05,
+      "loss": 2.1436,
+      "step": 156
+    },
+    {
+      "epoch": 0.010073788899582933,
+      "grad_norm": 0.49274468421936035,
+      "learning_rate": 2.281263157894737e-05,
+      "loss": 2.2916,
+      "step": 157
+    },
+    {
+      "epoch": 0.01013795316008983,
+      "grad_norm": 0.5323902368545532,
+      "learning_rate": 2.2282105263157892e-05,
+      "loss": 2.3657,
+      "step": 158
+    },
+    {
+      "epoch": 0.010202117420596728,
+      "grad_norm": 0.5345289707183838,
+      "learning_rate": 2.175157894736842e-05,
+      "loss": 2.4812,
+      "step": 159
+    },
+    {
+      "epoch": 0.010266281681103625,
+      "grad_norm": 0.5379322171211243,
+      "learning_rate": 2.1221052631578944e-05,
+      "loss": 2.2686,
+      "step": 160
+    },
+    {
+      "epoch": 0.010330445941610523,
+      "grad_norm": 0.604354202747345,
+      "learning_rate": 2.0690526315789474e-05,
+      "loss": 2.2495,
+      "step": 161
+    },
+    {
+      "epoch": 0.01039461020211742,
+      "grad_norm": 0.6500736474990845,
+      "learning_rate": 2.016e-05,
+      "loss": 2.4631,
+      "step": 162
+    },
+    {
+      "epoch": 0.010458774462624318,
+      "grad_norm": 0.49164876341819763,
+      "learning_rate": 1.9629473684210526e-05,
+      "loss": 2.2647,
+      "step": 163
+    },
+    {
+      "epoch": 0.010522938723131215,
+      "grad_norm": 0.594844400882721,
+      "learning_rate": 1.9098947368421053e-05,
+      "loss": 2.2196,
+      "step": 164
+    },
+    {
+      "epoch": 0.010587102983638113,
+      "grad_norm": 0.6481039524078369,
+      "learning_rate": 1.856842105263158e-05,
+      "loss": 2.3019,
+      "step": 165
+    },
+    {
+      "epoch": 0.01065126724414501,
+      "grad_norm": 0.5919328927993774,
+      "learning_rate": 1.8037894736842105e-05,
+      "loss": 2.311,
+      "step": 166
+    },
+    {
+      "epoch": 0.010715431504651908,
+      "grad_norm": 0.7396532893180847,
+      "learning_rate": 1.750736842105263e-05,
+      "loss": 2.3443,
+      "step": 167
+    },
+    {
+      "epoch": 0.010779595765158807,
+      "grad_norm": 0.685646116733551,
+      "learning_rate": 1.6976842105263157e-05,
+      "loss": 2.2547,
+      "step": 168
+    },
+    {
+      "epoch": 0.010843760025665705,
+      "grad_norm": 0.6162258386611938,
+      "learning_rate": 1.6446315789473684e-05,
+      "loss": 2.4626,
+      "step": 169
+    },
+    {
+      "epoch": 0.010907924286172602,
+      "grad_norm": 0.6352111101150513,
+      "learning_rate": 1.591578947368421e-05,
+      "loss": 2.1036,
+      "step": 170
+    },
+    {
+      "epoch": 0.0109720885466795,
+      "grad_norm": 0.7053295969963074,
+      "learning_rate": 1.5385263157894736e-05,
+      "loss": 2.3642,
+      "step": 171
+    },
+    {
+      "epoch": 0.011036252807186397,
+      "grad_norm": 0.7251001000404358,
+      "learning_rate": 1.485473684210526e-05,
+      "loss": 2.1675,
+      "step": 172
+    },
+    {
+      "epoch": 0.011100417067693295,
+      "grad_norm": 0.7106326818466187,
+      "learning_rate": 1.4324210526315789e-05,
+      "loss": 2.3521,
+      "step": 173
+    },
+    {
+      "epoch": 0.011164581328200193,
+      "grad_norm": 0.7603542804718018,
+      "learning_rate": 1.3793684210526316e-05,
+      "loss": 2.3575,
+      "step": 174
+    },
+    {
+      "epoch": 0.01122874558870709,
+      "grad_norm": 0.8023301362991333,
+      "learning_rate": 1.3263157894736841e-05,
+      "loss": 2.5608,
+      "step": 175
+    },
+    {
+      "epoch": 0.011292909849213988,
+      "grad_norm": 0.7718851566314697,
+      "learning_rate": 1.2732631578947369e-05,
+      "loss": 2.3804,
+      "step": 176
+    },
+    {
+      "epoch": 0.011357074109720885,
+      "grad_norm": 0.7355964779853821,
+      "learning_rate": 1.2202105263157895e-05,
+      "loss": 2.405,
+      "step": 177
+    },
+    {
+      "epoch": 0.011421238370227783,
+      "grad_norm": 0.7612870335578918,
+      "learning_rate": 1.1671578947368421e-05,
+      "loss": 2.4048,
+      "step": 178
+    },
+    {
+      "epoch": 0.01148540263073468,
+      "grad_norm": 0.8487880229949951,
+      "learning_rate": 1.1141052631578946e-05,
+      "loss": 2.4957,
+      "step": 179
+    },
+    {
+      "epoch": 0.011549566891241578,
+      "grad_norm": 0.8127219080924988,
+      "learning_rate": 1.0610526315789472e-05,
+      "loss": 2.0812,
+      "step": 180
+    },
+    {
+      "epoch": 0.011613731151748475,
+      "grad_norm": 0.8771964907646179,
+      "learning_rate": 1.008e-05,
+      "loss": 2.2291,
+      "step": 181
+    },
+    {
+      "epoch": 0.011677895412255375,
+      "grad_norm": 0.8430835604667664,
+      "learning_rate": 9.549473684210526e-06,
+      "loss": 2.311,
+      "step": 182
+    },
+    {
+      "epoch": 0.011742059672762272,
+      "grad_norm": 0.8586536645889282,
+      "learning_rate": 9.018947368421052e-06,
+      "loss": 2.2969,
+      "step": 183
+    },
+    {
+      "epoch": 0.01180622393326917,
+      "grad_norm": 0.8553078770637512,
+      "learning_rate": 8.488421052631579e-06,
+      "loss": 2.1949,
+      "step": 184
+    },
+    {
+      "epoch": 0.011870388193776067,
+      "grad_norm": 0.9597827792167664,
+      "learning_rate": 7.957894736842105e-06,
+      "loss": 2.5663,
+      "step": 185
+    },
+    {
+      "epoch": 0.011934552454282965,
+      "grad_norm": 0.882953941822052,
+      "learning_rate": 7.42736842105263e-06,
+      "loss": 2.3545,
+      "step": 186
+    },
+    {
+      "epoch": 0.011998716714789862,
+      "grad_norm": 0.9397551417350769,
+      "learning_rate": 6.896842105263158e-06,
+      "loss": 2.2551,
+      "step": 187
+    },
+    {
+      "epoch": 0.01206288097529676,
+      "grad_norm": 0.8707554340362549,
+      "learning_rate": 6.3663157894736845e-06,
+      "loss": 2.3134,
+      "step": 188
+    },
+    {
+      "epoch": 0.012127045235803657,
+      "grad_norm": 0.8545828461647034,
+      "learning_rate": 5.835789473684211e-06,
+      "loss": 2.1863,
+      "step": 189
+    },
+    {
+      "epoch": 0.012191209496310555,
+      "grad_norm": 0.9791707396507263,
+      "learning_rate": 5.305263157894736e-06,
+      "loss": 2.1821,
+      "step": 190
+    },
+    {
+      "epoch": 0.012255373756817452,
+      "grad_norm": 1.0541386604309082,
+      "learning_rate": 4.774736842105263e-06,
+      "loss": 2.4222,
+      "step": 191
+    },
+    {
+      "epoch": 0.01231953801732435,
+      "grad_norm": 0.9878808259963989,
+      "learning_rate": 4.244210526315789e-06,
+      "loss": 2.1891,
+      "step": 192
+    },
+    {
+      "epoch": 0.012383702277831247,
+      "grad_norm": 0.9502939581871033,
+      "learning_rate": 3.713684210526315e-06,
+      "loss": 2.1575,
+      "step": 193
+    },
+    {
+      "epoch": 0.012447866538338145,
+      "grad_norm": 1.0325045585632324,
+      "learning_rate": 3.1831578947368422e-06,
+      "loss": 2.3336,
+      "step": 194
+    },
+    {
+      "epoch": 0.012512030798845043,
+      "grad_norm": 1.0161420106887817,
+      "learning_rate": 2.652631578947368e-06,
+      "loss": 2.3017,
+      "step": 195
+    },
+    {
+      "epoch": 0.01257619505935194,
+      "grad_norm": 1.029266357421875,
+      "learning_rate": 2.1221052631578947e-06,
+      "loss": 2.0349,
+      "step": 196
+    },
+    {
+      "epoch": 0.01264035931985884,
+      "grad_norm": 1.1255306005477905,
+      "learning_rate": 1.5915789473684211e-06,
+      "loss": 1.9766,
+      "step": 197
+    },
+    {
+      "epoch": 0.012704523580365737,
+      "grad_norm": 1.2201471328735352,
+      "learning_rate": 1.0610526315789473e-06,
+      "loss": 2.3093,
+      "step": 198
+    },
+    {
+      "epoch": 0.012768687840872634,
+      "grad_norm": 1.1325277090072632,
+      "learning_rate": 5.305263157894737e-07,
+      "loss": 2.373,
+      "step": 199
+    },
+    {
+      "epoch": 0.012832852101379532,
+      "grad_norm": 1.2911548614501953,
+      "learning_rate": 0.0,
+      "loss": 1.9981,
+      "step": 200
+    },
+    {
+      "epoch": 0.012832852101379532,
+      "eval_loss": 2.285676956176758,
+      "eval_runtime": 686.344,
+      "eval_samples_per_second": 9.562,
+      "eval_steps_per_second": 2.391,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.827631788320358e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null