Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:107069d7ce7050d42b71037b4ead08abef1791b0af2fc56a3fd819dbc3f25b48
 size 522227376

 version https://git-lfs.github.com/spec/v1
+oid sha256:db6527b92998eb4d97dd8557188c97c3bc38aaaf6b2fb98c8ae47b4010f9044d
 size 522227376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a73bf606a6750dd3402855c0e08a9600800f2b1e6b7d366277b828543b6aa2b1
 size 265476436

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8af17623d3dd2edd153ed0eb4854180aa2d9a74c124355e2f897be02df9df7b
 size 265476436

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7248cc1865ade94bf3820a52ae78cc3da5a761ebb740767cb5b415b615a30c7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:94778b2c0a4fafa19b9b9664a360321d3d7bcf837bddaa94e0390eb59a308100
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef91cf09f7b6a58a39bbfe1ef78ac2fa91c0c15ca1705097a187d272d0433d8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47edf82128c3f034f21204d4b9ce5c76cd3269748ce31061b148a0a389d049d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.9038586616516113,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.012642757806902946,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.995,
       "eval_steps_per_second": 2.499,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.9816530321408e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.867233395576477,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.016857010409203926,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.995,
       "eval_steps_per_second": 2.499,
       "step": 150
+    },
+    {
+      "epoch": 0.012727042858948965,
+      "grad_norm": 1.444331407546997,
+      "learning_rate": 2.5944210526315793e-05,
+      "loss": 3.8494,
+      "step": 151
+    },
+    {
+      "epoch": 0.012811327910994985,
+      "grad_norm": 1.4190081357955933,
+      "learning_rate": 2.5414736842105266e-05,
+      "loss": 3.6972,
+      "step": 152
+    },
+    {
+      "epoch": 0.012895612963041005,
+      "grad_norm": 1.3863047361373901,
+      "learning_rate": 2.4885263157894737e-05,
+      "loss": 3.593,
+      "step": 153
+    },
+    {
+      "epoch": 0.012979898015087024,
+      "grad_norm": 1.3513926267623901,
+      "learning_rate": 2.4355789473684214e-05,
+      "loss": 3.436,
+      "step": 154
+    },
+    {
+      "epoch": 0.013064183067133044,
+      "grad_norm": 1.2380362749099731,
+      "learning_rate": 2.3826315789473684e-05,
+      "loss": 3.2587,
+      "step": 155
+    },
+    {
+      "epoch": 0.013148468119179064,
+      "grad_norm": 1.4090365171432495,
+      "learning_rate": 2.3296842105263158e-05,
+      "loss": 3.367,
+      "step": 156
+    },
+    {
+      "epoch": 0.013232753171225083,
+      "grad_norm": 1.2374507188796997,
+      "learning_rate": 2.2767368421052635e-05,
+      "loss": 3.4684,
+      "step": 157
+    },
+    {
+      "epoch": 0.013317038223271103,
+      "grad_norm": 1.3896780014038086,
+      "learning_rate": 2.2237894736842105e-05,
+      "loss": 3.9113,
+      "step": 158
+    },
+    {
+      "epoch": 0.013401323275317123,
+      "grad_norm": 1.332416296005249,
+      "learning_rate": 2.170842105263158e-05,
+      "loss": 3.7664,
+      "step": 159
+    },
+    {
+      "epoch": 0.013485608327363141,
+      "grad_norm": 1.4905543327331543,
+      "learning_rate": 2.1178947368421053e-05,
+      "loss": 4.2563,
+      "step": 160
+    },
+    {
+      "epoch": 0.013569893379409162,
+      "grad_norm": 1.4605516195297241,
+      "learning_rate": 2.0649473684210527e-05,
+      "loss": 3.8242,
+      "step": 161
+    },
+    {
+      "epoch": 0.013654178431455182,
+      "grad_norm": 1.4748574495315552,
+      "learning_rate": 2.0120000000000004e-05,
+      "loss": 4.0375,
+      "step": 162
+    },
+    {
+      "epoch": 0.0137384634835012,
+      "grad_norm": 1.5476000308990479,
+      "learning_rate": 1.9590526315789474e-05,
+      "loss": 3.7669,
+      "step": 163
+    },
+    {
+      "epoch": 0.01382274853554722,
+      "grad_norm": 1.3919553756713867,
+      "learning_rate": 1.9061052631578948e-05,
+      "loss": 3.3237,
+      "step": 164
+    },
+    {
+      "epoch": 0.01390703358759324,
+      "grad_norm": 1.464743971824646,
+      "learning_rate": 1.8531578947368422e-05,
+      "loss": 3.8191,
+      "step": 165
+    },
+    {
+      "epoch": 0.01399131863963926,
+      "grad_norm": 1.5848177671432495,
+      "learning_rate": 1.8002105263157896e-05,
+      "loss": 3.9749,
+      "step": 166
+    },
+    {
+      "epoch": 0.01407560369168528,
+      "grad_norm": 1.6681814193725586,
+      "learning_rate": 1.747263157894737e-05,
+      "loss": 4.2865,
+      "step": 167
+    },
+    {
+      "epoch": 0.0141598887437313,
+      "grad_norm": 1.6060197353363037,
+      "learning_rate": 1.6943157894736843e-05,
+      "loss": 3.3702,
+      "step": 168
+    },
+    {
+      "epoch": 0.01424417379577732,
+      "grad_norm": 1.5922913551330566,
+      "learning_rate": 1.6413684210526317e-05,
+      "loss": 3.9925,
+      "step": 169
+    },
+    {
+      "epoch": 0.014328458847823338,
+      "grad_norm": 1.6466814279556274,
+      "learning_rate": 1.588421052631579e-05,
+      "loss": 3.4887,
+      "step": 170
+    },
+    {
+      "epoch": 0.014412743899869358,
+      "grad_norm": 1.7543516159057617,
+      "learning_rate": 1.5354736842105264e-05,
+      "loss": 4.0886,
+      "step": 171
+    },
+    {
+      "epoch": 0.014497028951915378,
+      "grad_norm": 1.7576175928115845,
+      "learning_rate": 1.4825263157894736e-05,
+      "loss": 3.769,
+      "step": 172
+    },
+    {
+      "epoch": 0.014581314003961397,
+      "grad_norm": 1.6161974668502808,
+      "learning_rate": 1.4295789473684212e-05,
+      "loss": 3.7848,
+      "step": 173
+    },
+    {
+      "epoch": 0.014665599056007417,
+      "grad_norm": 1.7880433797836304,
+      "learning_rate": 1.3766315789473686e-05,
+      "loss": 3.5251,
+      "step": 174
+    },
+    {
+      "epoch": 0.014749884108053437,
+      "grad_norm": 1.7841137647628784,
+      "learning_rate": 1.3236842105263158e-05,
+      "loss": 3.9346,
+      "step": 175
+    },
+    {
+      "epoch": 0.014834169160099456,
+      "grad_norm": 1.919546365737915,
+      "learning_rate": 1.2707368421052633e-05,
+      "loss": 4.1778,
+      "step": 176
+    },
+    {
+      "epoch": 0.014918454212145476,
+      "grad_norm": 1.8676812648773193,
+      "learning_rate": 1.2177894736842107e-05,
+      "loss": 3.8695,
+      "step": 177
+    },
+    {
+      "epoch": 0.015002739264191496,
+      "grad_norm": 1.832812786102295,
+      "learning_rate": 1.1648421052631579e-05,
+      "loss": 4.0394,
+      "step": 178
+    },
+    {
+      "epoch": 0.015087024316237515,
+      "grad_norm": 2.0502195358276367,
+      "learning_rate": 1.1118947368421053e-05,
+      "loss": 3.9268,
+      "step": 179
+    },
+    {
+      "epoch": 0.015171309368283535,
+      "grad_norm": 2.0106148719787598,
+      "learning_rate": 1.0589473684210526e-05,
+      "loss": 4.2192,
+      "step": 180
+    },
+    {
+      "epoch": 0.015255594420329555,
+      "grad_norm": 2.064776659011841,
+      "learning_rate": 1.0060000000000002e-05,
+      "loss": 3.9229,
+      "step": 181
+    },
+    {
+      "epoch": 0.015339879472375573,
+      "grad_norm": 2.251138687133789,
+      "learning_rate": 9.530526315789474e-06,
+      "loss": 3.9887,
+      "step": 182
+    },
+    {
+      "epoch": 0.015424164524421594,
+      "grad_norm": 2.255735397338867,
+      "learning_rate": 9.001052631578948e-06,
+      "loss": 4.02,
+      "step": 183
+    },
+    {
+      "epoch": 0.015508449576467614,
+      "grad_norm": 2.071152687072754,
+      "learning_rate": 8.471578947368422e-06,
+      "loss": 3.7057,
+      "step": 184
+    },
+    {
+      "epoch": 0.015592734628513634,
+      "grad_norm": 2.0596232414245605,
+      "learning_rate": 7.942105263157895e-06,
+      "loss": 3.7983,
+      "step": 185
+    },
+    {
+      "epoch": 0.015677019680559654,
+      "grad_norm": 2.190840721130371,
+      "learning_rate": 7.412631578947368e-06,
+      "loss": 3.9793,
+      "step": 186
+    },
+    {
+      "epoch": 0.01576130473260567,
+      "grad_norm": 2.3367197513580322,
+      "learning_rate": 6.883157894736843e-06,
+      "loss": 4.3535,
+      "step": 187
+    },
+    {
+      "epoch": 0.01584558978465169,
+      "grad_norm": 2.380443811416626,
+      "learning_rate": 6.3536842105263166e-06,
+      "loss": 3.8499,
+      "step": 188
+    },
+    {
+      "epoch": 0.01592987483669771,
+      "grad_norm": 2.041156530380249,
+      "learning_rate": 5.8242105263157895e-06,
+      "loss": 3.6364,
+      "step": 189
+    },
+    {
+      "epoch": 0.01601415988874373,
+      "grad_norm": 2.2828075885772705,
+      "learning_rate": 5.294736842105263e-06,
+      "loss": 4.0151,
+      "step": 190
+    },
+    {
+      "epoch": 0.01609844494078975,
+      "grad_norm": 2.1917965412139893,
+      "learning_rate": 4.765263157894737e-06,
+      "loss": 3.9636,
+      "step": 191
+    },
+    {
+      "epoch": 0.016182729992835772,
+      "grad_norm": 2.317061424255371,
+      "learning_rate": 4.235789473684211e-06,
+      "loss": 3.4895,
+      "step": 192
+    },
+    {
+      "epoch": 0.01626701504488179,
+      "grad_norm": 2.4780666828155518,
+      "learning_rate": 3.706315789473684e-06,
+      "loss": 3.2654,
+      "step": 193
+    },
+    {
+      "epoch": 0.01635130009692781,
+      "grad_norm": 2.7359607219696045,
+      "learning_rate": 3.1768421052631583e-06,
+      "loss": 3.6544,
+      "step": 194
+    },
+    {
+      "epoch": 0.01643558514897383,
+      "grad_norm": 2.5973706245422363,
+      "learning_rate": 2.6473684210526316e-06,
+      "loss": 3.615,
+      "step": 195
+    },
+    {
+      "epoch": 0.01651987020101985,
+      "grad_norm": 2.7825827598571777,
+      "learning_rate": 2.1178947368421054e-06,
+      "loss": 3.2319,
+      "step": 196
+    },
+    {
+      "epoch": 0.01660415525306587,
+      "grad_norm": 2.764317750930786,
+      "learning_rate": 1.5884210526315791e-06,
+      "loss": 3.2517,
+      "step": 197
+    },
+    {
+      "epoch": 0.01668844030511189,
+      "grad_norm": 2.831258535385132,
+      "learning_rate": 1.0589473684210527e-06,
+      "loss": 2.949,
+      "step": 198
+    },
+    {
+      "epoch": 0.01677272535715791,
+      "grad_norm": 3.2517249584198,
+      "learning_rate": 5.294736842105263e-07,
+      "loss": 3.4868,
+      "step": 199
+    },
+    {
+      "epoch": 0.016857010409203926,
+      "grad_norm": 4.0189619064331055,
+      "learning_rate": 0.0,
+      "loss": 3.4463,
+      "step": 200
+    },
+    {
+      "epoch": 0.016857010409203926,
+      "eval_loss": 1.867233395576477,
+      "eval_runtime": 498.9005,
+      "eval_samples_per_second": 10.014,
+      "eval_steps_per_second": 2.504,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.6422040428544e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null