Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a387001c9b889d44b1216ff21cb9c14ba950fccf3df8262ca1b2008d6b951061
 size 113284112

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea082eac372fff81ac14bb4fdbac846c846ded09a07efb0ee3eda6522777e9ed
 size 113284112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4c63196ad99c73cc6817a03760643ac2c31a7480f1eca406f6362ab6fcbd3c0
 size 57846868

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcdbce684187f76d573e60d985e1e82ec35c97f99987c86317753d6f3ff7d22c
 size 57846868

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f852779be01d6f7be5abfe028624b0dade0ab0a86f6ce9a939dd35aed2273acb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1363a368a375a84fcf4e1a2ab7099bc7f24ac0da08f6379d64fb69d0b2f211d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d92ba00805f178e64a4a7fc7ebf59bf8a4f5750cc8c2818832081930f2c83e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a27b052646dcb561cbd68156c30bd466ce59bda64cf3c8eba9c3c1113af9827c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.273843765258789,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.20696791997240427,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 35.078,
       "eval_steps_per_second": 8.777,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.91381412937728e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.997969388961792,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.2759572266298724,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 35.078,
       "eval_steps_per_second": 8.777,
       "step": 150
+    },
+    {
+      "epoch": 0.20834770610555364,
+      "grad_norm": 3.158139228820801,
+      "learning_rate": 4.995066821070679e-05,
+      "loss": 3.6353,
+      "step": 151
+    },
+    {
+      "epoch": 0.209727492238703,
+      "grad_norm": 3.5997750759124756,
+      "learning_rate": 4.980286753286195e-05,
+      "loss": 3.5989,
+      "step": 152
+    },
+    {
+      "epoch": 0.21110727837185236,
+      "grad_norm": 3.574474334716797,
+      "learning_rate": 4.9557181268217227e-05,
+      "loss": 3.4982,
+      "step": 153
+    },
+    {
+      "epoch": 0.21248706450500174,
+      "grad_norm": 2.906959056854248,
+      "learning_rate": 4.9214579028215776e-05,
+      "loss": 3.3847,
+      "step": 154
+    },
+    {
+      "epoch": 0.21386685063815108,
+      "grad_norm": 2.554720640182495,
+      "learning_rate": 4.877641290737884e-05,
+      "loss": 3.3131,
+      "step": 155
+    },
+    {
+      "epoch": 0.21524663677130046,
+      "grad_norm": 2.099410057067871,
+      "learning_rate": 4.8244412147206284e-05,
+      "loss": 3.2781,
+      "step": 156
+    },
+    {
+      "epoch": 0.2166264229044498,
+      "grad_norm": 1.690164566040039,
+      "learning_rate": 4.762067631165049e-05,
+      "loss": 3.2736,
+      "step": 157
+    },
+    {
+      "epoch": 0.21800620903759918,
+      "grad_norm": 1.4456684589385986,
+      "learning_rate": 4.690766700109659e-05,
+      "loss": 3.1785,
+      "step": 158
+    },
+    {
+      "epoch": 0.21938599517074853,
+      "grad_norm": 1.255626916885376,
+      "learning_rate": 4.610819813755038e-05,
+      "loss": 2.9921,
+      "step": 159
+    },
+    {
+      "epoch": 0.2207657813038979,
+      "grad_norm": 1.102941870689392,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 3.2609,
+      "step": 160
+    },
+    {
+      "epoch": 0.22214556743704725,
+      "grad_norm": 1.0562005043029785,
+      "learning_rate": 4.426283106939474e-05,
+      "loss": 3.011,
+      "step": 161
+    },
+    {
+      "epoch": 0.22352535357019662,
+      "grad_norm": 1.1416147947311401,
+      "learning_rate": 4.3224215685535294e-05,
+      "loss": 2.9283,
+      "step": 162
+    },
+    {
+      "epoch": 0.22490513970334597,
+      "grad_norm": 1.1108524799346924,
+      "learning_rate": 4.211367764821722e-05,
+      "loss": 3.2749,
+      "step": 163
+    },
+    {
+      "epoch": 0.22628492583649534,
+      "grad_norm": 1.2051870822906494,
+      "learning_rate": 4.093559974371725e-05,
+      "loss": 3.0462,
+      "step": 164
+    },
+    {
+      "epoch": 0.22766471196964472,
+      "grad_norm": 1.1099305152893066,
+      "learning_rate": 3.969463130731183e-05,
+      "loss": 3.1848,
+      "step": 165
+    },
+    {
+      "epoch": 0.22904449810279406,
+      "grad_norm": 0.9574990272521973,
+      "learning_rate": 3.8395669874474915e-05,
+      "loss": 3.0329,
+      "step": 166
+    },
+    {
+      "epoch": 0.23042428423594344,
+      "grad_norm": 1.0350350141525269,
+      "learning_rate": 3.704384185254288e-05,
+      "loss": 2.9134,
+      "step": 167
+    },
+    {
+      "epoch": 0.23180407036909279,
+      "grad_norm": 0.8609461784362793,
+      "learning_rate": 3.564448228912682e-05,
+      "loss": 2.9771,
+      "step": 168
+    },
+    {
+      "epoch": 0.23318385650224216,
+      "grad_norm": 1.0324912071228027,
+      "learning_rate": 3.4203113817116957e-05,
+      "loss": 2.9351,
+      "step": 169
+    },
+    {
+      "epoch": 0.2345636426353915,
+      "grad_norm": 0.9436797499656677,
+      "learning_rate": 3.272542485937369e-05,
+      "loss": 2.9729,
+      "step": 170
+    },
+    {
+      "epoch": 0.23594342876854088,
+      "grad_norm": 0.931715726852417,
+      "learning_rate": 3.121724717912138e-05,
+      "loss": 3.2443,
+      "step": 171
+    },
+    {
+      "epoch": 0.23732321490169023,
+      "grad_norm": 0.9500223398208618,
+      "learning_rate": 2.9684532864643122e-05,
+      "loss": 2.9819,
+      "step": 172
+    },
+    {
+      "epoch": 0.2387030010348396,
+      "grad_norm": 0.875689685344696,
+      "learning_rate": 2.8133330839107608e-05,
+      "loss": 2.9361,
+      "step": 173
+    },
+    {
+      "epoch": 0.24008278716798895,
+      "grad_norm": 0.891899824142456,
+      "learning_rate": 2.656976298823284e-05,
+      "loss": 2.9722,
+      "step": 174
+    },
+    {
+      "epoch": 0.24146257330113832,
+      "grad_norm": 1.091869592666626,
+      "learning_rate": 2.5e-05,
+      "loss": 3.1387,
+      "step": 175
+    },
+    {
+      "epoch": 0.2428423594342877,
+      "grad_norm": 0.9385702013969421,
+      "learning_rate": 2.3430237011767167e-05,
+      "loss": 2.9316,
+      "step": 176
+    },
+    {
+      "epoch": 0.24422214556743704,
+      "grad_norm": 0.9347511529922485,
+      "learning_rate": 2.186666916089239e-05,
+      "loss": 2.9608,
+      "step": 177
+    },
+    {
+      "epoch": 0.24560193170058642,
+      "grad_norm": 0.9398484230041504,
+      "learning_rate": 2.031546713535688e-05,
+      "loss": 2.9103,
+      "step": 178
+    },
+    {
+      "epoch": 0.24698171783373576,
+      "grad_norm": 0.9190600514411926,
+      "learning_rate": 1.8782752820878634e-05,
+      "loss": 3.1082,
+      "step": 179
+    },
+    {
+      "epoch": 0.24836150396688514,
+      "grad_norm": 1.0726330280303955,
+      "learning_rate": 1.7274575140626318e-05,
+      "loss": 3.232,
+      "step": 180
+    },
+    {
+      "epoch": 0.24974129010003449,
+      "grad_norm": 0.964190661907196,
+      "learning_rate": 1.5796886182883053e-05,
+      "loss": 3.0588,
+      "step": 181
+    },
+    {
+      "epoch": 0.25112107623318386,
+      "grad_norm": 0.9404963850975037,
+      "learning_rate": 1.4355517710873184e-05,
+      "loss": 2.9696,
+      "step": 182
+    },
+    {
+      "epoch": 0.25250086236633323,
+      "grad_norm": 1.1561006307601929,
+      "learning_rate": 1.2956158147457115e-05,
+      "loss": 3.163,
+      "step": 183
+    },
+    {
+      "epoch": 0.25388064849948255,
+      "grad_norm": 1.0033360719680786,
+      "learning_rate": 1.1604330125525079e-05,
+      "loss": 2.8147,
+      "step": 184
+    },
+    {
+      "epoch": 0.2552604346326319,
+      "grad_norm": 1.0727239847183228,
+      "learning_rate": 1.0305368692688174e-05,
+      "loss": 3.1405,
+      "step": 185
+    },
+    {
+      "epoch": 0.2566402207657813,
+      "grad_norm": 1.106379747390747,
+      "learning_rate": 9.064400256282757e-06,
+      "loss": 3.0658,
+      "step": 186
+    },
+    {
+      "epoch": 0.2580200068989307,
+      "grad_norm": 0.97951740026474,
+      "learning_rate": 7.886322351782783e-06,
+      "loss": 3.1416,
+      "step": 187
+    },
+    {
+      "epoch": 0.25939979303208005,
+      "grad_norm": 1.2267470359802246,
+      "learning_rate": 6.775784314464717e-06,
+      "loss": 3.0927,
+      "step": 188
+    },
+    {
+      "epoch": 0.26077957916522937,
+      "grad_norm": 1.2311291694641113,
+      "learning_rate": 5.737168930605272e-06,
+      "loss": 2.9075,
+      "step": 189
+    },
+    {
+      "epoch": 0.26215936529837874,
+      "grad_norm": 1.1488878726959229,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 3.0904,
+      "step": 190
+    },
+    {
+      "epoch": 0.2635391514315281,
+      "grad_norm": 1.127191424369812,
+      "learning_rate": 3.891801862449629e-06,
+      "loss": 3.0047,
+      "step": 191
+    },
+    {
+      "epoch": 0.2649189375646775,
+      "grad_norm": 1.446352481842041,
+      "learning_rate": 3.092332998903416e-06,
+      "loss": 2.971,
+      "step": 192
+    },
+    {
+      "epoch": 0.2662987236978268,
+      "grad_norm": 1.3929004669189453,
+      "learning_rate": 2.379323688349516e-06,
+      "loss": 3.026,
+      "step": 193
+    },
+    {
+      "epoch": 0.2676785098309762,
+      "grad_norm": 1.512418508529663,
+      "learning_rate": 1.7555878527937164e-06,
+      "loss": 2.9506,
+      "step": 194
+    },
+    {
+      "epoch": 0.26905829596412556,
+      "grad_norm": 1.65766441822052,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 2.9575,
+      "step": 195
+    },
+    {
+      "epoch": 0.27043808209727493,
+      "grad_norm": 1.5741119384765625,
+      "learning_rate": 7.854209717842231e-07,
+      "loss": 2.9769,
+      "step": 196
+    },
+    {
+      "epoch": 0.2718178682304243,
+      "grad_norm": 1.678270697593689,
+      "learning_rate": 4.4281873178278475e-07,
+      "loss": 3.0054,
+      "step": 197
+    },
+    {
+      "epoch": 0.2731976543635736,
+      "grad_norm": 1.7038488388061523,
+      "learning_rate": 1.9713246713805588e-07,
+      "loss": 3.0776,
+      "step": 198
+    },
+    {
+      "epoch": 0.274577440496723,
+      "grad_norm": 2.050671100616455,
+      "learning_rate": 4.9331789293211026e-08,
+      "loss": 3.1415,
+      "step": 199
+    },
+    {
+      "epoch": 0.2759572266298724,
+      "grad_norm": 2.6361677646636963,
+      "learning_rate": 0.0,
+      "loss": 3.1783,
+      "step": 200
+    },
+    {
+      "epoch": 0.2759572266298724,
+      "eval_loss": 2.997969388961792,
+      "eval_runtime": 69.6597,
+      "eval_samples_per_second": 35.056,
+      "eval_steps_per_second": 8.771,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.055175217250304e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null