Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bd91c363aa4904e9f319599509f3ba0e876b1a7e25fe2c7b5eef5ce6f4e1d57
 size 201353800

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c8972abe9a71ea8432999ee2059aeaefb0081c5426aae5f97d47c8db86ab930
 size 201353800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d47e13c4e166898ff52348a45ba89871fbeaf779a58be5c0e9427c19744858da
 size 102462970

 version https://git-lfs.github.com/spec/v1
+oid sha256:938b5fa3a467b688ea7b7c5133e893c97a7ff43ee97aa57c7764d459fbe57167
 size 102462970

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d8068427cb89df5eca9a54adfe62fd579699f10f253bc2b1ae4a40e5bbc64e5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfb6c4d15023b7f825314115ec967db308a5e76c09b6d9ab6b0f43eb8cd472ab
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d094abeb22730ed8a1a30db0af3a1ac1bd1a9d1fcc8794319a4b828d3c7261c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3709ad194ceda6ee92d3d2c8f2dd5203a40bedaa701af97b45dc02564222704e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9060256481170654,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.01178735609602766,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 17.612,
       "eval_steps_per_second": 4.405,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9281870600601600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8893836140632629,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.015716474794703547,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.612,
       "eval_steps_per_second": 4.405,
       "step": 150
+    },
+    {
+      "epoch": 0.011865938470001178,
+      "grad_norm": 1.8181077241897583,
+      "learning_rate": 2.597e-05,
+      "loss": 1.7493,
+      "step": 151
+    },
+    {
+      "epoch": 0.011944520843974696,
+      "grad_norm": 2.076934576034546,
+      "learning_rate": 2.544e-05,
+      "loss": 1.7957,
+      "step": 152
+    },
+    {
+      "epoch": 0.012023103217948214,
+      "grad_norm": 2.1660921573638916,
+      "learning_rate": 2.4909999999999997e-05,
+      "loss": 1.5835,
+      "step": 153
+    },
+    {
+      "epoch": 0.012101685591921731,
+      "grad_norm": 1.9632174968719482,
+      "learning_rate": 2.438e-05,
+      "loss": 1.5994,
+      "step": 154
+    },
+    {
+      "epoch": 0.012180267965895249,
+      "grad_norm": 2.900998115539551,
+      "learning_rate": 2.3849999999999997e-05,
+      "loss": 2.1993,
+      "step": 155
+    },
+    {
+      "epoch": 0.012258850339868767,
+      "grad_norm": 2.372654914855957,
+      "learning_rate": 2.3319999999999997e-05,
+      "loss": 1.7,
+      "step": 156
+    },
+    {
+      "epoch": 0.012337432713842285,
+      "grad_norm": 2.6485414505004883,
+      "learning_rate": 2.279e-05,
+      "loss": 1.5784,
+      "step": 157
+    },
+    {
+      "epoch": 0.012416015087815802,
+      "grad_norm": 2.039062738418579,
+      "learning_rate": 2.2259999999999997e-05,
+      "loss": 1.7178,
+      "step": 158
+    },
+    {
+      "epoch": 0.01249459746178932,
+      "grad_norm": 2.2866334915161133,
+      "learning_rate": 2.173e-05,
+      "loss": 1.7329,
+      "step": 159
+    },
+    {
+      "epoch": 0.012573179835762838,
+      "grad_norm": 2.146287202835083,
+      "learning_rate": 2.1199999999999997e-05,
+      "loss": 1.8941,
+      "step": 160
+    },
+    {
+      "epoch": 0.012651762209736355,
+      "grad_norm": 2.0105912685394287,
+      "learning_rate": 2.067e-05,
+      "loss": 1.446,
+      "step": 161
+    },
+    {
+      "epoch": 0.012730344583709873,
+      "grad_norm": 2.406883716583252,
+      "learning_rate": 2.014e-05,
+      "loss": 1.9801,
+      "step": 162
+    },
+    {
+      "epoch": 0.012808926957683391,
+      "grad_norm": 2.3333377838134766,
+      "learning_rate": 1.9609999999999997e-05,
+      "loss": 1.7974,
+      "step": 163
+    },
+    {
+      "epoch": 0.012887509331656909,
+      "grad_norm": 2.1827352046966553,
+      "learning_rate": 1.908e-05,
+      "loss": 1.4272,
+      "step": 164
+    },
+    {
+      "epoch": 0.012966091705630426,
+      "grad_norm": 2.277249574661255,
+      "learning_rate": 1.8549999999999997e-05,
+      "loss": 1.7542,
+      "step": 165
+    },
+    {
+      "epoch": 0.013044674079603944,
+      "grad_norm": 2.420073986053467,
+      "learning_rate": 1.802e-05,
+      "loss": 1.791,
+      "step": 166
+    },
+    {
+      "epoch": 0.013123256453577462,
+      "grad_norm": 2.2049803733825684,
+      "learning_rate": 1.749e-05,
+      "loss": 1.6978,
+      "step": 167
+    },
+    {
+      "epoch": 0.01320183882755098,
+      "grad_norm": 2.5003433227539062,
+      "learning_rate": 1.696e-05,
+      "loss": 1.7995,
+      "step": 168
+    },
+    {
+      "epoch": 0.013280421201524497,
+      "grad_norm": 2.038902997970581,
+      "learning_rate": 1.643e-05,
+      "loss": 1.8227,
+      "step": 169
+    },
+    {
+      "epoch": 0.013359003575498015,
+      "grad_norm": 2.4974002838134766,
+      "learning_rate": 1.5899999999999997e-05,
+      "loss": 1.7088,
+      "step": 170
+    },
+    {
+      "epoch": 0.013437585949471533,
+      "grad_norm": 2.4527053833007812,
+      "learning_rate": 1.537e-05,
+      "loss": 1.909,
+      "step": 171
+    },
+    {
+      "epoch": 0.01351616832344505,
+      "grad_norm": 2.499253273010254,
+      "learning_rate": 1.4839999999999999e-05,
+      "loss": 1.5983,
+      "step": 172
+    },
+    {
+      "epoch": 0.013594750697418568,
+      "grad_norm": 2.979060411453247,
+      "learning_rate": 1.4309999999999999e-05,
+      "loss": 2.1447,
+      "step": 173
+    },
+    {
+      "epoch": 0.013673333071392086,
+      "grad_norm": 2.4462897777557373,
+      "learning_rate": 1.378e-05,
+      "loss": 2.0196,
+      "step": 174
+    },
+    {
+      "epoch": 0.013751915445365604,
+      "grad_norm": 2.631182909011841,
+      "learning_rate": 1.3249999999999999e-05,
+      "loss": 1.8259,
+      "step": 175
+    },
+    {
+      "epoch": 0.013830497819339121,
+      "grad_norm": 2.4574482440948486,
+      "learning_rate": 1.272e-05,
+      "loss": 1.8608,
+      "step": 176
+    },
+    {
+      "epoch": 0.01390908019331264,
+      "grad_norm": 2.9573817253112793,
+      "learning_rate": 1.219e-05,
+      "loss": 2.0493,
+      "step": 177
+    },
+    {
+      "epoch": 0.013987662567286157,
+      "grad_norm": 2.3067586421966553,
+      "learning_rate": 1.1659999999999998e-05,
+      "loss": 1.6649,
+      "step": 178
+    },
+    {
+      "epoch": 0.014066244941259675,
+      "grad_norm": 2.3453915119171143,
+      "learning_rate": 1.1129999999999998e-05,
+      "loss": 1.8298,
+      "step": 179
+    },
+    {
+      "epoch": 0.014144827315233192,
+      "grad_norm": 2.4115869998931885,
+      "learning_rate": 1.0599999999999998e-05,
+      "loss": 1.8186,
+      "step": 180
+    },
+    {
+      "epoch": 0.01422340968920671,
+      "grad_norm": 3.03613018989563,
+      "learning_rate": 1.007e-05,
+      "loss": 1.6435,
+      "step": 181
+    },
+    {
+      "epoch": 0.014301992063180228,
+      "grad_norm": 2.7698209285736084,
+      "learning_rate": 9.54e-06,
+      "loss": 2.1196,
+      "step": 182
+    },
+    {
+      "epoch": 0.014380574437153746,
+      "grad_norm": 3.2634527683258057,
+      "learning_rate": 9.01e-06,
+      "loss": 1.914,
+      "step": 183
+    },
+    {
+      "epoch": 0.014459156811127263,
+      "grad_norm": 2.814863443374634,
+      "learning_rate": 8.48e-06,
+      "loss": 1.9775,
+      "step": 184
+    },
+    {
+      "epoch": 0.014537739185100781,
+      "grad_norm": 2.700026750564575,
+      "learning_rate": 7.949999999999998e-06,
+      "loss": 1.8949,
+      "step": 185
+    },
+    {
+      "epoch": 0.014616321559074299,
+      "grad_norm": 2.699542999267578,
+      "learning_rate": 7.419999999999999e-06,
+      "loss": 1.9938,
+      "step": 186
+    },
+    {
+      "epoch": 0.014694903933047817,
+      "grad_norm": 2.8317461013793945,
+      "learning_rate": 6.89e-06,
+      "loss": 1.9413,
+      "step": 187
+    },
+    {
+      "epoch": 0.014773486307021334,
+      "grad_norm": 2.5641987323760986,
+      "learning_rate": 6.36e-06,
+      "loss": 1.7367,
+      "step": 188
+    },
+    {
+      "epoch": 0.014852068680994852,
+      "grad_norm": 2.798137903213501,
+      "learning_rate": 5.829999999999999e-06,
+      "loss": 1.896,
+      "step": 189
+    },
+    {
+      "epoch": 0.01493065105496837,
+      "grad_norm": 2.7922170162200928,
+      "learning_rate": 5.299999999999999e-06,
+      "loss": 1.9496,
+      "step": 190
+    },
+    {
+      "epoch": 0.015009233428941888,
+      "grad_norm": 3.278426170349121,
+      "learning_rate": 4.77e-06,
+      "loss": 1.742,
+      "step": 191
+    },
+    {
+      "epoch": 0.015087815802915405,
+      "grad_norm": 2.9624500274658203,
+      "learning_rate": 4.24e-06,
+      "loss": 2.2221,
+      "step": 192
+    },
+    {
+      "epoch": 0.015166398176888923,
+      "grad_norm": 3.408240556716919,
+      "learning_rate": 3.7099999999999996e-06,
+      "loss": 1.879,
+      "step": 193
+    },
+    {
+      "epoch": 0.01524498055086244,
+      "grad_norm": 2.60992169380188,
+      "learning_rate": 3.18e-06,
+      "loss": 1.9405,
+      "step": 194
+    },
+    {
+      "epoch": 0.015323562924835958,
+      "grad_norm": 2.4439897537231445,
+      "learning_rate": 2.6499999999999996e-06,
+      "loss": 1.3485,
+      "step": 195
+    },
+    {
+      "epoch": 0.015402145298809476,
+      "grad_norm": 3.6060919761657715,
+      "learning_rate": 2.12e-06,
+      "loss": 2.2208,
+      "step": 196
+    },
+    {
+      "epoch": 0.015480727672782994,
+      "grad_norm": 3.201261281967163,
+      "learning_rate": 1.59e-06,
+      "loss": 1.9352,
+      "step": 197
+    },
+    {
+      "epoch": 0.015559310046756512,
+      "grad_norm": 3.102384328842163,
+      "learning_rate": 1.06e-06,
+      "loss": 1.9048,
+      "step": 198
+    },
+    {
+      "epoch": 0.01563789242073003,
+      "grad_norm": 3.2906687259674072,
+      "learning_rate": 5.3e-07,
+      "loss": 1.9021,
+      "step": 199
+    },
+    {
+      "epoch": 0.015716474794703547,
+      "grad_norm": 2.7191827297210693,
+      "learning_rate": 0.0,
+      "loss": 1.5233,
+      "step": 200
+    },
+    {
+      "epoch": 0.015716474794703547,
+      "eval_loss": 0.8893836140632629,
+      "eval_runtime": 304.9343,
+      "eval_samples_per_second": 17.571,
+      "eval_steps_per_second": 4.394,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.23758274674688e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null