Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7b1b3a5f541f0057b02ffa8a7206f54fe456c540363c19dd47dae34c38ceaee
 size 522227376

 version https://git-lfs.github.com/spec/v1
+oid sha256:f599aa0c0a049fb7ee912cf3835441176123724e829b85db2d58880b25ae3f7e
 size 522227376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90249346b273016aa1b1b3e3b978564adc27dd936d9a715d809e9edf49143100
 size 265476436

 version https://git-lfs.github.com/spec/v1
+oid sha256:af02c82fd042017d904dcb83a7958fe1758c1f4bd454598a93c25e2c1185b08e
 size 265476436

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7248cc1865ade94bf3820a52ae78cc3da5a761ebb740767cb5b415b615a30c7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:94778b2c0a4fafa19b9b9664a360321d3d7bcf837bddaa94e0390eb59a308100
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d094abeb22730ed8a1a30db0af3a1ac1bd1a9d1fcc8794319a4b828d3c7261c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3709ad194ceda6ee92d3d2c8f2dd5203a40bedaa701af97b45dc02564222704e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.9016318321228027,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.012642757806902946,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 10.081,
       "eval_steps_per_second": 2.52,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.9816530321408e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.8669476509094238,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.016857010409203926,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.081,
       "eval_steps_per_second": 2.52,
       "step": 150
+    },
+    {
+      "epoch": 0.012727042858948965,
+      "grad_norm": 1.3897639513015747,
+      "learning_rate": 2.597e-05,
+      "loss": 3.8315,
+      "step": 151
+    },
+    {
+      "epoch": 0.012811327910994985,
+      "grad_norm": 1.3828065395355225,
+      "learning_rate": 2.544e-05,
+      "loss": 3.6809,
+      "step": 152
+    },
+    {
+      "epoch": 0.012895612963041005,
+      "grad_norm": 1.3566617965698242,
+      "learning_rate": 2.4909999999999997e-05,
+      "loss": 3.5872,
+      "step": 153
+    },
+    {
+      "epoch": 0.012979898015087024,
+      "grad_norm": 1.3214489221572876,
+      "learning_rate": 2.438e-05,
+      "loss": 3.4207,
+      "step": 154
+    },
+    {
+      "epoch": 0.013064183067133044,
+      "grad_norm": 1.2196868658065796,
+      "learning_rate": 2.3849999999999997e-05,
+      "loss": 3.2518,
+      "step": 155
+    },
+    {
+      "epoch": 0.013148468119179064,
+      "grad_norm": 1.3787480592727661,
+      "learning_rate": 2.3319999999999997e-05,
+      "loss": 3.3574,
+      "step": 156
+    },
+    {
+      "epoch": 0.013232753171225083,
+      "grad_norm": 1.224331021308899,
+      "learning_rate": 2.279e-05,
+      "loss": 3.4614,
+      "step": 157
+    },
+    {
+      "epoch": 0.013317038223271103,
+      "grad_norm": 1.382997751235962,
+      "learning_rate": 2.2259999999999997e-05,
+      "loss": 3.9046,
+      "step": 158
+    },
+    {
+      "epoch": 0.013401323275317123,
+      "grad_norm": 1.3192687034606934,
+      "learning_rate": 2.173e-05,
+      "loss": 3.7665,
+      "step": 159
+    },
+    {
+      "epoch": 0.013485608327363141,
+      "grad_norm": 1.4856247901916504,
+      "learning_rate": 2.1199999999999997e-05,
+      "loss": 4.2557,
+      "step": 160
+    },
+    {
+      "epoch": 0.013569893379409162,
+      "grad_norm": 1.4664103984832764,
+      "learning_rate": 2.067e-05,
+      "loss": 3.8251,
+      "step": 161
+    },
+    {
+      "epoch": 0.013654178431455182,
+      "grad_norm": 1.472501516342163,
+      "learning_rate": 2.014e-05,
+      "loss": 4.0414,
+      "step": 162
+    },
+    {
+      "epoch": 0.0137384634835012,
+      "grad_norm": 1.5374277830123901,
+      "learning_rate": 1.9609999999999997e-05,
+      "loss": 3.7686,
+      "step": 163
+    },
+    {
+      "epoch": 0.01382274853554722,
+      "grad_norm": 1.382415771484375,
+      "learning_rate": 1.908e-05,
+      "loss": 3.3308,
+      "step": 164
+    },
+    {
+      "epoch": 0.01390703358759324,
+      "grad_norm": 1.4553520679473877,
+      "learning_rate": 1.8549999999999997e-05,
+      "loss": 3.8213,
+      "step": 165
+    },
+    {
+      "epoch": 0.01399131863963926,
+      "grad_norm": 1.5830438137054443,
+      "learning_rate": 1.802e-05,
+      "loss": 3.9779,
+      "step": 166
+    },
+    {
+      "epoch": 0.01407560369168528,
+      "grad_norm": 1.6630796194076538,
+      "learning_rate": 1.749e-05,
+      "loss": 4.2908,
+      "step": 167
+    },
+    {
+      "epoch": 0.0141598887437313,
+      "grad_norm": 1.6110979318618774,
+      "learning_rate": 1.696e-05,
+      "loss": 3.3741,
+      "step": 168
+    },
+    {
+      "epoch": 0.01424417379577732,
+      "grad_norm": 1.5861139297485352,
+      "learning_rate": 1.643e-05,
+      "loss": 3.9935,
+      "step": 169
+    },
+    {
+      "epoch": 0.014328458847823338,
+      "grad_norm": 1.6507846117019653,
+      "learning_rate": 1.5899999999999997e-05,
+      "loss": 3.4942,
+      "step": 170
+    },
+    {
+      "epoch": 0.014412743899869358,
+      "grad_norm": 1.7398327589035034,
+      "learning_rate": 1.537e-05,
+      "loss": 4.0904,
+      "step": 171
+    },
+    {
+      "epoch": 0.014497028951915378,
+      "grad_norm": 1.7537189722061157,
+      "learning_rate": 1.4839999999999999e-05,
+      "loss": 3.7626,
+      "step": 172
+    },
+    {
+      "epoch": 0.014581314003961397,
+      "grad_norm": 1.6175459623336792,
+      "learning_rate": 1.4309999999999999e-05,
+      "loss": 3.7885,
+      "step": 173
+    },
+    {
+      "epoch": 0.014665599056007417,
+      "grad_norm": 1.7866084575653076,
+      "learning_rate": 1.378e-05,
+      "loss": 3.5248,
+      "step": 174
+    },
+    {
+      "epoch": 0.014749884108053437,
+      "grad_norm": 1.7713515758514404,
+      "learning_rate": 1.3249999999999999e-05,
+      "loss": 3.9378,
+      "step": 175
+    },
+    {
+      "epoch": 0.014834169160099456,
+      "grad_norm": 1.9118940830230713,
+      "learning_rate": 1.272e-05,
+      "loss": 4.1767,
+      "step": 176
+    },
+    {
+      "epoch": 0.014918454212145476,
+      "grad_norm": 1.8637574911117554,
+      "learning_rate": 1.219e-05,
+      "loss": 3.8656,
+      "step": 177
+    },
+    {
+      "epoch": 0.015002739264191496,
+      "grad_norm": 1.828238844871521,
+      "learning_rate": 1.1659999999999998e-05,
+      "loss": 4.0391,
+      "step": 178
+    },
+    {
+      "epoch": 0.015087024316237515,
+      "grad_norm": 2.0488600730895996,
+      "learning_rate": 1.1129999999999998e-05,
+      "loss": 3.922,
+      "step": 179
+    },
+    {
+      "epoch": 0.015171309368283535,
+      "grad_norm": 2.0183472633361816,
+      "learning_rate": 1.0599999999999998e-05,
+      "loss": 4.2234,
+      "step": 180
+    },
+    {
+      "epoch": 0.015255594420329555,
+      "grad_norm": 2.070134401321411,
+      "learning_rate": 1.007e-05,
+      "loss": 3.918,
+      "step": 181
+    },
+    {
+      "epoch": 0.015339879472375573,
+      "grad_norm": 2.2498276233673096,
+      "learning_rate": 9.54e-06,
+      "loss": 3.9867,
+      "step": 182
+    },
+    {
+      "epoch": 0.015424164524421594,
+      "grad_norm": 2.2584664821624756,
+      "learning_rate": 9.01e-06,
+      "loss": 4.0252,
+      "step": 183
+    },
+    {
+      "epoch": 0.015508449576467614,
+      "grad_norm": 2.067579507827759,
+      "learning_rate": 8.48e-06,
+      "loss": 3.7084,
+      "step": 184
+    },
+    {
+      "epoch": 0.015592734628513634,
+      "grad_norm": 2.0557119846343994,
+      "learning_rate": 7.949999999999998e-06,
+      "loss": 3.7976,
+      "step": 185
+    },
+    {
+      "epoch": 0.015677019680559654,
+      "grad_norm": 2.188267707824707,
+      "learning_rate": 7.419999999999999e-06,
+      "loss": 3.9759,
+      "step": 186
+    },
+    {
+      "epoch": 0.01576130473260567,
+      "grad_norm": 2.3321714401245117,
+      "learning_rate": 6.89e-06,
+      "loss": 4.3558,
+      "step": 187
+    },
+    {
+      "epoch": 0.01584558978465169,
+      "grad_norm": 2.385741949081421,
+      "learning_rate": 6.36e-06,
+      "loss": 3.8495,
+      "step": 188
+    },
+    {
+      "epoch": 0.01592987483669771,
+      "grad_norm": 2.042713165283203,
+      "learning_rate": 5.829999999999999e-06,
+      "loss": 3.6389,
+      "step": 189
+    },
+    {
+      "epoch": 0.01601415988874373,
+      "grad_norm": 2.288698434829712,
+      "learning_rate": 5.299999999999999e-06,
+      "loss": 4.0149,
+      "step": 190
+    },
+    {
+      "epoch": 0.01609844494078975,
+      "grad_norm": 2.1924726963043213,
+      "learning_rate": 4.77e-06,
+      "loss": 3.9639,
+      "step": 191
+    },
+    {
+      "epoch": 0.016182729992835772,
+      "grad_norm": 2.3209054470062256,
+      "learning_rate": 4.24e-06,
+      "loss": 3.4898,
+      "step": 192
+    },
+    {
+      "epoch": 0.01626701504488179,
+      "grad_norm": 2.477083206176758,
+      "learning_rate": 3.7099999999999996e-06,
+      "loss": 3.2621,
+      "step": 193
+    },
+    {
+      "epoch": 0.01635130009692781,
+      "grad_norm": 2.740098237991333,
+      "learning_rate": 3.18e-06,
+      "loss": 3.6511,
+      "step": 194
+    },
+    {
+      "epoch": 0.01643558514897383,
+      "grad_norm": 2.6082754135131836,
+      "learning_rate": 2.6499999999999996e-06,
+      "loss": 3.6124,
+      "step": 195
+    },
+    {
+      "epoch": 0.01651987020101985,
+      "grad_norm": 2.774245023727417,
+      "learning_rate": 2.12e-06,
+      "loss": 3.2226,
+      "step": 196
+    },
+    {
+      "epoch": 0.01660415525306587,
+      "grad_norm": 2.7693519592285156,
+      "learning_rate": 1.59e-06,
+      "loss": 3.2539,
+      "step": 197
+    },
+    {
+      "epoch": 0.01668844030511189,
+      "grad_norm": 2.846680164337158,
+      "learning_rate": 1.06e-06,
+      "loss": 2.9483,
+      "step": 198
+    },
+    {
+      "epoch": 0.01677272535715791,
+      "grad_norm": 3.2634568214416504,
+      "learning_rate": 5.3e-07,
+      "loss": 3.4867,
+      "step": 199
+    },
+    {
+      "epoch": 0.016857010409203926,
+      "grad_norm": 3.9992029666900635,
+      "learning_rate": 0.0,
+      "loss": 3.4449,
+      "step": 200
+    },
+    {
+      "epoch": 0.016857010409203926,
+      "eval_loss": 1.8669476509094238,
+      "eval_runtime": 496.0136,
+      "eval_samples_per_second": 10.072,
+      "eval_steps_per_second": 2.518,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.6422040428544e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null