Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11bd466a878610e1c68053f8887c6191af5048c156b2d02efd50a208e7dc5cca
 size 838906392

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc774ab0ddb32166e466df47a3b68196a2c08a6acdebbc32388da4c2a64516cb
 size 838906392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92bcb1d3fa49234721a33c7cb8dba3f1e0f48b93861ee218ea637bedd5a8a626
 size 426360596

 version https://git-lfs.github.com/spec/v1
+oid sha256:306df05005f74c56c5f1e1be8f6d1adf7eb16b3bbe7dff3e8f67b48b5594ae7f
 size 426360596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4635510a2b7a47669497456ac6395cdcfa5a4545d6cf55177d720e2c15cc0726
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:db7624b45108df624a439006967bf09110280f0de2fa11ca732ced80656a363c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5721358060836792,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.545950864422202,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.733,
       "eval_steps_per_second": 2.438,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.81890674556928e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.5490885972976685,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.7279344858962693,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.733,
       "eval_steps_per_second": 2.438,
       "step": 150
+    },
+    {
+      "epoch": 0.5495905368516834,
+      "grad_norm": 2.302870035171509,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 1.8646,
+      "step": 151
+    },
+    {
+      "epoch": 0.5532302092811647,
+      "grad_norm": 2.3190178871154785,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 1.9375,
+      "step": 152
+    },
+    {
+      "epoch": 0.556869881710646,
+      "grad_norm": 3.0300214290618896,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 2.7096,
+      "step": 153
+    },
+    {
+      "epoch": 0.5605095541401274,
+      "grad_norm": 3.025632381439209,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 2.897,
+      "step": 154
+    },
+    {
+      "epoch": 0.5641492265696088,
+      "grad_norm": 2.839578866958618,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 2.6637,
+      "step": 155
+    },
+    {
+      "epoch": 0.5677888989990901,
+      "grad_norm": 3.25244402885437,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 3.0786,
+      "step": 156
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 3.0265636444091797,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 2.5205,
+      "step": 157
+    },
+    {
+      "epoch": 0.5750682438580528,
+      "grad_norm": 2.8974204063415527,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 2.3908,
+      "step": 158
+    },
+    {
+      "epoch": 0.5787079162875342,
+      "grad_norm": 3.050604820251465,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 2.93,
+      "step": 159
+    },
+    {
+      "epoch": 0.5823475887170154,
+      "grad_norm": 3.0116374492645264,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 2.7721,
+      "step": 160
+    },
+    {
+      "epoch": 0.5859872611464968,
+      "grad_norm": 3.1212122440338135,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 2.6728,
+      "step": 161
+    },
+    {
+      "epoch": 0.5896269335759782,
+      "grad_norm": 3.024791955947876,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.8683,
+      "step": 162
+    },
+    {
+      "epoch": 0.5932666060054596,
+      "grad_norm": 3.1855437755584717,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 2.9764,
+      "step": 163
+    },
+    {
+      "epoch": 0.5969062784349408,
+      "grad_norm": 2.8845460414886475,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 2.5825,
+      "step": 164
+    },
+    {
+      "epoch": 0.6005459508644222,
+      "grad_norm": 2.7672841548919678,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 2.5022,
+      "step": 165
+    },
+    {
+      "epoch": 0.6041856232939036,
+      "grad_norm": 2.711394786834717,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 2.158,
+      "step": 166
+    },
+    {
+      "epoch": 0.607825295723385,
+      "grad_norm": 2.831782579421997,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 2.2504,
+      "step": 167
+    },
+    {
+      "epoch": 0.6114649681528662,
+      "grad_norm": 2.912217378616333,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 2.2321,
+      "step": 168
+    },
+    {
+      "epoch": 0.6151046405823476,
+      "grad_norm": 2.7623188495635986,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 2.2487,
+      "step": 169
+    },
+    {
+      "epoch": 0.618744313011829,
+      "grad_norm": 2.90129017829895,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 2.3792,
+      "step": 170
+    },
+    {
+      "epoch": 0.6223839854413102,
+      "grad_norm": 3.3646767139434814,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 2.8504,
+      "step": 171
+    },
+    {
+      "epoch": 0.6260236578707916,
+      "grad_norm": 3.3314194679260254,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 2.4542,
+      "step": 172
+    },
+    {
+      "epoch": 0.629663330300273,
+      "grad_norm": 3.1008663177490234,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 2.2329,
+      "step": 173
+    },
+    {
+      "epoch": 0.6333030027297544,
+      "grad_norm": 2.999218225479126,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 2.4201,
+      "step": 174
+    },
+    {
+      "epoch": 0.6369426751592356,
+      "grad_norm": 2.6040823459625244,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 1.959,
+      "step": 175
+    },
+    {
+      "epoch": 0.640582347588717,
+      "grad_norm": 2.9458696842193604,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 2.2246,
+      "step": 176
+    },
+    {
+      "epoch": 0.6442220200181984,
+      "grad_norm": 3.246384620666504,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 2.6504,
+      "step": 177
+    },
+    {
+      "epoch": 0.6478616924476797,
+      "grad_norm": 3.316885232925415,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 2.5356,
+      "step": 178
+    },
+    {
+      "epoch": 0.651501364877161,
+      "grad_norm": 2.5368857383728027,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 1.9127,
+      "step": 179
+    },
+    {
+      "epoch": 0.6551410373066424,
+      "grad_norm": 2.7126615047454834,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 2.0505,
+      "step": 180
+    },
+    {
+      "epoch": 0.6587807097361238,
+      "grad_norm": 3.049729585647583,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.2564,
+      "step": 181
+    },
+    {
+      "epoch": 0.6624203821656051,
+      "grad_norm": 3.1560466289520264,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 2.059,
+      "step": 182
+    },
+    {
+      "epoch": 0.6660600545950864,
+      "grad_norm": 2.5500495433807373,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 1.6247,
+      "step": 183
+    },
+    {
+      "epoch": 0.6696997270245678,
+      "grad_norm": 2.9407999515533447,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 2.1153,
+      "step": 184
+    },
+    {
+      "epoch": 0.6733393994540491,
+      "grad_norm": 2.8330116271972656,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.7676,
+      "step": 185
+    },
+    {
+      "epoch": 0.6769790718835305,
+      "grad_norm": 2.8989205360412598,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 1.7266,
+      "step": 186
+    },
+    {
+      "epoch": 0.6806187443130118,
+      "grad_norm": 3.519871473312378,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 2.0414,
+      "step": 187
+    },
+    {
+      "epoch": 0.6842584167424932,
+      "grad_norm": 2.6813058853149414,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 1.684,
+      "step": 188
+    },
+    {
+      "epoch": 0.6878980891719745,
+      "grad_norm": 2.557551860809326,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 1.6045,
+      "step": 189
+    },
+    {
+      "epoch": 0.6915377616014559,
+      "grad_norm": 3.1362481117248535,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 2.0943,
+      "step": 190
+    },
+    {
+      "epoch": 0.6951774340309372,
+      "grad_norm": 2.4160473346710205,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 1.6056,
+      "step": 191
+    },
+    {
+      "epoch": 0.6988171064604186,
+      "grad_norm": 3.358062744140625,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 2.0739,
+      "step": 192
+    },
+    {
+      "epoch": 0.7024567788898999,
+      "grad_norm": 3.103926420211792,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 1.9202,
+      "step": 193
+    },
+    {
+      "epoch": 0.7060964513193813,
+      "grad_norm": 3.2157270908355713,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 1.9288,
+      "step": 194
+    },
+    {
+      "epoch": 0.7097361237488626,
+      "grad_norm": 2.6933817863464355,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.8028,
+      "step": 195
+    },
+    {
+      "epoch": 0.7133757961783439,
+      "grad_norm": 3.2947006225585938,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 2.1877,
+      "step": 196
+    },
+    {
+      "epoch": 0.7170154686078253,
+      "grad_norm": 3.6697769165039062,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 1.9777,
+      "step": 197
+    },
+    {
+      "epoch": 0.7206551410373067,
+      "grad_norm": 3.170090913772583,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 2.0089,
+      "step": 198
+    },
+    {
+      "epoch": 0.724294813466788,
+      "grad_norm": 4.24803352355957,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 2.5854,
+      "step": 199
+    },
+    {
+      "epoch": 0.7279344858962693,
+      "grad_norm": 4.326064109802246,
+      "learning_rate": 0.0,
+      "loss": 2.2922,
+      "step": 200
+    },
+    {
+      "epoch": 0.7279344858962693,
+      "eval_loss": 0.5490885972976685,
+      "eval_runtime": 47.5809,
+      "eval_samples_per_second": 9.731,
+      "eval_steps_per_second": 2.438,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.09187566075904e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null