Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05aa21867173975b54c2433e2ea2e2d33b9477ac7d7330c058890afd12903750
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:6594e85d9f7449a4096e125658d2822d2213cd67060c7bf626c011f2b256e1ca
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b3a4b1e126c0099f085d2e4a8565a1d034f0d691cbe73ac35fc1487961c814d
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:731572c0980e95e37f8070180b44ad7baa54f2a5cf212cda3385a41e6af41b7c
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:351daf090d5da39941e428a5c5910a98049d94fdb508be4f472927f329dbcf8c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fb135e5a703b43d84755b57fbc457d652b3debcd823950d90d961e8dfc7c522
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9647265076637268,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.5145797598627787,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 13.242,
       "eval_steps_per_second": 3.317,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0722672366465843e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9573606848716736,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.6861063464837049,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.242,
       "eval_steps_per_second": 3.317,
       "step": 150
+    },
+    {
+      "epoch": 0.5180102915951973,
+      "grad_norm": 0.2788316011428833,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 1.0724,
+      "step": 151
+    },
+    {
+      "epoch": 0.5214408233276158,
+      "grad_norm": 0.29611045122146606,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 1.1351,
+      "step": 152
+    },
+    {
+      "epoch": 0.5248713550600344,
+      "grad_norm": 0.31029313802719116,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 1.0506,
+      "step": 153
+    },
+    {
+      "epoch": 0.5283018867924528,
+      "grad_norm": 0.3131580352783203,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 1.0794,
+      "step": 154
+    },
+    {
+      "epoch": 0.5317324185248714,
+      "grad_norm": 0.3210444152355194,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 1.0677,
+      "step": 155
+    },
+    {
+      "epoch": 0.5351629502572899,
+      "grad_norm": 0.3272711932659149,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 1.0264,
+      "step": 156
+    },
+    {
+      "epoch": 0.5385934819897084,
+      "grad_norm": 0.3380805253982544,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 1.0323,
+      "step": 157
+    },
+    {
+      "epoch": 0.5420240137221269,
+      "grad_norm": 0.324920654296875,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 1.0284,
+      "step": 158
+    },
+    {
+      "epoch": 0.5454545454545454,
+      "grad_norm": 0.34995001554489136,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 0.9675,
+      "step": 159
+    },
+    {
+      "epoch": 0.548885077186964,
+      "grad_norm": 0.3302232623100281,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 0.9964,
+      "step": 160
+    },
+    {
+      "epoch": 0.5523156089193825,
+      "grad_norm": 0.34856972098350525,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 0.9764,
+      "step": 161
+    },
+    {
+      "epoch": 0.5557461406518011,
+      "grad_norm": 0.35645270347595215,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 1.0893,
+      "step": 162
+    },
+    {
+      "epoch": 0.5591766723842195,
+      "grad_norm": 0.34165555238723755,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 1.0396,
+      "step": 163
+    },
+    {
+      "epoch": 0.5626072041166381,
+      "grad_norm": 0.3658033013343811,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 0.9651,
+      "step": 164
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "grad_norm": 0.3548159599304199,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 1.0168,
+      "step": 165
+    },
+    {
+      "epoch": 0.5694682675814752,
+      "grad_norm": 0.35155749320983887,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 0.9882,
+      "step": 166
+    },
+    {
+      "epoch": 0.5728987993138936,
+      "grad_norm": 0.3642813265323639,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 1.0211,
+      "step": 167
+    },
+    {
+      "epoch": 0.5763293310463122,
+      "grad_norm": 0.36729952692985535,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 1.0002,
+      "step": 168
+    },
+    {
+      "epoch": 0.5797598627787307,
+      "grad_norm": 0.3639819025993347,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 0.9742,
+      "step": 169
+    },
+    {
+      "epoch": 0.5831903945111492,
+      "grad_norm": 0.3743532598018646,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 1.0119,
+      "step": 170
+    },
+    {
+      "epoch": 0.5866209262435678,
+      "grad_norm": 0.39362025260925293,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 0.852,
+      "step": 171
+    },
+    {
+      "epoch": 0.5900514579759862,
+      "grad_norm": 0.36990445852279663,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 1.0292,
+      "step": 172
+    },
+    {
+      "epoch": 0.5934819897084048,
+      "grad_norm": 0.395361989736557,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 0.9459,
+      "step": 173
+    },
+    {
+      "epoch": 0.5969125214408233,
+      "grad_norm": 0.38748112320899963,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 0.9552,
+      "step": 174
+    },
+    {
+      "epoch": 0.6003430531732419,
+      "grad_norm": 0.3924119472503662,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 0.9526,
+      "step": 175
+    },
+    {
+      "epoch": 0.6037735849056604,
+      "grad_norm": 0.39469361305236816,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 0.9458,
+      "step": 176
+    },
+    {
+      "epoch": 0.6072041166380789,
+      "grad_norm": 0.39666324853897095,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 0.9717,
+      "step": 177
+    },
+    {
+      "epoch": 0.6106346483704974,
+      "grad_norm": 0.40818580985069275,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 0.9296,
+      "step": 178
+    },
+    {
+      "epoch": 0.614065180102916,
+      "grad_norm": 0.4014730155467987,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 1.0224,
+      "step": 179
+    },
+    {
+      "epoch": 0.6174957118353345,
+      "grad_norm": 0.43228840827941895,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 0.9433,
+      "step": 180
+    },
+    {
+      "epoch": 0.6209262435677531,
+      "grad_norm": 0.4143679141998291,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 1.0152,
+      "step": 181
+    },
+    {
+      "epoch": 0.6243567753001715,
+      "grad_norm": 0.4030207395553589,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 0.9542,
+      "step": 182
+    },
+    {
+      "epoch": 0.62778730703259,
+      "grad_norm": 0.44624990224838257,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 0.9112,
+      "step": 183
+    },
+    {
+      "epoch": 0.6312178387650086,
+      "grad_norm": 0.45174139738082886,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 1.0063,
+      "step": 184
+    },
+    {
+      "epoch": 0.6346483704974271,
+      "grad_norm": 0.4539107382297516,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 0.9326,
+      "step": 185
+    },
+    {
+      "epoch": 0.6380789022298456,
+      "grad_norm": 0.4369543492794037,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 0.9534,
+      "step": 186
+    },
+    {
+      "epoch": 0.6415094339622641,
+      "grad_norm": 0.4899698495864868,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 1.0126,
+      "step": 187
+    },
+    {
+      "epoch": 0.6449399656946827,
+      "grad_norm": 0.4699549973011017,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 0.9727,
+      "step": 188
+    },
+    {
+      "epoch": 0.6483704974271012,
+      "grad_norm": 0.4589851498603821,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 0.9804,
+      "step": 189
+    },
+    {
+      "epoch": 0.6518010291595198,
+      "grad_norm": 0.5151140689849854,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 0.9987,
+      "step": 190
+    },
+    {
+      "epoch": 0.6552315608919382,
+      "grad_norm": 0.5203879475593567,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 0.9682,
+      "step": 191
+    },
+    {
+      "epoch": 0.6586620926243568,
+      "grad_norm": 0.4911907911300659,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 0.9684,
+      "step": 192
+    },
+    {
+      "epoch": 0.6620926243567753,
+      "grad_norm": 0.5031991600990295,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 0.9443,
+      "step": 193
+    },
+    {
+      "epoch": 0.6655231560891939,
+      "grad_norm": 0.5311073064804077,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 0.9774,
+      "step": 194
+    },
+    {
+      "epoch": 0.6689536878216124,
+      "grad_norm": 0.5446680188179016,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 0.9561,
+      "step": 195
+    },
+    {
+      "epoch": 0.6723842195540308,
+      "grad_norm": 0.5319830775260925,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 0.9256,
+      "step": 196
+    },
+    {
+      "epoch": 0.6758147512864494,
+      "grad_norm": 0.5953107476234436,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 1.0225,
+      "step": 197
+    },
+    {
+      "epoch": 0.6792452830188679,
+      "grad_norm": 0.6728432774543762,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 0.986,
+      "step": 198
+    },
+    {
+      "epoch": 0.6826758147512865,
+      "grad_norm": 0.6387631893157959,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 0.9947,
+      "step": 199
+    },
+    {
+      "epoch": 0.6861063464837049,
+      "grad_norm": 0.9395211935043335,
+      "learning_rate": 0.0,
+      "loss": 0.9974,
+      "step": 200
+    },
+    {
+      "epoch": 0.6861063464837049,
+      "eval_loss": 0.9573606848716736,
+      "eval_runtime": 36.8437,
+      "eval_samples_per_second": 13.327,
+      "eval_steps_per_second": 3.338,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.7608054677635072e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null