Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a0a9b5d3333a2a80ecab4e9fbf68128d439a8bdd3dac15dd44a8ff7f270c94e
 size 985240

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcdb159a661d9dceee0d19f34d9e31de06548e070b3571ae126cb5b2a883579a
 size 985240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eef9a0cc37d49233b9103c2c99d57ee7ff11f553ddf627ab3fa0201d2ac67aa
 size 520860

 version https://git-lfs.github.com/spec/v1
+oid sha256:283c865260a16be2ac8782a4e93a773da9f9f26a742339641c77dd1d283a726c
 size 520860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:520ed0f0305e51b9eba187b564a25f613b36cf1399f5af5141d8672ac8c7a0ad
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:163e15ce907d9da1284a6876075df6feaab3d046987352c38c44629120d62edf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef91cf09f7b6a58a39bbfe1ef78ac2fa91c0c15ca1705097a187d272d0433d8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47edf82128c3f034f21204d4b9ce5c76cd3269748ce31061b148a0a389d049d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.508695602416992,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.016938625712833832,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 123.362,
       "eval_steps_per_second": 30.865,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4493817151488.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.493440628051758,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.022584834283778443,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 123.362,
       "eval_steps_per_second": 30.865,
       "step": 150
+    },
+    {
+      "epoch": 0.017051549884252726,
+      "grad_norm": 1.5685151815414429,
+      "learning_rate": 2.5944210526315793e-05,
+      "loss": 21.5095,
+      "step": 151
+    },
+    {
+      "epoch": 0.017164474055671616,
+      "grad_norm": 1.5771194696426392,
+      "learning_rate": 2.5414736842105266e-05,
+      "loss": 21.547,
+      "step": 152
+    },
+    {
+      "epoch": 0.01727739822709051,
+      "grad_norm": 2.027747869491577,
+      "learning_rate": 2.4885263157894737e-05,
+      "loss": 21.3808,
+      "step": 153
+    },
+    {
+      "epoch": 0.017390322398509402,
+      "grad_norm": 1.4902698993682861,
+      "learning_rate": 2.4355789473684214e-05,
+      "loss": 21.533,
+      "step": 154
+    },
+    {
+      "epoch": 0.017503246569928292,
+      "grad_norm": 2.0053372383117676,
+      "learning_rate": 2.3826315789473684e-05,
+      "loss": 21.4025,
+      "step": 155
+    },
+    {
+      "epoch": 0.017616170741347185,
+      "grad_norm": 2.245124101638794,
+      "learning_rate": 2.3296842105263158e-05,
+      "loss": 21.2401,
+      "step": 156
+    },
+    {
+      "epoch": 0.01772909491276608,
+      "grad_norm": 2.0497140884399414,
+      "learning_rate": 2.2767368421052635e-05,
+      "loss": 21.3229,
+      "step": 157
+    },
+    {
+      "epoch": 0.01784201908418497,
+      "grad_norm": 1.991555094718933,
+      "learning_rate": 2.2237894736842105e-05,
+      "loss": 21.1808,
+      "step": 158
+    },
+    {
+      "epoch": 0.017954943255603862,
+      "grad_norm": 2.4944097995758057,
+      "learning_rate": 2.170842105263158e-05,
+      "loss": 21.1312,
+      "step": 159
+    },
+    {
+      "epoch": 0.018067867427022755,
+      "grad_norm": 1.869414210319519,
+      "learning_rate": 2.1178947368421053e-05,
+      "loss": 21.0831,
+      "step": 160
+    },
+    {
+      "epoch": 0.018180791598441645,
+      "grad_norm": 1.6277885437011719,
+      "learning_rate": 2.0649473684210527e-05,
+      "loss": 20.8569,
+      "step": 161
+    },
+    {
+      "epoch": 0.01829371576986054,
+      "grad_norm": 1.5575450658798218,
+      "learning_rate": 2.0120000000000004e-05,
+      "loss": 20.8602,
+      "step": 162
+    },
+    {
+      "epoch": 0.018406639941279432,
+      "grad_norm": 1.6564252376556396,
+      "learning_rate": 1.9590526315789474e-05,
+      "loss": 20.7874,
+      "step": 163
+    },
+    {
+      "epoch": 0.01851956411269832,
+      "grad_norm": 1.4694855213165283,
+      "learning_rate": 1.9061052631578948e-05,
+      "loss": 21.0323,
+      "step": 164
+    },
+    {
+      "epoch": 0.018632488284117215,
+      "grad_norm": 1.6086772680282593,
+      "learning_rate": 1.8531578947368422e-05,
+      "loss": 20.6948,
+      "step": 165
+    },
+    {
+      "epoch": 0.01874541245553611,
+      "grad_norm": 1.4179946184158325,
+      "learning_rate": 1.8002105263157896e-05,
+      "loss": 20.9274,
+      "step": 166
+    },
+    {
+      "epoch": 0.018858336626954998,
+      "grad_norm": 1.503804326057434,
+      "learning_rate": 1.747263157894737e-05,
+      "loss": 20.9024,
+      "step": 167
+    },
+    {
+      "epoch": 0.01897126079837389,
+      "grad_norm": 1.5939704179763794,
+      "learning_rate": 1.6943157894736843e-05,
+      "loss": 20.8224,
+      "step": 168
+    },
+    {
+      "epoch": 0.019084184969792785,
+      "grad_norm": 1.6570841073989868,
+      "learning_rate": 1.6413684210526317e-05,
+      "loss": 20.6158,
+      "step": 169
+    },
+    {
+      "epoch": 0.019197109141211675,
+      "grad_norm": 1.64534330368042,
+      "learning_rate": 1.588421052631579e-05,
+      "loss": 20.8458,
+      "step": 170
+    },
+    {
+      "epoch": 0.019310033312630568,
+      "grad_norm": 1.5698976516723633,
+      "learning_rate": 1.5354736842105264e-05,
+      "loss": 20.8154,
+      "step": 171
+    },
+    {
+      "epoch": 0.01942295748404946,
+      "grad_norm": 1.5585131645202637,
+      "learning_rate": 1.4825263157894736e-05,
+      "loss": 20.6118,
+      "step": 172
+    },
+    {
+      "epoch": 0.019535881655468355,
+      "grad_norm": 1.5782846212387085,
+      "learning_rate": 1.4295789473684212e-05,
+      "loss": 20.7412,
+      "step": 173
+    },
+    {
+      "epoch": 0.019648805826887245,
+      "grad_norm": 1.4744038581848145,
+      "learning_rate": 1.3766315789473686e-05,
+      "loss": 20.7732,
+      "step": 174
+    },
+    {
+      "epoch": 0.019761729998306138,
+      "grad_norm": 1.7839460372924805,
+      "learning_rate": 1.3236842105263158e-05,
+      "loss": 20.7025,
+      "step": 175
+    },
+    {
+      "epoch": 0.01987465416972503,
+      "grad_norm": 1.6446399688720703,
+      "learning_rate": 1.2707368421052633e-05,
+      "loss": 21.1127,
+      "step": 176
+    },
+    {
+      "epoch": 0.01998757834114392,
+      "grad_norm": 1.668971061706543,
+      "learning_rate": 1.2177894736842107e-05,
+      "loss": 20.7067,
+      "step": 177
+    },
+    {
+      "epoch": 0.020100502512562814,
+      "grad_norm": 1.616200566291809,
+      "learning_rate": 1.1648421052631579e-05,
+      "loss": 20.9103,
+      "step": 178
+    },
+    {
+      "epoch": 0.020213426683981708,
+      "grad_norm": 1.5561143159866333,
+      "learning_rate": 1.1118947368421053e-05,
+      "loss": 20.7981,
+      "step": 179
+    },
+    {
+      "epoch": 0.020326350855400598,
+      "grad_norm": 1.7111828327178955,
+      "learning_rate": 1.0589473684210526e-05,
+      "loss": 21.0354,
+      "step": 180
+    },
+    {
+      "epoch": 0.02043927502681949,
+      "grad_norm": 1.6354975700378418,
+      "learning_rate": 1.0060000000000002e-05,
+      "loss": 21.0103,
+      "step": 181
+    },
+    {
+      "epoch": 0.020552199198238384,
+      "grad_norm": 1.591170072555542,
+      "learning_rate": 9.530526315789474e-06,
+      "loss": 20.667,
+      "step": 182
+    },
+    {
+      "epoch": 0.020665123369657274,
+      "grad_norm": 1.515537977218628,
+      "learning_rate": 9.001052631578948e-06,
+      "loss": 20.9441,
+      "step": 183
+    },
+    {
+      "epoch": 0.020778047541076167,
+      "grad_norm": 1.6177928447723389,
+      "learning_rate": 8.471578947368422e-06,
+      "loss": 20.8519,
+      "step": 184
+    },
+    {
+      "epoch": 0.02089097171249506,
+      "grad_norm": 1.6929750442504883,
+      "learning_rate": 7.942105263157895e-06,
+      "loss": 21.0193,
+      "step": 185
+    },
+    {
+      "epoch": 0.02100389588391395,
+      "grad_norm": 1.6867022514343262,
+      "learning_rate": 7.412631578947368e-06,
+      "loss": 20.7455,
+      "step": 186
+    },
+    {
+      "epoch": 0.021116820055332844,
+      "grad_norm": 1.6948421001434326,
+      "learning_rate": 6.883157894736843e-06,
+      "loss": 21.1823,
+      "step": 187
+    },
+    {
+      "epoch": 0.021229744226751737,
+      "grad_norm": 1.7096174955368042,
+      "learning_rate": 6.3536842105263166e-06,
+      "loss": 21.1038,
+      "step": 188
+    },
+    {
+      "epoch": 0.021342668398170627,
+      "grad_norm": 1.7069429159164429,
+      "learning_rate": 5.8242105263157895e-06,
+      "loss": 20.7988,
+      "step": 189
+    },
+    {
+      "epoch": 0.02145559256958952,
+      "grad_norm": 1.64662766456604,
+      "learning_rate": 5.294736842105263e-06,
+      "loss": 20.9152,
+      "step": 190
+    },
+    {
+      "epoch": 0.021568516741008414,
+      "grad_norm": 1.5974547863006592,
+      "learning_rate": 4.765263157894737e-06,
+      "loss": 20.8237,
+      "step": 191
+    },
+    {
+      "epoch": 0.021681440912427304,
+      "grad_norm": 1.6250189542770386,
+      "learning_rate": 4.235789473684211e-06,
+      "loss": 21.1155,
+      "step": 192
+    },
+    {
+      "epoch": 0.021794365083846197,
+      "grad_norm": 1.6363251209259033,
+      "learning_rate": 3.706315789473684e-06,
+      "loss": 20.8028,
+      "step": 193
+    },
+    {
+      "epoch": 0.02190728925526509,
+      "grad_norm": 1.579261302947998,
+      "learning_rate": 3.1768421052631583e-06,
+      "loss": 20.9767,
+      "step": 194
+    },
+    {
+      "epoch": 0.02202021342668398,
+      "grad_norm": 1.5533766746520996,
+      "learning_rate": 2.6473684210526316e-06,
+      "loss": 20.9212,
+      "step": 195
+    },
+    {
+      "epoch": 0.022133137598102873,
+      "grad_norm": 1.6319046020507812,
+      "learning_rate": 2.1178947368421054e-06,
+      "loss": 20.8396,
+      "step": 196
+    },
+    {
+      "epoch": 0.022246061769521767,
+      "grad_norm": 1.763071060180664,
+      "learning_rate": 1.5884210526315791e-06,
+      "loss": 20.986,
+      "step": 197
+    },
+    {
+      "epoch": 0.02235898594094066,
+      "grad_norm": 1.830514669418335,
+      "learning_rate": 1.0589473684210527e-06,
+      "loss": 20.8883,
+      "step": 198
+    },
+    {
+      "epoch": 0.02247191011235955,
+      "grad_norm": 1.82379150390625,
+      "learning_rate": 5.294736842105263e-07,
+      "loss": 20.9161,
+      "step": 199
+    },
+    {
+      "epoch": 0.022584834283778443,
+      "grad_norm": 2.095973014831543,
+      "learning_rate": 0.0,
+      "loss": 20.7837,
+      "step": 200
+    },
+    {
+      "epoch": 0.022584834283778443,
+      "eval_loss": 10.493440628051758,
+      "eval_runtime": 30.2141,
+      "eval_samples_per_second": 123.419,
+      "eval_steps_per_second": 30.88,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5986779660288.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null