Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6809709cc6e0b32acb814a9eb2ea10485e2e30a7abfc2c9b4b635a04a03b642
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:729425801a16d9eb3cedd8ba8dcad9d1468f3d229ac72825d5f8196e51ceaeed
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fc0b6e6a51ab504fa5b81fae3661f568391ff25aeb91ed011f92a1a5964026e
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:010af75d831f82c300e7e41ed6398d510b91a5e41614620a6bba0c142959ad62
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80e2bf3019df065cb5a9f9969cad64bcd1e961e034fab484e16358f1afda784b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:06f6e6d5f4581cfac81193952f9945589db5e0f5ac9c07959c292ad2bd16af59
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ab3d2b0401484126213dd055d9044edf00d7b06db4fe9dbad6027ee8b5d34b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.492374897003174,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.07524454477050414,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 14.864,
       "eval_steps_per_second": 3.716,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.613269972693811e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.4503767490386963,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.11286681715575621,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.864,
       "eval_steps_per_second": 3.716,
       "step": 100
+    },
+    {
+      "epoch": 0.07599699021820917,
+      "grad_norm": 4.632258415222168,
+      "learning_rate": 5.3043157894736836e-05,
+      "loss": 4.8204,
+      "step": 101
+    },
+    {
+      "epoch": 0.07674943566591422,
+      "grad_norm": 5.53971004486084,
+      "learning_rate": 5.2507368421052635e-05,
+      "loss": 5.2553,
+      "step": 102
+    },
+    {
+      "epoch": 0.07750188111361926,
+      "grad_norm": 3.8668930530548096,
+      "learning_rate": 5.197157894736842e-05,
+      "loss": 4.3132,
+      "step": 103
+    },
+    {
+      "epoch": 0.0782543265613243,
+      "grad_norm": 5.11984920501709,
+      "learning_rate": 5.143578947368421e-05,
+      "loss": 4.5635,
+      "step": 104
+    },
+    {
+      "epoch": 0.07900677200902935,
+      "grad_norm": 4.6450347900390625,
+      "learning_rate": 5.09e-05,
+      "loss": 3.4141,
+      "step": 105
+    },
+    {
+      "epoch": 0.0797592174567344,
+      "grad_norm": 5.824936389923096,
+      "learning_rate": 5.036421052631578e-05,
+      "loss": 4.7563,
+      "step": 106
+    },
+    {
+      "epoch": 0.08051166290443942,
+      "grad_norm": 4.639711380004883,
+      "learning_rate": 4.982842105263158e-05,
+      "loss": 4.3026,
+      "step": 107
+    },
+    {
+      "epoch": 0.08126410835214447,
+      "grad_norm": 8.702680587768555,
+      "learning_rate": 4.9292631578947366e-05,
+      "loss": 5.7537,
+      "step": 108
+    },
+    {
+      "epoch": 0.08201655379984951,
+      "grad_norm": 5.901224613189697,
+      "learning_rate": 4.875684210526315e-05,
+      "loss": 5.0042,
+      "step": 109
+    },
+    {
+      "epoch": 0.08276899924755456,
+      "grad_norm": 8.818628311157227,
+      "learning_rate": 4.822105263157894e-05,
+      "loss": 5.3414,
+      "step": 110
+    },
+    {
+      "epoch": 0.0835214446952596,
+      "grad_norm": 7.103747367858887,
+      "learning_rate": 4.7685263157894735e-05,
+      "loss": 5.2836,
+      "step": 111
+    },
+    {
+      "epoch": 0.08427389014296463,
+      "grad_norm": 6.621494770050049,
+      "learning_rate": 4.714947368421052e-05,
+      "loss": 4.9538,
+      "step": 112
+    },
+    {
+      "epoch": 0.08502633559066967,
+      "grad_norm": 8.953717231750488,
+      "learning_rate": 4.661368421052631e-05,
+      "loss": 5.1009,
+      "step": 113
+    },
+    {
+      "epoch": 0.08577878103837472,
+      "grad_norm": 8.522113800048828,
+      "learning_rate": 4.6077894736842104e-05,
+      "loss": 5.0785,
+      "step": 114
+    },
+    {
+      "epoch": 0.08653122648607976,
+      "grad_norm": 6.302427291870117,
+      "learning_rate": 4.554210526315789e-05,
+      "loss": 4.6044,
+      "step": 115
+    },
+    {
+      "epoch": 0.0872836719337848,
+      "grad_norm": 8.132070541381836,
+      "learning_rate": 4.500631578947368e-05,
+      "loss": 4.9639,
+      "step": 116
+    },
+    {
+      "epoch": 0.08803611738148984,
+      "grad_norm": 7.749171733856201,
+      "learning_rate": 4.447052631578947e-05,
+      "loss": 5.2865,
+      "step": 117
+    },
+    {
+      "epoch": 0.08878856282919488,
+      "grad_norm": 5.969038963317871,
+      "learning_rate": 4.393473684210526e-05,
+      "loss": 4.7642,
+      "step": 118
+    },
+    {
+      "epoch": 0.08954100827689992,
+      "grad_norm": 6.512506484985352,
+      "learning_rate": 4.339894736842105e-05,
+      "loss": 2.9121,
+      "step": 119
+    },
+    {
+      "epoch": 0.09029345372460497,
+      "grad_norm": 6.616455554962158,
+      "learning_rate": 4.2863157894736835e-05,
+      "loss": 5.1259,
+      "step": 120
+    },
+    {
+      "epoch": 0.09104589917231001,
+      "grad_norm": 8.678909301757812,
+      "learning_rate": 4.2327368421052634e-05,
+      "loss": 5.4049,
+      "step": 121
+    },
+    {
+      "epoch": 0.09179834462001504,
+      "grad_norm": 7.853146553039551,
+      "learning_rate": 4.179157894736842e-05,
+      "loss": 5.1167,
+      "step": 122
+    },
+    {
+      "epoch": 0.09255079006772009,
+      "grad_norm": 6.326202392578125,
+      "learning_rate": 4.1255789473684204e-05,
+      "loss": 5.1124,
+      "step": 123
+    },
+    {
+      "epoch": 0.09330323551542513,
+      "grad_norm": 6.513983726501465,
+      "learning_rate": 4.072e-05,
+      "loss": 4.8156,
+      "step": 124
+    },
+    {
+      "epoch": 0.09405568096313018,
+      "grad_norm": 7.685911178588867,
+      "learning_rate": 4.018421052631579e-05,
+      "loss": 4.3464,
+      "step": 125
+    },
+    {
+      "epoch": 0.09480812641083522,
+      "grad_norm": 8.669236183166504,
+      "learning_rate": 3.9648421052631573e-05,
+      "loss": 4.7814,
+      "step": 126
+    },
+    {
+      "epoch": 0.09556057185854025,
+      "grad_norm": 7.881282806396484,
+      "learning_rate": 3.9112631578947365e-05,
+      "loss": 5.0522,
+      "step": 127
+    },
+    {
+      "epoch": 0.0963130173062453,
+      "grad_norm": 6.99576473236084,
+      "learning_rate": 3.857684210526316e-05,
+      "loss": 4.4357,
+      "step": 128
+    },
+    {
+      "epoch": 0.09706546275395034,
+      "grad_norm": 8.76285171508789,
+      "learning_rate": 3.804105263157894e-05,
+      "loss": 5.5137,
+      "step": 129
+    },
+    {
+      "epoch": 0.09781790820165538,
+      "grad_norm": 7.629359245300293,
+      "learning_rate": 3.7505263157894734e-05,
+      "loss": 4.3788,
+      "step": 130
+    },
+    {
+      "epoch": 0.09857035364936043,
+      "grad_norm": 9.962780952453613,
+      "learning_rate": 3.6969473684210526e-05,
+      "loss": 4.5262,
+      "step": 131
+    },
+    {
+      "epoch": 0.09932279909706546,
+      "grad_norm": 7.68848180770874,
+      "learning_rate": 3.643368421052631e-05,
+      "loss": 4.803,
+      "step": 132
+    },
+    {
+      "epoch": 0.1000752445447705,
+      "grad_norm": 12.457582473754883,
+      "learning_rate": 3.5897894736842103e-05,
+      "loss": 5.286,
+      "step": 133
+    },
+    {
+      "epoch": 0.10082768999247554,
+      "grad_norm": 10.64576244354248,
+      "learning_rate": 3.5362105263157895e-05,
+      "loss": 5.5489,
+      "step": 134
+    },
+    {
+      "epoch": 0.10158013544018059,
+      "grad_norm": 8.363515853881836,
+      "learning_rate": 3.482631578947368e-05,
+      "loss": 4.724,
+      "step": 135
+    },
+    {
+      "epoch": 0.10233258088788563,
+      "grad_norm": 9.08327579498291,
+      "learning_rate": 3.429052631578947e-05,
+      "loss": 4.5964,
+      "step": 136
+    },
+    {
+      "epoch": 0.10308502633559068,
+      "grad_norm": 10.975964546203613,
+      "learning_rate": 3.375473684210526e-05,
+      "loss": 4.7091,
+      "step": 137
+    },
+    {
+      "epoch": 0.1038374717832957,
+      "grad_norm": 8.273202896118164,
+      "learning_rate": 3.321894736842105e-05,
+      "loss": 3.8967,
+      "step": 138
+    },
+    {
+      "epoch": 0.10458991723100075,
+      "grad_norm": 10.234407424926758,
+      "learning_rate": 3.268315789473684e-05,
+      "loss": 5.5976,
+      "step": 139
+    },
+    {
+      "epoch": 0.1053423626787058,
+      "grad_norm": 8.687202453613281,
+      "learning_rate": 3.2147368421052627e-05,
+      "loss": 4.7047,
+      "step": 140
+    },
+    {
+      "epoch": 0.10609480812641084,
+      "grad_norm": 9.25235652923584,
+      "learning_rate": 3.161157894736842e-05,
+      "loss": 4.7048,
+      "step": 141
+    },
+    {
+      "epoch": 0.10684725357411588,
+      "grad_norm": 10.904390335083008,
+      "learning_rate": 3.107578947368421e-05,
+      "loss": 5.0487,
+      "step": 142
+    },
+    {
+      "epoch": 0.10759969902182091,
+      "grad_norm": 12.776407241821289,
+      "learning_rate": 3.0539999999999996e-05,
+      "loss": 5.6051,
+      "step": 143
+    },
+    {
+      "epoch": 0.10835214446952596,
+      "grad_norm": 10.124897003173828,
+      "learning_rate": 3.0004210526315784e-05,
+      "loss": 5.6051,
+      "step": 144
+    },
+    {
+      "epoch": 0.109104589917231,
+      "grad_norm": 10.322992324829102,
+      "learning_rate": 2.946842105263158e-05,
+      "loss": 4.5447,
+      "step": 145
+    },
+    {
+      "epoch": 0.10985703536493605,
+      "grad_norm": 17.68702507019043,
+      "learning_rate": 2.8932631578947368e-05,
+      "loss": 5.1524,
+      "step": 146
+    },
+    {
+      "epoch": 0.11060948081264109,
+      "grad_norm": 14.997350692749023,
+      "learning_rate": 2.8396842105263153e-05,
+      "loss": 5.7192,
+      "step": 147
+    },
+    {
+      "epoch": 0.11136192626034612,
+      "grad_norm": 12.031723022460938,
+      "learning_rate": 2.786105263157895e-05,
+      "loss": 5.2456,
+      "step": 148
+    },
+    {
+      "epoch": 0.11211437170805116,
+      "grad_norm": 17.6466007232666,
+      "learning_rate": 2.7325263157894737e-05,
+      "loss": 6.2655,
+      "step": 149
+    },
+    {
+      "epoch": 0.11286681715575621,
+      "grad_norm": 18.988000869750977,
+      "learning_rate": 2.6789473684210522e-05,
+      "loss": 7.0167,
+      "step": 150
+    },
+    {
+      "epoch": 0.11286681715575621,
+      "eval_loss": 2.4503767490386963,
+      "eval_runtime": 37.6662,
+      "eval_samples_per_second": 14.867,
+      "eval_steps_per_second": 3.717,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.437792434153062e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null