Training in progress, step 178, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +200 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45a76d21e8240cc769158382aec9f6df89c6a5acb870553cf60a8d5a4eea15f6
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:49e1675f058496425cb360c348fc6124c09b2ac8ec41d8c7feba18ed0c46345a
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:147f317114b26a2cde3186bda7be74f8da5b606d375cb421aac73099820ba740
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:13e4d45ca32c88e79e75ee8e31726a4ad7ce46cfc98e1b5c00c438f04eaefe40
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3540ef75a83e32e2a105c3eb2c2e4e2abded82469ef011ec799eac944acee9d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b22af1a69aed4cc95de7744115b1d53ef93fdd980454932bba3d5b2f4fe9efe
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:400e85cd31bb8cbbe64211436e490aa3113b033965821187553a5ca8b55d7f62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:081d1f5acdcf54d3de0b373d287b553d0238f70fe938b04f3f8bf6870fa7d1d6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.5316455696202533,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,202 @@
       "eval_samples_per_second": 215.85,
       "eval_steps_per_second": 53.963,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1308,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 24118847078400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0042194092827006,
   "eval_steps": 50,
+  "global_step": 178,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 215.85,
       "eval_steps_per_second": 53.963,
       "step": 150
+    },
+    {
+      "epoch": 2.548523206751055,
+      "grad_norm": 0.00035565474536269903,
+      "learning_rate": 6.238828904562316e-06,
+      "loss": 46.0,
+      "step": 151
+    },
+    {
+      "epoch": 2.5654008438818563,
+      "grad_norm": 0.0004177090304438025,
+      "learning_rate": 5.794230324769517e-06,
+      "loss": 46.0,
+      "step": 152
+    },
+    {
+      "epoch": 2.5822784810126582,
+      "grad_norm": 0.00039405698771588504,
+      "learning_rate": 5.365089537819434e-06,
+      "loss": 46.0,
+      "step": 153
+    },
+    {
+      "epoch": 2.59915611814346,
+      "grad_norm": 0.0003038046706933528,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 46.0,
+      "step": 154
+    },
+    {
+      "epoch": 2.6160337552742616,
+      "grad_norm": 0.00027907994808629155,
+      "learning_rate": 4.5537761293894535e-06,
+      "loss": 46.0,
+      "step": 155
+    },
+    {
+      "epoch": 2.632911392405063,
+      "grad_norm": 0.0007548971334472299,
+      "learning_rate": 4.1718872065011904e-06,
+      "loss": 46.0,
+      "step": 156
+    },
+    {
+      "epoch": 2.649789029535865,
+      "grad_norm": 0.00024210047558881342,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 46.0,
+      "step": 157
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.00046050691162236035,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 46.0,
+      "step": 158
+    },
+    {
+      "epoch": 2.6835443037974684,
+      "grad_norm": 0.0004280286666471511,
+      "learning_rate": 3.1228770728000455e-06,
+      "loss": 46.0,
+      "step": 159
+    },
+    {
+      "epoch": 2.70042194092827,
+      "grad_norm": 0.000559692329261452,
+      "learning_rate": 2.8058334845816213e-06,
+      "loss": 46.0,
+      "step": 160
+    },
+    {
+      "epoch": 2.717299578059072,
+      "grad_norm": 0.0003143562644254416,
+      "learning_rate": 2.5052926663577e-06,
+      "loss": 46.0,
+      "step": 161
+    },
+    {
+      "epoch": 2.7341772151898733,
+      "grad_norm": 0.0003422136069275439,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 46.0,
+      "step": 162
+    },
+    {
+      "epoch": 2.7510548523206753,
+      "grad_norm": 0.00047853466821834445,
+      "learning_rate": 1.9541339027450256e-06,
+      "loss": 46.0,
+      "step": 163
+    },
+    {
+      "epoch": 2.7679324894514767,
+      "grad_norm": 0.00032720083254389465,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 46.0,
+      "step": 164
+    },
+    {
+      "epoch": 2.7848101265822782,
+      "grad_norm": 0.0003442906599957496,
+      "learning_rate": 1.4701716273304521e-06,
+      "loss": 46.0,
+      "step": 165
+    },
+    {
+      "epoch": 2.80168776371308,
+      "grad_norm": 0.000351409544236958,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 46.0,
+      "step": 166
+    },
+    {
+      "epoch": 2.818565400843882,
+      "grad_norm": 0.0003428487398196012,
+      "learning_rate": 1.0540827051175818e-06,
+      "loss": 46.0,
+      "step": 167
+    },
+    {
+      "epoch": 2.8354430379746836,
+      "grad_norm": 0.00041585814324207604,
+      "learning_rate": 8.716763383355864e-07,
+      "loss": 46.0,
+      "step": 168
+    },
+    {
+      "epoch": 2.852320675105485,
+      "grad_norm": 0.0002979324199259281,
+      "learning_rate": 7.064490740882057e-07,
+      "loss": 46.0,
+      "step": 169
+    },
+    {
+      "epoch": 2.869198312236287,
+      "grad_norm": 0.000414243753766641,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 46.0,
+      "step": 170
+    },
+    {
+      "epoch": 2.8860759493670884,
+      "grad_norm": 0.000617706507910043,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 46.0,
+      "step": 171
+    },
+    {
+      "epoch": 2.9029535864978904,
+      "grad_norm": 0.0005887034349143505,
+      "learning_rate": 3.143895053378698e-07,
+      "loss": 46.0,
+      "step": 172
+    },
+    {
+      "epoch": 2.919831223628692,
+      "grad_norm": 0.00023495641653425992,
+      "learning_rate": 2.1839605294330933e-07,
+      "loss": 46.0,
+      "step": 173
+    },
+    {
+      "epoch": 2.9367088607594938,
+      "grad_norm": 0.00034931753179989755,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 46.0,
+      "step": 174
+    },
+    {
+      "epoch": 2.9535864978902953,
+      "grad_norm": 0.0007667237659916282,
+      "learning_rate": 7.865924910916977e-08,
+      "loss": 46.0,
+      "step": 175
+    },
+    {
+      "epoch": 2.970464135021097,
+      "grad_norm": 0.0006193204899318516,
+      "learning_rate": 3.496476058006959e-08,
+      "loss": 46.0,
+      "step": 176
+    },
+    {
+      "epoch": 2.9873417721518987,
+      "grad_norm": 0.0005399351357482374,
+      "learning_rate": 8.741954362678772e-09,
+      "loss": 46.0,
+      "step": 177
+    },
+    {
+      "epoch": 3.0042194092827006,
+      "grad_norm": 0.0003315158828627318,
+      "learning_rate": 0.0,
+      "loss": 46.0,
+      "step": 178
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 28621031866368.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null