Training in progress, step 177, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +193 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1358177e76c4b440f4b2a850ea72d85bc15dca5128a0f7ab0dd5eed189b75b16
 size 332316480

 version https://git-lfs.github.com/spec/v1
+oid sha256:4538c89ae7b384988a55ac4d2770e33ce4883b789360d12ea594a18a4384c487
 size 332316480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dd856a296a549422dedf8c90adebef2cd0901c74ed38e5e6976b73937ee3dc6
 size 169157892

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdc528fc87ac0fb4c09aefc75d9d80be0bbe965638af5e0ecc61e5bbe68c5ee4
 size 169157892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9918b7bcbbe1cf42dfa31a9c14f98aeb098d32eb3dd21aa5cc697d84c9b7d1ba
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce61678def645785b7a3771d660da38b3c29ae16edd926b09a7bd3dcddab8ce0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae143a1b3a6f3911d7de6f885a33334066ae6c29ef03002bdce21e41331f97e8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3920ee4a294e40209fd09294d2bc1408892fffa1eb0ce50b11d8a2f2e706c9ae
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9860224723815918,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.5531914893617023,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,195 @@
       "eval_samples_per_second": 22.175,
       "eval_steps_per_second": 5.6,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1301,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.21552039100416e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9860224723815918,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0127659574468084,
   "eval_steps": 50,
+  "global_step": 177,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.175,
       "eval_steps_per_second": 5.6,
       "step": 150
+    },
+    {
+      "epoch": 2.570212765957447,
+      "grad_norm": 1.4013265371322632,
+      "learning_rate": 5.86243717785463e-06,
+      "loss": 0.3459,
+      "step": 151
+    },
+    {
+      "epoch": 2.5872340425531917,
+      "grad_norm": 1.444057822227478,
+      "learning_rate": 5.428342404965076e-06,
+      "loss": 0.277,
+      "step": 152
+    },
+    {
+      "epoch": 2.604255319148936,
+      "grad_norm": 1.328897476196289,
+      "learning_rate": 5.010020576785174e-06,
+      "loss": 0.2324,
+      "step": 153
+    },
+    {
+      "epoch": 2.621276595744681,
+      "grad_norm": 1.4151270389556885,
+      "learning_rate": 4.607619728353818e-06,
+      "loss": 0.2435,
+      "step": 154
+    },
+    {
+      "epoch": 2.6382978723404253,
+      "grad_norm": 1.3859100341796875,
+      "learning_rate": 4.221282260619891e-06,
+      "loss": 0.2068,
+      "step": 155
+    },
+    {
+      "epoch": 2.65531914893617,
+      "grad_norm": 1.1932073831558228,
+      "learning_rate": 3.851144890049535e-06,
+      "loss": 0.156,
+      "step": 156
+    },
+    {
+      "epoch": 2.6723404255319148,
+      "grad_norm": 1.4522919654846191,
+      "learning_rate": 3.4973386002452535e-06,
+      "loss": 0.2544,
+      "step": 157
+    },
+    {
+      "epoch": 2.6893617021276595,
+      "grad_norm": 1.2598811388015747,
+      "learning_rate": 3.159988595593616e-06,
+      "loss": 0.1501,
+      "step": 158
+    },
+    {
+      "epoch": 2.706382978723404,
+      "grad_norm": 1.3632055521011353,
+      "learning_rate": 2.839214256958106e-06,
+      "loss": 0.1397,
+      "step": 159
+    },
+    {
+      "epoch": 2.723404255319149,
+      "grad_norm": 1.353302240371704,
+      "learning_rate": 2.53512909943287e-06,
+      "loss": 0.305,
+      "step": 160
+    },
+    {
+      "epoch": 2.7404255319148936,
+      "grad_norm": 1.339839220046997,
+      "learning_rate": 2.2478407321721296e-06,
+      "loss": 0.366,
+      "step": 161
+    },
+    {
+      "epoch": 2.7574468085106383,
+      "grad_norm": 1.3590214252471924,
+      "learning_rate": 1.977450820309684e-06,
+      "loss": 0.3311,
+      "step": 162
+    },
+    {
+      "epoch": 2.774468085106383,
+      "grad_norm": 1.4828448295593262,
+      "learning_rate": 1.7240550489817653e-06,
+      "loss": 0.3547,
+      "step": 163
+    },
+    {
+      "epoch": 2.7914893617021277,
+      "grad_norm": 1.3542424440383911,
+      "learning_rate": 1.4877430894662036e-06,
+      "loss": 0.2935,
+      "step": 164
+    },
+    {
+      "epoch": 2.8085106382978724,
+      "grad_norm": 1.304952621459961,
+      "learning_rate": 1.268598567449647e-06,
+      "loss": 0.2388,
+      "step": 165
+    },
+    {
+      "epoch": 2.825531914893617,
+      "grad_norm": 1.453139066696167,
+      "learning_rate": 1.0666990334342707e-06,
+      "loss": 0.2971,
+      "step": 166
+    },
+    {
+      "epoch": 2.842553191489362,
+      "grad_norm": 1.335195541381836,
+      "learning_rate": 8.821159352943143e-07,
+      "loss": 0.2004,
+      "step": 167
+    },
+    {
+      "epoch": 2.8595744680851065,
+      "grad_norm": 1.482730746269226,
+      "learning_rate": 7.149145929922607e-07,
+      "loss": 0.2955,
+      "step": 168
+    },
+    {
+      "epoch": 2.876595744680851,
+      "grad_norm": 1.3346716165542603,
+      "learning_rate": 5.651541754634726e-07,
+      "loss": 0.2225,
+      "step": 169
+    },
+    {
+      "epoch": 2.8936170212765955,
+      "grad_norm": 1.362627625465393,
+      "learning_rate": 4.3288767967760715e-07,
+      "loss": 0.1993,
+      "step": 170
+    },
+    {
+      "epoch": 2.9106382978723406,
+      "grad_norm": 1.4417704343795776,
+      "learning_rate": 3.1816191188415166e-07,
+      "loss": 0.2222,
+      "step": 171
+    },
+    {
+      "epoch": 2.927659574468085,
+      "grad_norm": 1.2695422172546387,
+      "learning_rate": 2.2101747104866788e-07,
+      "loss": 0.1648,
+      "step": 172
+    },
+    {
+      "epoch": 2.94468085106383,
+      "grad_norm": 1.325673222541809,
+      "learning_rate": 1.4148873448573408e-07,
+      "loss": 0.1262,
+      "step": 173
+    },
+    {
+      "epoch": 2.9617021276595743,
+      "grad_norm": 1.330980896949768,
+      "learning_rate": 7.960384569353219e-08,
+      "loss": 0.2392,
+      "step": 174
+    },
+    {
+      "epoch": 2.978723404255319,
+      "grad_norm": 1.4023689031600952,
+      "learning_rate": 3.538470439448105e-08,
+      "loss": 0.2823,
+      "step": 175
+    },
+    {
+      "epoch": 2.9957446808510637,
+      "grad_norm": 1.465209722518921,
+      "learning_rate": 8.846958785418968e-09,
+      "loss": 0.192,
+      "step": 176
+    },
+    {
+      "epoch": 3.0127659574468084,
+      "grad_norm": 1.1684350967407227,
+      "learning_rate": 0.0,
+      "loss": 0.345,
+      "step": 177
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.334314061384909e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null