Training in progress, step 173, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +165 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72b3d916e8a4fdfaab55e20dc1808304928b55b1c36d10b6e9523affd6f38658
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f6cbb051fd52a75595fb64c5fd2e8e7230570e2a697c49ad2b0349925416df3
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d12791b8ea40173f34c615b70dd40b291e24012b785b4ef0b1ee5f091354afd2
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:eee1d680869a2c696abb2860d15082bd64de16eb1029a501e43293dec53ad300
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f96e38a5a8ece995167ed6945365ae20f203f39ea686b6119d239afbe8f5d471
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f76e2539888437e78d98f2899dc74a1a42477b02eeea6802aa68d2c353a7bb46
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cb01d4f0da8d959e9ca5a4773566e24c639c6b8f18e1285757275e715c7b05f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:13cea8c13b7ff143621afe3809a9208b72f31395569d02b28f3b5848f77c0a7e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.4396902620792389,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.608695652173913,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,167 @@
       "eval_samples_per_second": 10.497,
       "eval_steps_per_second": 2.706,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1273,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.1742225999056077e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.4396902620792389,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.008695652173913,
   "eval_steps": 50,
+  "global_step": 173,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.497,
       "eval_steps_per_second": 2.706,
       "step": 150
+    },
+    {
+      "epoch": 2.626086956521739,
+      "grad_norm": 5.254273414611816,
+      "learning_rate": 4.427853541662091e-06,
+      "loss": 0.425,
+      "step": 151
+    },
+    {
+      "epoch": 2.643478260869565,
+      "grad_norm": 10.881125450134277,
+      "learning_rate": 4.039859192235779e-06,
+      "loss": 0.6446,
+      "step": 152
+    },
+    {
+      "epoch": 2.660869565217391,
+      "grad_norm": 8.573467254638672,
+      "learning_rate": 3.668937158903901e-06,
+      "loss": 0.1585,
+      "step": 153
+    },
+    {
+      "epoch": 2.6782608695652175,
+      "grad_norm": 8.719847679138184,
+      "learning_rate": 3.315225224059809e-06,
+      "loss": 0.3111,
+      "step": 154
+    },
+    {
+      "epoch": 2.6956521739130435,
+      "grad_norm": 4.025722026824951,
+      "learning_rate": 2.9788547772478416e-06,
+      "loss": 0.3628,
+      "step": 155
+    },
+    {
+      "epoch": 2.7130434782608694,
+      "grad_norm": 12.714775085449219,
+      "learning_rate": 2.6599507663574384e-06,
+      "loss": 0.2022,
+      "step": 156
+    },
+    {
+      "epoch": 2.730434782608696,
+      "grad_norm": 17.537216186523438,
+      "learning_rate": 2.3586316512101416e-06,
+      "loss": 0.1643,
+      "step": 157
+    },
+    {
+      "epoch": 2.747826086956522,
+      "grad_norm": 6.424210071563721,
+      "learning_rate": 2.0750093595565733e-06,
+      "loss": 2.195,
+      "step": 158
+    },
+    {
+      "epoch": 2.765217391304348,
+      "grad_norm": 6.1707048416137695,
+      "learning_rate": 1.8091892454998594e-06,
+      "loss": 0.915,
+      "step": 159
+    },
+    {
+      "epoch": 2.782608695652174,
+      "grad_norm": 9.390337944030762,
+      "learning_rate": 1.5612700503608968e-06,
+      "loss": 0.8495,
+      "step": 160
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 4.434484958648682,
+      "learning_rate": 1.33134386599994e-06,
+      "loss": 0.3239,
+      "step": 161
+    },
+    {
+      "epoch": 2.8173913043478263,
+      "grad_norm": 6.292799472808838,
+      "learning_rate": 1.1194961006082972e-06,
+      "loss": 0.5492,
+      "step": 162
+    },
+    {
+      "epoch": 2.8347826086956522,
+      "grad_norm": 4.593890190124512,
+      "learning_rate": 9.258054469825972e-07,
+      "loss": 0.3086,
+      "step": 163
+    },
+    {
+      "epoch": 2.8521739130434782,
+      "grad_norm": 5.500854969024658,
+      "learning_rate": 7.503438532937168e-07,
+      "loss": 0.2752,
+      "step": 164
+    },
+    {
+      "epoch": 2.869565217391304,
+      "grad_norm": 9.617355346679688,
+      "learning_rate": 5.931764963608866e-07,
+      "loss": 0.3153,
+      "step": 165
+    },
+    {
+      "epoch": 2.8869565217391306,
+      "grad_norm": 7.330923080444336,
+      "learning_rate": 4.543617574412184e-07,
+      "loss": 0.6061,
+      "step": 166
+    },
+    {
+      "epoch": 2.9043478260869566,
+      "grad_norm": 4.938751220703125,
+      "learning_rate": 3.339512005434309e-07,
+      "loss": 0.2001,
+      "step": 167
+    },
+    {
+      "epoch": 2.9217391304347826,
+      "grad_norm": 9.237695693969727,
+      "learning_rate": 2.319895532739369e-07,
+      "loss": 0.1176,
+      "step": 168
+    },
+    {
+      "epoch": 2.9391304347826086,
+      "grad_norm": 13.457283973693848,
+      "learning_rate": 1.4851469022234e-07,
+      "loss": 0.2774,
+      "step": 169
+    },
+    {
+      "epoch": 2.9565217391304346,
+      "grad_norm": 2.380657911300659,
+      "learning_rate": 8.355761889260461e-08,
+      "loss": 0.042,
+      "step": 170
+    },
+    {
+      "epoch": 2.973913043478261,
+      "grad_norm": 12.363837242126465,
+      "learning_rate": 3.7142468185014104e-08,
+      "loss": 0.1025,
+      "step": 171
+    },
+    {
+      "epoch": 2.991304347826087,
+      "grad_norm": 5.269643783569336,
+      "learning_rate": 9.286479433257e-09,
+      "loss": 1.1058,
+      "step": 172
+    },
+    {
+      "epoch": 3.008695652173913,
+      "grad_norm": 4.399047374725342,
+      "learning_rate": 0.0,
+      "loss": 1.715,
+      "step": 173
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.5064824501174272e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null