Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d24b0397b84515fd5c8658413acbf5679ee218e370af53374322a5e02dad939
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:f23d0780064bbab1ccdb905633151564803f8906ba910ccef298b9cef97f2812
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccaa10718baaf5bd64601309ed15285ab5d78611ccf0666d7ebf10ed6f211462
 size 18810356

 version https://git-lfs.github.com/spec/v1
+oid sha256:c07bbcc8c827083fddbb7316a1c3dfff70d03ebe42ece58f6f5091d6a660ef41
 size 18810356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef41991dbb561d3a2f5aceab35c3651f4204f14eaf4c97c7bbc11367b613fe59
 size 14308

 version https://git-lfs.github.com/spec/v1
+oid sha256:740b4d5b5706c9b622ccec788006a80e3bb923be218c04c7170c76a352dd6003
 size 14308

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5f1cac8669e836e149701a48af17007b43f815a942690ea0b80d521bdf222a6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b638125345eee66261d2423c36c01b02453186ef536e9a5b8085fc8faa3a2a46
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.735464572906494,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.018060729201941527,
   "eval_steps": 50,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,49 @@
       "eval_samples_per_second": 50.391,
       "eval_steps_per_second": 12.6,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3609277468508160.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.7266805171966553,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.02031832035218422,
   "eval_steps": 50,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 50.391,
       "eval_steps_per_second": 12.6,
       "step": 400
+    },
+    {
+      "epoch": 0.018512247431990068,
+      "grad_norm": 2.952505588531494,
+      "learning_rate": 1.9575758076567897e-05,
+      "loss": 2.5131,
+      "step": 410
+    },
+    {
+      "epoch": 0.018963765662038605,
+      "grad_norm": 3.583050012588501,
+      "learning_rate": 1.557507014396634e-05,
+      "loss": 2.4948,
+      "step": 420
+    },
+    {
+      "epoch": 0.019415283892087142,
+      "grad_norm": 3.9211292266845703,
+      "learning_rate": 1.1997871731959984e-05,
+      "loss": 2.6896,
+      "step": 430
+    },
+    {
+      "epoch": 0.019866802122135683,
+      "grad_norm": 3.595341444015503,
+      "learning_rate": 8.861590591633399e-06,
+      "loss": 2.6609,
+      "step": 440
+    },
+    {
+      "epoch": 0.02031832035218422,
+      "grad_norm": 6.866179466247559,
+      "learning_rate": 6.181506369444397e-06,
+      "loss": 3.1241,
+      "step": 450
+    },
+    {
+      "epoch": 0.02031832035218422,
+      "eval_loss": 2.7266805171966553,
+      "eval_runtime": 186.0159,
+      "eval_samples_per_second": 50.135,
+      "eval_steps_per_second": 12.537,
+      "step": 450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4059873906524160.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null