Training in progress, step 250, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:892c6e2e6435c73463c3134fd84a37b5f246ee300223faba464a5be9a30532b4
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2245154e86cfc0620e9548fd4159206aae2de3b21ba48f94df102e9dac72a4f
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7fb74db006dd44b6e502d7da881da69cec15dd4690ca116f9dd1533ecbde16a
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:82c34aa219f3b252bfb1eea4d56a9762eb25fa457aefbe1745f978607f31db0c
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7faf92e9d1cf9db27d7d53e0f09be3f79b4c8736a000208b6975d432c5fe0a63
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:466f77911b2363e9039eb587c905ca68553d2acfdd4167c910996a379579799a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9abf3b00130ebbc35657fb7788755e92886d9a53e2a5677dea2a31074b05473
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c025e03e0d8ded968e4f23b186aaafce8f23c2a50b134c4f77cea95627049ac8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3247433006763458,
-  "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.21750951604132682,
   "eval_steps": 50,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -187,6 +187,49 @@
       "eval_samples_per_second": 17.029,
       "eval_steps_per_second": 4.257,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -215,7 +258,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.59915573477376e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.259118914604187,
+  "best_model_checkpoint": "miner_id_24/checkpoint-250",
+  "epoch": 0.27188689505165853,
   "eval_steps": 50,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.029,
       "eval_steps_per_second": 4.257,
       "step": 200
+    },
+    {
+      "epoch": 0.22838499184339314,
+      "grad_norm": 0.5655252933502197,
+      "learning_rate": 0.00015102897041285315,
+      "loss": 0.2144,
+      "step": 210
+    },
+    {
+      "epoch": 0.2392604676454595,
+      "grad_norm": 0.7886796593666077,
+      "learning_rate": 0.00014433369230867077,
+      "loss": 0.1604,
+      "step": 220
+    },
+    {
+      "epoch": 0.2501359434475258,
+      "grad_norm": 1.0239461660385132,
+      "learning_rate": 0.0001374467844093695,
+      "loss": 0.1419,
+      "step": 230
+    },
+    {
+      "epoch": 0.26101141924959215,
+      "grad_norm": 4.1843976974487305,
+      "learning_rate": 0.0001304017990379651,
+      "loss": 0.2756,
+      "step": 240
+    },
+    {
+      "epoch": 0.27188689505165853,
+      "grad_norm": 9.119248390197754,
+      "learning_rate": 0.0001232330586550277,
+      "loss": 0.2422,
+      "step": 250
+    },
+    {
+      "epoch": 0.27188689505165853,
+      "eval_loss": 0.259118914604187,
+      "eval_runtime": 22.8194,
+      "eval_samples_per_second": 17.003,
+      "eval_steps_per_second": 4.251,
+      "step": 250
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.505609771679744e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null