Training in progress, step 81, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:993aff0d616f08ed583ec7c732efc989a542cf8cebf9d98347703d267646f68b
 size 161533192

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfd5afce7348151819c0fd9dc2b22dc5f859dd6fcac0662afa81383ff8c4a698
 size 161533192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57b285bf5ad6cf6b9cea547ac2fd5562d6d03592e0a4c91fadc5c1eefd109e36
 size 82460660

 version https://git-lfs.github.com/spec/v1
+oid sha256:43f4fad86dc10a669aa6ac202151c9d47ea9911b8025eefd006f339499dcb91e
 size 82460660

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66a117ea053aab6fa5cc98c0bc1598d57e433e7e150af316d9ef492cf6161b89
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:809515f485ff8339ea759cadf35a9cf928f6fc34958e3f872da46cb4afcba971
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d899e046e5eb8fee9b81979db8db7b88d6dc92f30a13d049b906f3a3be1dfc0f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bec84b33191b76a1f66109f05ca2863b65ba9c0ae66be6a4921ad95ad9f1a38e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8181818181818182,
   "eval_steps": 9,
-  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -247,6 +247,35 @@
       "eval_samples_per_second": 8.683,
       "eval_steps_per_second": 1.115,
       "step": 72
     }
   ],
   "logging_steps": 3,
@@ -266,7 +295,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.705654411717509e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9204545454545454,
   "eval_steps": 9,
+  "global_step": 81,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.683,
       "eval_steps_per_second": 1.115,
       "step": 72
+    },
+    {
+      "epoch": 0.8522727272727273,
+      "grad_norm": 1.535409688949585,
+      "learning_rate": 8.930309757836517e-06,
+      "loss": 2.0856,
+      "step": 75
+    },
+    {
+      "epoch": 0.8863636363636364,
+      "grad_norm": 1.479998230934143,
+      "learning_rate": 7.016504991533726e-06,
+      "loss": 1.9215,
+      "step": 78
+    },
+    {
+      "epoch": 0.9204545454545454,
+      "grad_norm": 1.4978396892547607,
+      "learning_rate": 5.299731159831953e-06,
+      "loss": 2.0011,
+      "step": 81
+    },
+    {
+      "epoch": 0.9204545454545454,
+      "eval_loss": 1.9259672164916992,
+      "eval_runtime": 17.0378,
+      "eval_samples_per_second": 8.687,
+      "eval_steps_per_second": 1.115,
+      "step": 81
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.9258516001154662e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null