Training in progress, step 28, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:978e4dd255178784ce2dcd3a2c3fd79d986c756ff0205379f135c8f8e9f382d2
 size 682685984

 version https://git-lfs.github.com/spec/v1
+oid sha256:09cd71d593251741cccc7701ec9e223e84e8d9dcd240c7777b7e3461ed2d2524
 size 682685984

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0284e8eb63ec1ec0cef3004b8423583300e44072c72a15a5cf081b28a6b201c6
 size 85498196

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4f86960f918313f8b9dfbbf8ef758f4588efe47fb179e4fe2de276428ee074f
 size 85498196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60cc77b0b0bf090ec2a03f2d6f01581488c32af3545b07c31844fee6e2c39c13
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1de830288606b38e6cfca5b625f3b4e9a64b2aee6f091f612490c0af98ad6b5b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4c7e21bc965f34fd3d72d9cb2881ca0e9f4bcb64151c1f8562417f8d5125510
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ed58e5b5053e09a44cd1fe67ab0f07cc790b003abec6db1f957ea4f926e4423
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02456140350877193,
   "eval_steps": 14,
-  "global_step": 14,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -30,6 +30,21 @@
       "eval_samples_per_second": 72.293,
       "eval_steps_per_second": 18.073,
       "step": 14
     }
   ],
   "logging_steps": 10,
@@ -49,7 +64,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 924952370872320.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04912280701754386,
   "eval_steps": 14,
+  "global_step": 28,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 72.293,
       "eval_steps_per_second": 18.073,
       "step": 14
+    },
+    {
+      "epoch": 0.03508771929824561,
+      "grad_norm": 3.184525489807129,
+      "learning_rate": 0.0002,
+      "loss": 3.373,
+      "step": 20
+    },
+    {
+      "epoch": 0.04912280701754386,
+      "eval_loss": 1.5565054416656494,
+      "eval_runtime": 3.2952,
+      "eval_samples_per_second": 72.834,
+      "eval_steps_per_second": 18.209,
+      "step": 28
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1849904741744640.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null