Training in progress, step 63, checkpoint

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ba64bad8552e0c9fa8e5c0cba4dfd84da312a799b94eaa654a185791a7c2c91
 size 83115256

 version https://git-lfs.github.com/spec/v1
+oid sha256:72c4365c90e4673e37a4a2c158f4b3127797f0351d6eea9b8490f9b553c5b266
 size 83115256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48dac4f7c5175a62c2d1a3914987c0bea4dd1bd3b234a472f2e9b22aa244d978
 size 42608388

 version https://git-lfs.github.com/spec/v1
+oid sha256:290fb60611823f559a1778dd2e7fd99444caee266092dc9891526caca2f08df6
 size 42608388

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d06609511f040c8c42a7e00cea1ec053a60567af34ba4410b1ad054b2eb6247
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:42dd685047e6d843f81d8c7a97b9fe8e3a8a836d0baa0ca1458618da98aab18c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c980deaa36d5045c0af89e3e197d07224f4a228b702151413212eb3c9e980524
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c303d82f711309bdf0967f8f4652d2e044e68225e894969369a6e258f673387
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cb270cabc01be9731b23f750c54fb072017bd017706d55fcca97da50f8b0ff8
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb886efaa016e515326abe05c4e04003a68e6a0c5f0758d9cac3926ffc093da1
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a686836c4cfc5a5fdcd6156f1963e12b5f5985ce7c1ab1c8500c5b965b0e85c0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:89bbd9e9f39698108fc006d2cafcf2df8819db67c29ee6862b47f75b545174af
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2011b1e019073e4bafc29de9703ff0a6e7c1252c3a53d804807bd1c99d390d1c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e53b95eff99f2334cbf7ed4c962db83cb42e931305982518e928382563b5670d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3456,
   "eval_steps": 9,
-  "global_step": 54,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -189,6 +189,35 @@
       "eval_samples_per_second": 228.948,
       "eval_steps_per_second": 7.182,
       "step": 54
     }
   ],
   "logging_steps": 3,
@@ -208,7 +237,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.70588303236137e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4032,
   "eval_steps": 9,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 228.948,
       "eval_steps_per_second": 7.182,
       "step": 54
+    },
+    {
+      "epoch": 0.3648,
+      "grad_norm": 0.898175060749054,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 2.0437,
+      "step": 57
+    },
+    {
+      "epoch": 0.384,
+      "grad_norm": 0.8860452175140381,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 2.0314,
+      "step": 60
+    },
+    {
+      "epoch": 0.4032,
+      "grad_norm": 0.7955260872840881,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 2.0663,
+      "step": 63
+    },
+    {
+      "epoch": 0.4032,
+      "eval_loss": 2.0011115074157715,
+      "eval_runtime": 4.6261,
+      "eval_samples_per_second": 227.405,
+      "eval_steps_per_second": 7.133,
+      "step": 63
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.0163565512097792e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null