Training in progress, step 150, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:038346555883593068d400099f443ffbe19927b96a76193c9f52144a8eeaf1ea
 size 95402480

 version https://git-lfs.github.com/spec/v1
+oid sha256:84cb9b4e3cc2701b2275671820e05cc4b29e6a7942990b4f01dc83ab2b9b20ad
 size 95402480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bc08782f957537aa75152c9aaa95fbeba86f1175ac2bc7ade44bed1d68365ef
 size 48843572

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fcb6169f724813efc0836f537d87a9c6d31737ad8f1e2b3646c99a21b1e3f8b
 size 48843572

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:001ebc33bb12515ad9bf608baa754af5e1565c46e663d6cda25210150092d5bf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2a7f225c9ae6999207d2502cf8d9088baf71b692579a31c0187571fc4d1e4ee
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:431c6c53bdba57f808cd9c2e6f738bda5a26247416d68b6f96cb4f3eef6f54ca
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1320cc0c91c904e1b37627facbc7bdd09ad072c5e9a4fe376e5569a5cdf3a73
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.48639455782312924,
   "eval_steps": 13,
-  "global_step": 143,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -432,6 +432,27 @@
       "eval_samples_per_second": 22.549,
       "eval_steps_per_second": 2.824,
       "step": 143
     }
   ],
   "logging_steps": 3,
@@ -446,12 +467,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.051690368195625e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5102040816326531,
   "eval_steps": 13,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.549,
       "eval_steps_per_second": 2.824,
       "step": 143
+    },
+    {
+      "epoch": 0.4897959183673469,
+      "grad_norm": 6.776644706726074,
+      "learning_rate": 2.262559558016325e-07,
+      "loss": 11.1286,
+      "step": 144
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 11.629470825195312,
+      "learning_rate": 5.662812383859795e-08,
+      "loss": 11.9387,
+      "step": 147
+    },
+    {
+      "epoch": 0.5102040816326531,
+      "grad_norm": 8.106785774230957,
+      "learning_rate": 0.0,
+      "loss": 11.5558,
+      "step": 150
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1030619465606758e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null