Training in progress, step 264, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7503bcb2c3f34a1858d513a2596512e90f6888a1c9572e7fe5512c62e91046c
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:53a1f1c3e2a923e2982197cd900077574f24b219229aa019511f875042742237
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5faf222eddf43df3300085931995fa326f25cdcf01906bf0ee0b2737cdd72beb
 size 37431220

 version https://git-lfs.github.com/spec/v1
+oid sha256:06ce5385174b266947c6bbd0cded6839fb22865d7049d84dd957347235517437
 size 37431220

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e6d08bfa0ecdc29e58b4a5362f7ced16937eb7fa35e931e744fd7df0b5c2675
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2b539f01b7ad3345aa5a738855f4967eed8463acf8b5cd8b2265ca0e07cf0599
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7133058984910837,
   "eval_steps": 500,
-  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1827,6 +1827,34 @@
       "learning_rate": 7.878787878787879e-05,
       "loss": 1.1713,
       "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1846,7 +1874,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2152157285707776e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7242798353909465,
   "eval_steps": 500,
+  "global_step": 264,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 7.878787878787879e-05,
       "loss": 1.1713,
       "step": 260
+    },
+    {
+      "epoch": 0.7160493827160493,
+      "grad_norm": 0.16383200883865356,
+      "learning_rate": 7.803030303030304e-05,
+      "loss": 1.1593,
+      "step": 261
+    },
+    {
+      "epoch": 0.7187928669410151,
+      "grad_norm": 0.16724437475204468,
+      "learning_rate": 7.727272727272727e-05,
+      "loss": 1.3,
+      "step": 262
+    },
+    {
+      "epoch": 0.7215363511659808,
+      "grad_norm": 0.18244515359401703,
+      "learning_rate": 7.651515151515152e-05,
+      "loss": 1.264,
+      "step": 263
+    },
+    {
+      "epoch": 0.7242798353909465,
+      "grad_norm": 0.15599456429481506,
+      "learning_rate": 7.575757575757576e-05,
+      "loss": 1.2125,
+      "step": 264
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.264356883202867e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null