Training in progress, step 20, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7fdda4247f547bbef47d82b18782e559c41ea5ccb07360802750bd03a68e8a1
 size 95402480

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd1e239ed2b671b38e00aa4eb0aab5776334c708dcb7c2505a8f2c4d6201fc43
 size 95402480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0794549e7e5f6b1b9337cc2010fbbc79074e2b98cedfca199966fa2d4625de5a
 size 190922618

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d4ba61bda1c629af16f3a52efda548d45c27c509c6e581d4e87d6a79297ac6c
 size 190922618

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cda63210db676312cf7ff3e5da12a71fe2d1acee7b091fe2a081541d581404e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5772d5fa91c89f598ea280b7f830741cf9aed4b4f76ab224de73c2c1482ae6ce
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4849a6ac0a1d895740f1ab4eba9d346b8d898008d0cfe93dd108cd928d7c63e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:46d68217ddf3e899bf1495ee33b9ec14e2c91ed912ffcb460a7bb01192b04b7d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0007731560228854183,
   "eval_steps": 8,
-  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -44,6 +44,35 @@
       "learning_rate": 9e-05,
       "loss": 9.7368,
       "step": 9
     }
   ],
   "logging_steps": 3,
@@ -63,7 +92,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2069133017481216.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0015463120457708365,
   "eval_steps": 8,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9e-05,
       "loss": 9.7368,
       "step": 9
+    },
+    {
+      "epoch": 0.000927787227462502,
+      "grad_norm": 6.120208740234375,
+      "learning_rate": 9.755282581475769e-05,
+      "loss": 8.5474,
+      "step": 12
+    },
+    {
+      "epoch": 0.0011597340343281275,
+      "grad_norm": 21.290348052978516,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 8.543,
+      "step": 15
+    },
+    {
+      "epoch": 0.0012370496366166692,
+      "eval_loss": 0.7007141709327698,
+      "eval_runtime": 490.0839,
+      "eval_samples_per_second": 11.112,
+      "eval_steps_per_second": 5.556,
+      "step": 16
+    },
+    {
+      "epoch": 0.0013916808411937528,
+      "grad_norm": 13.922175407409668,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 8.3382,
+      "step": 18
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 3995567206170624.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null