Training in progress, step 12, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb755141d71b34746981158d735bf6eebdb682c01a6703f9cba021b623aa1c4a
 size 41581360

 version https://git-lfs.github.com/spec/v1
+oid sha256:f48c348a378b36fa2a4f7311f920ac4c6d290ec3d9d19a69e92acdbde3a5972c
 size 41581360

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e95636f2c1b9976bb322fd885d65e4d65db798b6144752fa532461c8a881fce4
 size 21505540

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0e475fa23046df7e5979e96e83553c8d3af18145688794e1784ce7a5e5d4707
 size 21505540

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:530eddfbd2f0e25306cd6d0a8655a3cfe1e1af46c6a93abefd8982a321b96e39
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:83eb8ccd812e2a94dfc483ddae0d9b7b7ab4077cf01e4da59286cd13ea8d6029
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0099cb7287625b29b67c4fcf42ff20fae623b429bfb10f5ac695bc54f2be54fd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:183ab984e067f0684f97cf6c258fa276e42ea5c29910668cb653a16e870010e9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.015267175572519083,
   "eval_steps": 6,
-  "global_step": 6,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -65,6 +65,56 @@
       "eval_samples_per_second": 21.797,
       "eval_steps_per_second": 10.899,
       "step": 6
     }
   ],
   "logging_steps": 1,
@@ -84,7 +134,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 300058398425088.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.030534351145038167,
   "eval_steps": 6,
+  "global_step": 12,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.797,
       "eval_steps_per_second": 10.899,
       "step": 6
+    },
+    {
+      "epoch": 0.017811704834605598,
+      "grad_norm": 2.5362422466278076,
+      "learning_rate": 0.00014,
+      "loss": 0.8316,
+      "step": 7
+    },
+    {
+      "epoch": 0.020356234096692113,
+      "grad_norm": 2.6198008060455322,
+      "learning_rate": 0.00016,
+      "loss": 0.6159,
+      "step": 8
+    },
+    {
+      "epoch": 0.022900763358778626,
+      "grad_norm": 2.256551742553711,
+      "learning_rate": 0.00018,
+      "loss": 0.4382,
+      "step": 9
+    },
+    {
+      "epoch": 0.02544529262086514,
+      "grad_norm": 2.475969076156616,
+      "learning_rate": 0.0002,
+      "loss": 0.3562,
+      "step": 10
+    },
+    {
+      "epoch": 0.027989821882951654,
+      "grad_norm": 3.510289192199707,
+      "learning_rate": 0.00019749279121818235,
+      "loss": 0.3165,
+      "step": 11
+    },
+    {
+      "epoch": 0.030534351145038167,
+      "grad_norm": 1.8848199844360352,
+      "learning_rate": 0.0001900968867902419,
+      "loss": 0.2195,
+      "step": 12
+    },
+    {
+      "epoch": 0.030534351145038167,
+      "eval_loss": 0.1976952999830246,
+      "eval_runtime": 7.5845,
+      "eval_samples_per_second": 21.887,
+      "eval_steps_per_second": 10.943,
+      "step": 12
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 600116796850176.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null