Training in progress, step 100, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -25
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fca072b2c749413dd5ce3683dd32b4c068e76430d79ceb2f957de0ea2ec09ea
 size 131251312

 version https://git-lfs.github.com/spec/v1
+oid sha256:228058b2e2c5093b95942a8c6188d917712e871458f4a4b99601a43efa4262e2
 size 131251312

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8169611cb1c8a402ac90dcbdee5673db1566e3b8599f63a52edae45763b12854
 size 61093892

 version https://git-lfs.github.com/spec/v1
+oid sha256:064125a886fe952aaedb264e4a7daa313b8e3048086f9d3fb8cea54f7bc2023f
 size 61093892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d7ecf15e83ac4d18e0d90f8a44821af2f304313a6ae05eeb21767226a79c463
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cdce33ebc5972235a89e7008a7bf54a98fa227109b4975663485ad96089f907
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,67 +2,158 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8045977011494253,
   "eval_steps": 500,
-  "global_step": 35,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.11494252873563218,
-      "grad_norm": 0.9143027067184448,
       "learning_rate": 0.00016,
-      "loss": 2.7102,
       "step": 5
     },
     {
       "epoch": 0.22988505747126436,
-      "grad_norm": 0.8938872814178467,
-      "learning_rate": 0.00017333333333333334,
-      "loss": 1.9796,
       "step": 10
     },
     {
       "epoch": 0.3448275862068966,
-      "grad_norm": 0.9273212552070618,
-      "learning_rate": 0.00014,
-      "loss": 1.4091,
       "step": 15
     },
     {
       "epoch": 0.45977011494252873,
-      "grad_norm": 0.556845486164093,
-      "learning_rate": 0.00010666666666666667,
-      "loss": 1.2369,
       "step": 20
     },
     {
       "epoch": 0.5747126436781609,
-      "grad_norm": 0.3468436598777771,
-      "learning_rate": 7.333333333333333e-05,
-      "loss": 1.071,
       "step": 25
     },
     {
       "epoch": 0.6896551724137931,
-      "grad_norm": 0.4009336829185486,
-      "learning_rate": 4e-05,
-      "loss": 1.0702,
       "step": 30
     },
     {
       "epoch": 0.8045977011494253,
-      "grad_norm": 0.2942411005496979,
-      "learning_rate": 6.666666666666667e-06,
-      "loss": 0.9927,
       "step": 35
     }
   ],
   "logging_steps": 5,
-  "max_steps": 35,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 1,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -76,7 +167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1947736416225600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.2758620689655173,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.11494252873563218,
+      "grad_norm": 0.9145353436470032,
       "learning_rate": 0.00016,
+      "loss": 2.7096,
       "step": 5
     },
     {
       "epoch": 0.22988505747126436,
+      "grad_norm": 0.88957279920578,
+      "learning_rate": 0.00019157894736842104,
+      "loss": 1.9743,
       "step": 10
     },
     {
       "epoch": 0.3448275862068966,
+      "grad_norm": 1.1346380710601807,
+      "learning_rate": 0.00018105263157894739,
+      "loss": 1.3837,
       "step": 15
     },
     {
       "epoch": 0.45977011494252873,
+      "grad_norm": 0.4288289546966553,
+      "learning_rate": 0.0001705263157894737,
+      "loss": 1.2195,
       "step": 20
     },
     {
       "epoch": 0.5747126436781609,
+      "grad_norm": 0.3345566391944885,
+      "learning_rate": 0.00016,
+      "loss": 1.0581,
       "step": 25
     },
     {
       "epoch": 0.6896551724137931,
+      "grad_norm": 0.29079899191856384,
+      "learning_rate": 0.00014947368421052633,
+      "loss": 1.0582,
       "step": 30
     },
     {
       "epoch": 0.8045977011494253,
+      "grad_norm": 0.39675313234329224,
+      "learning_rate": 0.00013894736842105264,
+      "loss": 0.9695,
       "step": 35
+    },
+    {
+      "epoch": 0.9195402298850575,
+      "grad_norm": 0.31116706132888794,
+      "learning_rate": 0.00012842105263157895,
+      "loss": 1.1187,
+      "step": 40
+    },
+    {
+      "epoch": 1.0229885057471264,
+      "grad_norm": 0.24762211740016937,
+      "learning_rate": 0.00011789473684210525,
+      "loss": 1.121,
+      "step": 45
+    },
+    {
+      "epoch": 1.1379310344827587,
+      "grad_norm": 0.34917253255844116,
+      "learning_rate": 0.00010736842105263158,
+      "loss": 0.9679,
+      "step": 50
+    },
+    {
+      "epoch": 1.2528735632183907,
+      "grad_norm": 0.36430123448371887,
+      "learning_rate": 9.68421052631579e-05,
+      "loss": 0.9875,
+      "step": 55
+    },
+    {
+      "epoch": 1.367816091954023,
+      "grad_norm": 0.4741533696651459,
+      "learning_rate": 8.631578947368421e-05,
+      "loss": 1.0023,
+      "step": 60
+    },
+    {
+      "epoch": 1.4827586206896552,
+      "grad_norm": 0.25711435079574585,
+      "learning_rate": 7.578947368421054e-05,
+      "loss": 1.0899,
+      "step": 65
+    },
+    {
+      "epoch": 1.5977011494252875,
+      "grad_norm": 0.2915607690811157,
+      "learning_rate": 6.526315789473685e-05,
+      "loss": 1.0803,
+      "step": 70
+    },
+    {
+      "epoch": 1.7126436781609196,
+      "grad_norm": 0.2989424765110016,
+      "learning_rate": 5.4736842105263165e-05,
+      "loss": 1.0777,
+      "step": 75
+    },
+    {
+      "epoch": 1.8275862068965516,
+      "grad_norm": 0.32646244764328003,
+      "learning_rate": 4.421052631578947e-05,
+      "loss": 1.0842,
+      "step": 80
+    },
+    {
+      "epoch": 1.9425287356321839,
+      "grad_norm": 0.2813500463962555,
+      "learning_rate": 3.368421052631579e-05,
+      "loss": 1.1267,
+      "step": 85
+    },
+    {
+      "epoch": 2.045977011494253,
+      "grad_norm": 0.3768353760242462,
+      "learning_rate": 2.3157894736842107e-05,
+      "loss": 0.9741,
+      "step": 90
+    },
+    {
+      "epoch": 2.160919540229885,
+      "grad_norm": 0.29582270979881287,
+      "learning_rate": 1.2631578947368422e-05,
+      "loss": 1.0951,
+      "step": 95
+    },
+    {
+      "epoch": 2.2758620689655173,
+      "grad_norm": 0.3590467572212219,
+      "learning_rate": 2.105263157894737e-06,
+      "loss": 0.9928,
+      "step": 100
     }
   ],
   "logging_steps": 5,
+  "max_steps": 100,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 5557554405792480.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2734847dfff81ea8622046f310eb8aeae1a36e938141362dc50bb07314c32ab2
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:f956480e3a7cc6b3b40b9db889dba9bd9d06ee68c04dd9c29257e7d05713e73d
 size 5752