Training in progress, step 2500, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step2500/mp_rank_00_model_states.pt +3 -0
last-checkpoint/latest +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d748f56c86d9b2ec091d44a85ccd94175df63c0bf630350ae13455b500f5873c
 size 42002584

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3f3c155fe4ab7a572e2a5fa77a282892e0e22cc6fc61a8c53fab449042d36d5
 size 42002584

last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7123c17202c44d1134a2cc67dc65c0b8e5a4ed55c834053daf4f258264a8d8e
+size 251710672

last-checkpoint/global_step2500/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6804f47defbca7f6cdbaa7926c39523f91414e3d1d47ca0d149cd29f923df239
+size 47955328

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step2000~~


1	+ global_step2500

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12ff687702c9cb54cfdeb1509074bb19e28d6929ec859a93af35778558181b6e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f01a4ea3836466236461a7e0a4041dbb7858ed9853ebc8a640dc7aeab392d3fa
 size 14244

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 11.396011396011396,
   "eval_steps": 1000,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -294,6 +294,76 @@
       "learning_rate": 0.0001002002002002002,
       "loss": 0.0611,
       "step": 2000
     }
   ],
   "logging_steps": 50,
@@ -313,7 +383,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.442536950726656e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 14.245014245014245,
   "eval_steps": 1000,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001002002002002002,
       "loss": 0.0611,
       "step": 2000
+    },
+    {
+      "epoch": 11.68091168091168,
+      "grad_norm": 0.2988643944263458,
+      "learning_rate": 9.76976976976977e-05,
+      "loss": 0.0607,
+      "step": 2050
+    },
+    {
+      "epoch": 11.965811965811966,
+      "grad_norm": 0.8949713110923767,
+      "learning_rate": 9.51951951951952e-05,
+      "loss": 0.0632,
+      "step": 2100
+    },
+    {
+      "epoch": 12.250712250712251,
+      "grad_norm": 0.11667460948228836,
+      "learning_rate": 9.26926926926927e-05,
+      "loss": 0.0605,
+      "step": 2150
+    },
+    {
+      "epoch": 12.535612535612536,
+      "grad_norm": 0.1387569159269333,
+      "learning_rate": 9.019019019019019e-05,
+      "loss": 0.0605,
+      "step": 2200
+    },
+    {
+      "epoch": 12.820512820512821,
+      "grad_norm": 0.4826744794845581,
+      "learning_rate": 8.76876876876877e-05,
+      "loss": 0.0619,
+      "step": 2250
+    },
+    {
+      "epoch": 13.105413105413106,
+      "grad_norm": 0.09396378695964813,
+      "learning_rate": 8.518518518518518e-05,
+      "loss": 0.0588,
+      "step": 2300
+    },
+    {
+      "epoch": 13.39031339031339,
+      "grad_norm": 0.6452879905700684,
+      "learning_rate": 8.268268268268269e-05,
+      "loss": 0.0584,
+      "step": 2350
+    },
+    {
+      "epoch": 13.675213675213675,
+      "grad_norm": 0.5694031119346619,
+      "learning_rate": 8.018018018018019e-05,
+      "loss": 0.0582,
+      "step": 2400
+    },
+    {
+      "epoch": 13.96011396011396,
+      "grad_norm": 0.34324464201927185,
+      "learning_rate": 7.767767767767768e-05,
+      "loss": 0.0602,
+      "step": 2450
+    },
+    {
+      "epoch": 14.245014245014245,
+      "grad_norm": 0.07841510325670242,
+      "learning_rate": 7.517517517517519e-05,
+      "loss": 0.0593,
+      "step": 2500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 6.806026140306637e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null