Training in progress, step 1000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step1000/mp_rank_00_model_states.pt +3 -0
last-checkpoint/latest +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb87aacff72e1602dfd126734b3b0a18e3f3d5965e2a314893e17028ba457700
 size 42002584

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbc5d4c3ea89cdc82e2f431aa8d1e7c54e201d61a41d4760248be0ded6f9736d
 size 42002584

last-checkpoint/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52d3be43090292fc3f408bfa7323dda07b05a49247f049954c58fd96ffd03826
+size 251710672

last-checkpoint/global_step1000/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ce18e282a0f287d33a4933b44086a76be864c3d30a6b509b2f377111f5059ef
+size 153747385

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step500~~


1	+ global_step1000

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38d028753a88adea82238f430350e9b4298093ea8e04516836108c27462c1365
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7212cfde88fad4aa734540c0da66cf99f58abbd8a4c5ec6aecca4d821435319d
 size 14244

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.849002849002849,
   "eval_steps": 1000,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -84,6 +84,76 @@
       "learning_rate": 0.00017527527527527528,
       "loss": 0.4327,
       "step": 500
     }
   ],
   "logging_steps": 50,
@@ -103,7 +173,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5049248454213632e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.698005698005698,
   "eval_steps": 1000,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00017527527527527528,
       "loss": 0.4327,
       "step": 500
+    },
+    {
+      "epoch": 3.133903133903134,
+      "grad_norm": 3.940809488296509,
+      "learning_rate": 0.00017277277277277277,
+      "loss": 0.3364,
+      "step": 550
+    },
+    {
+      "epoch": 3.4188034188034186,
+      "grad_norm": 3.061803102493286,
+      "learning_rate": 0.00017027027027027028,
+      "loss": 0.2445,
+      "step": 600
+    },
+    {
+      "epoch": 3.7037037037037037,
+      "grad_norm": 3.389284372329712,
+      "learning_rate": 0.00016776776776776777,
+      "loss": 0.2597,
+      "step": 650
+    },
+    {
+      "epoch": 3.9886039886039883,
+      "grad_norm": 3.320084810256958,
+      "learning_rate": 0.00016526526526526526,
+      "loss": 0.2698,
+      "step": 700
+    },
+    {
+      "epoch": 4.273504273504273,
+      "grad_norm": 2.7199738025665283,
+      "learning_rate": 0.00016276276276276275,
+      "loss": 0.1781,
+      "step": 750
+    },
+    {
+      "epoch": 4.5584045584045585,
+      "grad_norm": 3.226743459701538,
+      "learning_rate": 0.00016026026026026027,
+      "loss": 0.1902,
+      "step": 800
+    },
+    {
+      "epoch": 4.843304843304844,
+      "grad_norm": 4.62879753112793,
+      "learning_rate": 0.00015775775775775776,
+      "loss": 0.209,
+      "step": 850
+    },
+    {
+      "epoch": 5.128205128205128,
+      "grad_norm": 2.747284412384033,
+      "learning_rate": 0.00015525525525525525,
+      "loss": 0.1786,
+      "step": 900
+    },
+    {
+      "epoch": 5.413105413105413,
+      "grad_norm": 2.259187936782837,
+      "learning_rate": 0.00015275275275275277,
+      "loss": 0.1536,
+      "step": 950
+    },
+    {
+      "epoch": 5.698005698005698,
+      "grad_norm": 1.9622772932052612,
+      "learning_rate": 0.00015025025025025026,
+      "loss": 0.156,
+      "step": 1000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 3.0228228471783424e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null