Training in progress, step 3000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77e3105355c2df4b040acd4c2944a96c6e4176ce252181e7699f9ea948f127e2
 size 42002584
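Each binary checkpoint file in this commit is stored as a Git LFS pointer: the diff only records the pointer's oid (a sha256 of the payload) and its size in bytes, not the weights themselves. A minimal sketch, assuming the checkpoint has already been pulled to a local last-checkpoint/ directory (the path is an assumption), for re-checking a downloaded adapter against the pointer recorded above:

import hashlib
from pathlib import Path

# Hedged sketch: verify a pulled LFS object against the sha256 recorded in the pointer above.
# The local path is an assumption; the expected digest and size come from this diff.
expected = "77e3105355c2df4b040acd4c2944a96c6e4176ce252181e7699f9ea948f127e2"
path = Path("last-checkpoint/adapter_model.safetensors")

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        digest.update(chunk)

print("size:", path.stat().st_size)                   # 42002584 per the pointer
print("match:", digest.hexdigest() == expected)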
last-checkpoint/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26b4cc6752f251b6fb91be7b0760735f433091c5b90914b97085bbbb5e101cea
+size 251710672
last-checkpoint/global_step3000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be4ad9578cfb99fd2b3d6f4d885e5682fcfae1e17bb18e6eb202fe75572e5cb0
+size 47955328
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
-
+global_step3000
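The latest file is the DeepSpeed checkpoint tag: it names the global_step3000 directory whose engine states (the two .pt files added above) are loaded when training resumes from last-checkpoint. A minimal sketch, assuming the checkpoint is available locally (the path is an assumption), that resolves the tag and lists those state files:

from pathlib import Path

# Hedged sketch: resolve DeepSpeed's `latest` tag and list the saved engine states.
# "last-checkpoint" is an assumed local path to the checkpoint shown in this commit.
ckpt = Path("last-checkpoint")
tag = (ckpt / "latest").read_text().strip()        # "global_step3000" per the diff
for state in sorted((ckpt / tag).glob("*.pt")):
    print(state.name, state.stat().st_size, "bytes")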
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b552361bcc18d8148b831b255d9e181d4beeb2d1ba3aee8779f232860eb665c7
 size 14244
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 17.094017094017094,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -364,6 +364,76 @@
       "learning_rate": 7.517517517517519e-05,
       "loss": 0.0593,
       "step": 2500
+    },
+    {
+      "epoch": 14.52991452991453,
+      "grad_norm": 0.1685921549797058,
+      "learning_rate": 7.267267267267268e-05,
+      "loss": 0.0579,
+      "step": 2550
+    },
+    {
+      "epoch": 14.814814814814815,
+      "grad_norm": 0.32586222887039185,
+      "learning_rate": 7.017017017017016e-05,
+      "loss": 0.0533,
+      "step": 2600
+    },
+    {
+      "epoch": 15.0997150997151,
+      "grad_norm": 0.6495370864868164,
+      "learning_rate": 6.766766766766767e-05,
+      "loss": 0.0575,
+      "step": 2650
+    },
+    {
+      "epoch": 15.384615384615385,
+      "grad_norm": 0.10936163365840912,
+      "learning_rate": 6.516516516516516e-05,
+      "loss": 0.0539,
+      "step": 2700
+    },
+    {
+      "epoch": 15.66951566951567,
+      "grad_norm": 0.09928351640701294,
+      "learning_rate": 6.266266266266266e-05,
+      "loss": 0.0573,
+      "step": 2750
+    },
+    {
+      "epoch": 15.954415954415955,
+      "grad_norm": 0.07429605722427368,
+      "learning_rate": 6.016016016016016e-05,
+      "loss": 0.0541,
+      "step": 2800
+    },
+    {
+      "epoch": 16.23931623931624,
+      "grad_norm": 0.0647626668214798,
+      "learning_rate": 5.765765765765766e-05,
+      "loss": 0.0546,
+      "step": 2850
+    },
+    {
+      "epoch": 16.524216524216524,
+      "grad_norm": 0.06490299850702286,
+      "learning_rate": 5.515515515515516e-05,
+      "loss": 0.0537,
+      "step": 2900
+    },
+    {
+      "epoch": 16.80911680911681,
+      "grad_norm": 0.07492049783468246,
+      "learning_rate": 5.2652652652652655e-05,
+      "loss": 0.0567,
+      "step": 2950
+    },
+    {
+      "epoch": 17.094017094017094,
+      "grad_norm": 0.18874266743659973,
+      "learning_rate": 5.015015015015015e-05,
+      "loss": 0.0534,
+      "step": 3000
     }
   ],
   "logging_steps": 50,
@@ -383,7 +453,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 8.179327598749286e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
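The updated values are mutually consistent: 3000 optimizer steps at epoch 17.094017094017094 works out to roughly 175.5 steps per epoch, the loss holds around 0.053 to 0.058 across the appended entries, and the learning rate falls by roughly 2.5e-6 every 50 steps, consistent with a linear decay. A minimal sketch, assuming the checkpoint has been pulled locally (the path is an assumption), for reading that log back out of trainer_state.json:

import json
from pathlib import Path

# Hedged sketch: print the logged training curve from trainer_state.json.
# The local path is an assumption; the keys match the entries shown in the diff above.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"], "epoch:", state["epoch"])
for entry in state["log_history"][-10:]:           # the most recently logged entries
    print(entry.get("step"), entry.get("loss"), entry.get("learning_rate"))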