Training in progress, step 800, checkpoint

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bff897368000117674ab917c547b982f586c6107309cfbb2c96448dc97e48098
 size 389081912

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfda392f7a94c935387f9bf12a827b16f6fffd7d46c14248e603312888236d00
 size 389081912

last-checkpoint/global_step800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0dabd7e346b6aceb3fd0e119cf55b4c38dc05a6fb22da69f8a6186be397daea
+size 1167094117

last-checkpoint/global_step800/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed8f1bb9ff29c9f4cf6775190b74624fec599750d4793a75a4aef36014f2c490
+size 1167094245

last-checkpoint/global_step800/mp_rank_00_model_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e90d3b640128be11fec02d7f42f6fcf9fd8023afb3081c6b5b5278004fb84ef
+size 1222740115

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step600~~


1	+ global_step800

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e9d16aa7e6166e2439dbd61297667a7a16a2094ce8dd6fa6e0711599b36bc4e
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ef64e1574edf5b383c0a9ec160d79a8b77abcef4ff433f7cbbf54c1c492691
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3cd81cc8241929aa48fca6f3fadc52245b3ac08a33a6d1e16a6ef3e1b487ea1
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:988a2f6ca79ecc74ec145b907cf959d1629de03ec03686b8f87073791df23729
 size 14917

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08276432857438444,
   "eval_steps": 500,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -35,6 +35,15 @@
       "mean_token_accuracy": 0.5912152025103569,
       "num_tokens": 39065992.0,
       "step": 600
     }
   ],
   "logging_steps": 200,
@@ -54,7 +63,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.062955348973322e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11035243809917926,
   "eval_steps": 500,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.5912152025103569,
       "num_tokens": 39065992.0,
       "step": 600
+    },
+    {
+      "epoch": 0.11035243809917926,
+      "grad_norm": 0.35369324684143066,
+      "learning_rate": 1.9561870602841773e-05,
+      "loss": 1.8443,
+      "mean_token_accuracy": 0.5940310730040074,
+      "num_tokens": 52073767.0,
+      "step": 800
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 9.414702487610327e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null