Training in progress, step 1500, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42002584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f6c9491adb6089056fda920e27d5868f9c470a97da0823064432f6788c44756
|
3 |
size 42002584
|
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4d6134158e7c13a72c0bffc0be795bbfef434634617e8cf04f155ed2264f145
|
3 |
+
size 251710672
|
last-checkpoint/global_step1500/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf5aaaab0959575e566af04b3c87174d7001e7459c813c42a628f90466e3b99
|
3 |
+
size 47955328
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1500
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:721c0a2243fa57a1e6ca760a9478078a0f6bb44a899a4293351badcaa44577ca
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -154,6 +154,76 @@
|
|
154 |
"learning_rate": 0.00015025025025025026,
|
155 |
"loss": 0.0876,
|
156 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
}
|
158 |
],
|
159 |
"logging_steps": 50,
|
@@ -173,7 +243,7 @@
|
|
173 |
"attributes": {}
|
174 |
}
|
175 |
},
|
176 |
-
"total_flos":
|
177 |
"train_batch_size": 2,
|
178 |
"trial_name": null,
|
179 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.547008547008547,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 1500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
154 |
"learning_rate": 0.00015025025025025026,
|
155 |
"loss": 0.0876,
|
156 |
"step": 1000
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 5.982905982905983,
|
160 |
+
"grad_norm": 1.0592001676559448,
|
161 |
+
"learning_rate": 0.00014774774774774775,
|
162 |
+
"loss": 0.0909,
|
163 |
+
"step": 1050
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 6.267806267806268,
|
167 |
+
"grad_norm": 1.4785758256912231,
|
168 |
+
"learning_rate": 0.00014524524524524526,
|
169 |
+
"loss": 0.0838,
|
170 |
+
"step": 1100
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 6.552706552706553,
|
174 |
+
"grad_norm": 0.6601145267486572,
|
175 |
+
"learning_rate": 0.00014274274274274275,
|
176 |
+
"loss": 0.0827,
|
177 |
+
"step": 1150
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 6.837606837606837,
|
181 |
+
"grad_norm": 1.1280301809310913,
|
182 |
+
"learning_rate": 0.00014024024024024024,
|
183 |
+
"loss": 0.0762,
|
184 |
+
"step": 1200
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 7.122507122507122,
|
188 |
+
"grad_norm": 1.467143177986145,
|
189 |
+
"learning_rate": 0.00013773773773773776,
|
190 |
+
"loss": 0.0799,
|
191 |
+
"step": 1250
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 7.407407407407407,
|
195 |
+
"grad_norm": 1.773697018623352,
|
196 |
+
"learning_rate": 0.00013523523523523525,
|
197 |
+
"loss": 0.0765,
|
198 |
+
"step": 1300
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 7.6923076923076925,
|
202 |
+
"grad_norm": 3.1821281909942627,
|
203 |
+
"learning_rate": 0.00013273273273273274,
|
204 |
+
"loss": 0.0787,
|
205 |
+
"step": 1350
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 7.977207977207978,
|
209 |
+
"grad_norm": 0.9911046028137207,
|
210 |
+
"learning_rate": 0.00013023023023023023,
|
211 |
+
"loss": 0.0731,
|
212 |
+
"step": 1400
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 8.262108262108262,
|
216 |
+
"grad_norm": 2.560380458831787,
|
217 |
+
"learning_rate": 0.00012772772772772775,
|
218 |
+
"loss": 0.069,
|
219 |
+
"step": 1450
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 8.547008547008547,
|
223 |
+
"grad_norm": 1.88752281665802,
|
224 |
+
"learning_rate": 0.00012522522522522524,
|
225 |
+
"loss": 0.0708,
|
226 |
+
"step": 1500
|
227 |
}
|
228 |
],
|
229 |
"logging_steps": 50,
|
|
|
243 |
"attributes": {}
|
244 |
}
|
245 |
},
|
246 |
+
"total_flos": 4.082014482962842e+16,
|
247 |
"train_batch_size": 2,
|
248 |
"trial_name": null,
|
249 |
"trial_params": null
|