Training in progress, step 3800, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3b097fe16dbacfd6384828bee8b0af2e19ba6892664dceddd1f0d546f9c31571
 size 35237104

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e666d9eea54720024abc1ab42e7b827ea7a1bb020bb9aef9673ec0cc15120d8a
 size 18810356

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:957a4cfc2b6817a96aeac2e1fcff9ce55867670984c61b5a97af9cd4dda67c60
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a1dd23ba82b3ca1f0ee26353d9cc3ddfc63459acf74051174c6a70a84f210c52
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.8474695682525635,
   "best_model_checkpoint": "miner_id_24/checkpoint-3600",
-  "epoch": 1.
+  "epoch": 1.0575712248234599,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 3800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26211,6 +26211,714 @@
       "eval_samples_per_second": 59.226,
       "eval_steps_per_second": 14.807,
       "step": 3700
+    },
+    {
+      "epoch": 1.0300205238807527,
+      "grad_norm": 2.34928560256958,
+      "learning_rate": 9.551476636488089e-05,
+      "loss": 2.4443,
+      "step": 3701
+    },
+    {
+      "epoch": 1.030298813789265,
+      "grad_norm": 2.5956695079803467,
+      "learning_rate": 9.547103778606931e-05,
+      "loss": 2.798,
+      "step": 3702
+    },
+    {
+      "epoch": 1.030577103697777,
+      "grad_norm": 2.6470494270324707,
+      "learning_rate": 9.542731007504356e-05,
+      "loss": 2.7361,
+      "step": 3703
+    },
+    {
+      "epoch": 1.0308553936062894,
+      "grad_norm": 2.35915207862854,
+      "learning_rate": 9.538358324018215e-05,
+      "loss": 2.4667,
+      "step": 3704
+    },
+    {
+      "epoch": 1.0311336835148015,
+      "grad_norm": 2.26962947845459,
+      "learning_rate": 9.533985728986359e-05,
+      "loss": 2.5105,
+      "step": 3705
+    },
+    {
+      "epoch": 1.0314119734233138,
+      "grad_norm": 2.273303747177124,
+      "learning_rate": 9.529613223246607e-05,
+      "loss": 2.7761,
+      "step": 3706
+    },
+    {
+      "epoch": 1.031690263331826,
+      "grad_norm": 2.518406867980957,
+      "learning_rate": 9.525240807636766e-05,
+      "loss": 2.6669,
+      "step": 3707
+    },
+    {
+      "epoch": 1.0319685532403382,
+      "grad_norm": 2.45633602142334,
+      "learning_rate": 9.52086848299463e-05,
+      "loss": 2.5069,
+      "step": 3708
+    },
+    {
+      "epoch": 1.0322468431488503,
+      "grad_norm": 3.081700086593628,
+      "learning_rate": 9.51649625015797e-05,
+      "loss": 2.6667,
+      "step": 3709
+    },
+    {
+      "epoch": 1.0325251330573626,
+      "grad_norm": 2.3902275562286377,
+      "learning_rate": 9.512124109964539e-05,
+      "loss": 2.5777,
+      "step": 3710
+    },
+    {
+      "epoch": 1.0328034229658747,
+      "grad_norm": 2.087824583053589,
+      "learning_rate": 9.507752063252078e-05,
+      "loss": 2.4125,
+      "step": 3711
+    },
+    {
+      "epoch": 1.033081712874387,
+      "grad_norm": 2.584324836730957,
+      "learning_rate": 9.503380110858304e-05,
+      "loss": 2.596,
+      "step": 3712
+    },
+    {
+      "epoch": 1.033360002782899,
+      "grad_norm": 2.4409570693969727,
+      "learning_rate": 9.499008253620919e-05,
+      "loss": 2.5021,
+      "step": 3713
+    },
+    {
+      "epoch": 1.0336382926914114,
+      "grad_norm": 2.357409954071045,
+      "learning_rate": 9.494636492377607e-05,
+      "loss": 2.4717,
+      "step": 3714
+    },
+    {
+      "epoch": 1.0339165825999235,
+      "grad_norm": 2.637960195541382,
+      "learning_rate": 9.490264827966033e-05,
+      "loss": 2.7572,
+      "step": 3715
+    },
+    {
+      "epoch": 1.0341948725084356,
+      "grad_norm": 2.5555248260498047,
+      "learning_rate": 9.485893261223842e-05,
+      "loss": 2.6943,
+      "step": 3716
+    },
+    {
+      "epoch": 1.0344731624169479,
+      "grad_norm": 2.3676626682281494,
+      "learning_rate": 9.481521792988664e-05,
+      "loss": 2.7945,
+      "step": 3717
+    },
+    {
+      "epoch": 1.03475145232546,
+      "grad_norm": 2.2306549549102783,
+      "learning_rate": 9.477150424098105e-05,
+      "loss": 2.3112,
+      "step": 3718
+    },
+    {
+      "epoch": 1.0350297422339723,
+      "grad_norm": 2.835977792739868,
+      "learning_rate": 9.47277915538975e-05,
+      "loss": 2.8379,
+      "step": 3719
+    },
+    {
+      "epoch": 1.0353080321424843,
+      "grad_norm": 2.4739816188812256,
+      "learning_rate": 9.468407987701179e-05,
+      "loss": 2.6375,
+      "step": 3720
+    },
+    {
+      "epoch": 1.0355863220509967,
+      "grad_norm": 2.7540197372436523,
+      "learning_rate": 9.464036921869941e-05,
+      "loss": 2.416,
+      "step": 3721
+    },
+    {
+      "epoch": 1.0358646119595087,
+      "grad_norm": 2.581390142440796,
+      "learning_rate": 9.459665958733565e-05,
+      "loss": 2.3666,
+      "step": 3722
+    },
+    {
+      "epoch": 1.036142901868021,
+      "grad_norm": 2.3293237686157227,
+      "learning_rate": 9.455295099129563e-05,
+      "loss": 2.5112,
+      "step": 3723
+    },
+    {
+      "epoch": 1.0364211917765331,
+      "grad_norm": 2.709442377090454,
+      "learning_rate": 9.450924343895428e-05,
+      "loss": 3.0548,
+      "step": 3724
+    },
+    {
+      "epoch": 1.0366994816850454,
+      "grad_norm": 2.5723798274993896,
+      "learning_rate": 9.446553693868633e-05,
+      "loss": 2.7705,
+      "step": 3725
+    },
+    {
+      "epoch": 1.0369777715935575,
+      "grad_norm": 2.4315478801727295,
+      "learning_rate": 9.442183149886627e-05,
+      "loss": 2.7446,
+      "step": 3726
+    },
+    {
+      "epoch": 1.0372560615020698,
+      "grad_norm": 2.1737117767333984,
+      "learning_rate": 9.437812712786844e-05,
+      "loss": 2.534,
+      "step": 3727
+    },
+    {
+      "epoch": 1.037534351410582,
+      "grad_norm": 2.1455442905426025,
+      "learning_rate": 9.433442383406696e-05,
+      "loss": 2.3731,
+      "step": 3728
+    },
+    {
+      "epoch": 1.0378126413190942,
+      "grad_norm": 2.4659476280212402,
+      "learning_rate": 9.429072162583567e-05,
+      "loss": 2.3374,
+      "step": 3729
+    },
+    {
+      "epoch": 1.0380909312276063,
+      "grad_norm": 2.5009284019470215,
+      "learning_rate": 9.424702051154836e-05,
+      "loss": 2.6745,
+      "step": 3730
+    },
+    {
+      "epoch": 1.0383692211361186,
+      "grad_norm": 2.334843158721924,
+      "learning_rate": 9.420332049957846e-05,
+      "loss": 2.3093,
+      "step": 3731
+    },
+    {
+      "epoch": 1.0386475110446307,
+      "grad_norm": 2.483473539352417,
+      "learning_rate": 9.415962159829926e-05,
+      "loss": 2.4984,
+      "step": 3732
+    },
+    {
+      "epoch": 1.038925800953143,
+      "grad_norm": 2.4551875591278076,
+      "learning_rate": 9.411592381608381e-05,
+      "loss": 2.423,
+      "step": 3733
+    },
+    {
+      "epoch": 1.039204090861655,
+      "grad_norm": 2.400089740753174,
+      "learning_rate": 9.407222716130499e-05,
+      "loss": 2.0407,
+      "step": 3734
+    },
+    {
+      "epoch": 1.0394823807701674,
+      "grad_norm": 2.617560863494873,
+      "learning_rate": 9.402853164233538e-05,
+      "loss": 2.8515,
+      "step": 3735
+    },
+    {
+      "epoch": 1.0397606706786795,
+      "grad_norm": 2.454972505569458,
+      "learning_rate": 9.398483726754746e-05,
+      "loss": 2.3012,
+      "step": 3736
+    },
+    {
+      "epoch": 1.0400389605871918,
+      "grad_norm": 2.8407280445098877,
+      "learning_rate": 9.394114404531338e-05,
+      "loss": 2.7096,
+      "step": 3737
+    },
+    {
+      "epoch": 1.040317250495704,
+      "grad_norm": 2.6554818153381348,
+      "learning_rate": 9.389745198400513e-05,
+      "loss": 2.7251,
+      "step": 3738
+    },
+    {
+      "epoch": 1.0405955404042162,
+      "grad_norm": 2.69405460357666,
+      "learning_rate": 9.385376109199448e-05,
+      "loss": 2.3692,
+      "step": 3739
+    },
+    {
+      "epoch": 1.0408738303127283,
+      "grad_norm": 2.626368761062622,
+      "learning_rate": 9.381007137765292e-05,
+      "loss": 2.6053,
+      "step": 3740
+    },
+    {
+      "epoch": 1.0411521202212404,
+      "grad_norm": 2.5650789737701416,
+      "learning_rate": 9.376638284935176e-05,
+      "loss": 2.7401,
+      "step": 3741
+    },
+    {
+      "epoch": 1.0414304101297527,
+      "grad_norm": 2.240908622741699,
+      "learning_rate": 9.372269551546211e-05,
+      "loss": 2.4553,
+      "step": 3742
+    },
+    {
+      "epoch": 1.0417087000382648,
+      "grad_norm": 2.357799530029297,
+      "learning_rate": 9.367900938435479e-05,
+      "loss": 2.818,
+      "step": 3743
+    },
+    {
+      "epoch": 1.041986989946777,
+      "grad_norm": 2.7151036262512207,
+      "learning_rate": 9.36353244644004e-05,
+      "loss": 2.7159,
+      "step": 3744
+    },
+    {
+      "epoch": 1.0422652798552892,
+      "grad_norm": 2.4492530822753906,
+      "learning_rate": 9.359164076396937e-05,
+      "loss": 2.4194,
+      "step": 3745
+    },
+    {
+      "epoch": 1.0425435697638015,
+      "grad_norm": 2.7243385314941406,
+      "learning_rate": 9.354795829143182e-05,
+      "loss": 2.739,
+      "step": 3746
+    },
+    {
+      "epoch": 1.0428218596723136,
+      "grad_norm": 2.5599193572998047,
+      "learning_rate": 9.350427705515766e-05,
+      "loss": 2.6885,
+      "step": 3747
+    },
+    {
+      "epoch": 1.0431001495808259,
+      "grad_norm": 2.4527359008789062,
+      "learning_rate": 9.346059706351659e-05,
+      "loss": 2.5548,
+      "step": 3748
+    },
+    {
+      "epoch": 1.043378439489338,
+      "grad_norm": 2.3975369930267334,
+      "learning_rate": 9.341691832487804e-05,
+      "loss": 2.4877,
+      "step": 3749
+    },
+    {
+      "epoch": 1.0436567293978503,
+      "grad_norm": 2.267547369003296,
+      "learning_rate": 9.337324084761118e-05,
+      "loss": 2.2493,
+      "step": 3750
+    },
+    {
+      "epoch": 1.0439350193063623,
+      "grad_norm": 2.2563583850860596,
+      "learning_rate": 9.332956464008503e-05,
+      "loss": 2.4411,
+      "step": 3751
+    },
+    {
+      "epoch": 1.0442133092148747,
+      "grad_norm": 2.401918649673462,
+      "learning_rate": 9.328588971066827e-05,
+      "loss": 2.8112,
+      "step": 3752
+    },
+    {
+      "epoch": 1.0444915991233867,
+      "grad_norm": 2.5725367069244385,
+      "learning_rate": 9.324221606772935e-05,
+      "loss": 2.5735,
+      "step": 3753
+    },
+    {
+      "epoch": 1.044769889031899,
+      "grad_norm": 2.5067310333251953,
+      "learning_rate": 9.319854371963653e-05,
+      "loss": 2.4901,
+      "step": 3754
+    },
+    {
+      "epoch": 1.0450481789404111,
+      "grad_norm": 2.332139730453491,
+      "learning_rate": 9.315487267475777e-05,
+      "loss": 2.3954,
+      "step": 3755
+    },
+    {
+      "epoch": 1.0453264688489234,
+      "grad_norm": 2.5820467472076416,
+      "learning_rate": 9.311120294146078e-05,
+      "loss": 2.685,
+      "step": 3756
+    },
+    {
+      "epoch": 1.0456047587574355,
+      "grad_norm": 2.7918787002563477,
+      "learning_rate": 9.306753452811308e-05,
+      "loss": 2.8996,
+      "step": 3757
+    },
+    {
+      "epoch": 1.0458830486659478,
+      "grad_norm": 2.6018383502960205,
+      "learning_rate": 9.302386744308185e-05,
+      "loss": 2.6813,
+      "step": 3758
+    },
+    {
+      "epoch": 1.04616133857446,
+      "grad_norm": 2.2214136123657227,
+      "learning_rate": 9.298020169473402e-05,
+      "loss": 2.5446,
+      "step": 3759
+    },
+    {
+      "epoch": 1.0464396284829722,
+      "grad_norm": 2.195864677429199,
+      "learning_rate": 9.293653729143636e-05,
+      "loss": 2.3378,
+      "step": 3760
+    },
+    {
+      "epoch": 1.0467179183914843,
+      "grad_norm": 2.4155166149139404,
+      "learning_rate": 9.289287424155536e-05,
+      "loss": 2.5393,
+      "step": 3761
+    },
+    {
+      "epoch": 1.0469962082999966,
+      "grad_norm": 2.280897617340088,
+      "learning_rate": 9.284921255345715e-05,
+      "loss": 2.472,
+      "step": 3762
+    },
+    {
+      "epoch": 1.0472744982085087,
+      "grad_norm": 2.2828261852264404,
+      "learning_rate": 9.280555223550767e-05,
+      "loss": 2.4396,
+      "step": 3763
+    },
+    {
+      "epoch": 1.047552788117021,
+      "grad_norm": 2.5520870685577393,
+      "learning_rate": 9.27618932960726e-05,
+      "loss": 2.5667,
+      "step": 3764
+    },
+    {
+      "epoch": 1.047831078025533,
+      "grad_norm": 2.394723892211914,
+      "learning_rate": 9.271823574351736e-05,
+      "loss": 2.5952,
+      "step": 3765
+    },
+    {
+      "epoch": 1.0481093679340452,
+      "grad_norm": 2.639838695526123,
+      "learning_rate": 9.267457958620703e-05,
+      "loss": 2.7062,
+      "step": 3766
+    },
+    {
+      "epoch": 1.0483876578425575,
+      "grad_norm": 2.5440590381622314,
+      "learning_rate": 9.263092483250657e-05,
+      "loss": 2.5498,
+      "step": 3767
+    },
+    {
+      "epoch": 1.0486659477510696,
+      "grad_norm": 2.683584213256836,
+      "learning_rate": 9.258727149078055e-05,
+      "loss": 2.6906,
+      "step": 3768
+    },
+    {
+      "epoch": 1.048944237659582,
+      "grad_norm": 2.7254245281219482,
+      "learning_rate": 9.254361956939327e-05,
+      "loss": 2.6168,
+      "step": 3769
+    },
+    {
+      "epoch": 1.049222527568094,
+      "grad_norm": 2.4922702312469482,
+      "learning_rate": 9.249996907670881e-05,
+      "loss": 2.7162,
+      "step": 3770
+    },
+    {
+      "epoch": 1.0495008174766063,
+      "grad_norm": 2.6056606769561768,
+      "learning_rate": 9.245632002109099e-05,
+      "loss": 2.4848,
+      "step": 3771
+    },
+    {
+      "epoch": 1.0497791073851184,
+      "grad_norm": 2.89192271232605,
+      "learning_rate": 9.241267241090327e-05,
+      "loss": 2.6764,
+      "step": 3772
+    },
+    {
+      "epoch": 1.0500573972936307,
+      "grad_norm": 2.1888632774353027,
+      "learning_rate": 9.236902625450893e-05,
+      "loss": 2.3813,
+      "step": 3773
+    },
+    {
+      "epoch": 1.0503356872021428,
+      "grad_norm": 2.4971957206726074,
+      "learning_rate": 9.232538156027091e-05,
+      "loss": 2.3947,
+      "step": 3774
+    },
+    {
+      "epoch": 1.050613977110655,
+      "grad_norm": 2.0770082473754883,
+      "learning_rate": 9.228173833655186e-05,
+      "loss": 2.0168,
+      "step": 3775
+    },
+    {
+      "epoch": 1.0508922670191672,
+      "grad_norm": 2.418344497680664,
+      "learning_rate": 9.223809659171423e-05,
+      "loss": 2.7327,
+      "step": 3776
+    },
+    {
+      "epoch": 1.0511705569276795,
+      "grad_norm": 2.1940460205078125,
+      "learning_rate": 9.21944563341201e-05,
+      "loss": 2.3535,
+      "step": 3777
+    },
+    {
+      "epoch": 1.0514488468361916,
+      "grad_norm": 2.1863420009613037,
+      "learning_rate": 9.215081757213127e-05,
+      "loss": 2.357,
+      "step": 3778
+    },
+    {
+      "epoch": 1.0517271367447039,
+      "grad_norm": 2.4358723163604736,
+      "learning_rate": 9.210718031410934e-05,
+      "loss": 2.5809,
+      "step": 3779
+    },
+    {
+      "epoch": 1.052005426653216,
+      "grad_norm": 2.3010590076446533,
+      "learning_rate": 9.206354456841551e-05,
+      "loss": 2.4569,
+      "step": 3780
+    },
+    {
+      "epoch": 1.0522837165617283,
+      "grad_norm": 2.511343002319336,
+      "learning_rate": 9.201991034341075e-05,
+      "loss": 2.6625,
+      "step": 3781
+    },
+    {
+      "epoch": 1.0525620064702403,
+      "grad_norm": 2.421273708343506,
+      "learning_rate": 9.197627764745577e-05,
+      "loss": 2.472,
+      "step": 3782
+    },
+    {
+      "epoch": 1.0528402963787526,
+      "grad_norm": 2.5087594985961914,
+      "learning_rate": 9.193264648891091e-05,
+      "loss": 2.462,
+      "step": 3783
+    },
+    {
+      "epoch": 1.0531185862872647,
+      "grad_norm": 2.3734774589538574,
+      "learning_rate": 9.188901687613624e-05,
+      "loss": 2.6545,
+      "step": 3784
+    },
+    {
+      "epoch": 1.053396876195777,
+      "grad_norm": 2.4143643379211426,
+      "learning_rate": 9.18453888174916e-05,
+      "loss": 2.4521,
+      "step": 3785
+    },
+    {
+      "epoch": 1.0536751661042891,
+      "grad_norm": 2.7103075981140137,
+      "learning_rate": 9.180176232133647e-05,
+      "loss": 2.9208,
+      "step": 3786
+    },
+    {
+      "epoch": 1.0539534560128014,
+      "grad_norm": 2.6463193893432617,
+      "learning_rate": 9.175813739602996e-05,
+      "loss": 2.7337,
+      "step": 3787
+    },
+    {
+      "epoch": 1.0542317459213135,
+      "grad_norm": 2.1172423362731934,
+      "learning_rate": 9.171451404993105e-05,
+      "loss": 2.3709,
+      "step": 3788
+    },
+    {
+      "epoch": 1.0545100358298258,
+      "grad_norm": 2.3747429847717285,
+      "learning_rate": 9.167089229139829e-05,
+      "loss": 2.469,
+      "step": 3789
+    },
+    {
+      "epoch": 1.054788325738338,
+      "grad_norm": 2.7493515014648438,
+      "learning_rate": 9.162727212878996e-05,
+      "loss": 2.7483,
+      "step": 3790
+    },
+    {
+      "epoch": 1.05506661564685,
+      "grad_norm": 2.1751036643981934,
+      "learning_rate": 9.158365357046405e-05,
+      "loss": 2.3784,
+      "step": 3791
+    },
+    {
+      "epoch": 1.0553449055553623,
+      "grad_norm": 2.5898613929748535,
+      "learning_rate": 9.154003662477821e-05,
+      "loss": 2.4678,
+      "step": 3792
+    },
+    {
+      "epoch": 1.0556231954638744,
+      "grad_norm": 2.432840347290039,
+      "learning_rate": 9.14964213000898e-05,
+      "loss": 2.2535,
+      "step": 3793
+    },
+    {
+      "epoch": 1.0559014853723867,
+      "grad_norm": 2.558281183242798,
+      "learning_rate": 9.14528076047559e-05,
+      "loss": 2.8376,
+      "step": 3794
+    },
+    {
+      "epoch": 1.0561797752808988,
+      "grad_norm": 2.4376842975616455,
+      "learning_rate": 9.140919554713323e-05,
+      "loss": 2.5847,
+      "step": 3795
+    },
+    {
+      "epoch": 1.056458065189411,
+      "grad_norm": 2.5773372650146484,
+      "learning_rate": 9.136558513557818e-05,
+      "loss": 2.3371,
+      "step": 3796
+    },
+    {
+      "epoch": 1.0567363550979232,
+      "grad_norm": 2.790679454803467,
+      "learning_rate": 9.132197637844691e-05,
+      "loss": 2.5507,
+      "step": 3797
+    },
+    {
+      "epoch": 1.0570146450064355,
+      "grad_norm": 2.5002150535583496,
+      "learning_rate": 9.12783692840952e-05,
+      "loss": 2.6795,
+      "step": 3798
+    },
+    {
+      "epoch": 1.0572929349149476,
+      "grad_norm": 2.6883461475372314,
+      "learning_rate": 9.123476386087844e-05,
+      "loss": 2.6772,
+      "step": 3799
+    },
+    {
+      "epoch": 1.0575712248234599,
+      "grad_norm": 2.8790204524993896,
+      "learning_rate": 9.119116011715189e-05,
+      "loss": 2.5414,
+      "step": 3800
+    },
+    {
+      "epoch": 1.0575712248234599,
+      "eval_loss": 2.8708136081695557,
+      "eval_runtime": 84.6585,
+      "eval_samples_per_second": 59.061,
+      "eval_steps_per_second": 14.765,
+      "step": 3800
     }
   ],
   "logging_steps": 1,
@@ -26225,7 +26933,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
@@ -26234,12 +26942,12 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop":
+      "should_training_stop": true
     },
     "attributes": {}
    }
  },
-  "total_flos": 2.
+  "total_flos": 2.73962634313728e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null