Training in progress, step 170000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step170000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step170000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f0a873d3e5cec0b20feea1679b557aa52226ecf87d514140650491e6c4224173
 size 42002584
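Each entry above is a Git LFS pointer, not the payload itself: the repository stores only the payload's sha256 (the oid) and its size in bytes (42002584 for the adapter, about 42 MB). A minimal verification sketch in Python, assuming the payload has already been downloaded to the path below (path hypothetical, standard library only):

import hashlib

# Hypothetical path to the downloaded payload (the real file, not the pointer).
path = "last-checkpoint/adapter_model.safetensors"
# oid recorded in the pointer above; the payload must hash to exactly this.
expected = "f0a873d3e5cec0b20feea1679b557aa52226ecf87d514140650491e6c4224173"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == expected, "payload does not match the LFS pointer oid"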
last-checkpoint/global_step170000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afc265c613d3f39521ec54bf81997ac91dfe687c11dff372b6b8f0b38ab93cc0
+size 251710672
last-checkpoint/global_step170000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81cb5c9f5271a23d1de216a273d79886410951a5158990bf67a9a06be669359f
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step170000
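The latest file is DeepSpeed's checkpoint tag: it names the subdirectory (global_step170000) holding the ZeRO-partitioned optimizer and model states committed above. A minimal sketch of consolidating those shards back into a single fp32 state dict, assuming DeepSpeed is installed and the checkpoint was fetched to last-checkpoint/ (a sketch, not this repo's own tooling):

from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# Reads last-checkpoint/latest to resolve the tag (global_step170000), then
# merges the partitioned optimizer/model shards into one fp32 state dict.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
print(sum(t.numel() for t in state_dict.values()), "parameters consolidated")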
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4793ef9903ff177bf19c3b5b4511c3654a7cfbe2b64841a245379d48de8867a7
 size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 5.075687457080584,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 170000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -22414,6 +22414,1406 @@
       "learning_rate": 0.00015428716735524205,
       "loss": 1.2157,
       "step": 160000
+    },
+    {
+      "epoch": 4.778610455916161,
+      "grad_norm": 6.003055572509766,
+      "learning_rate": 0.0001542728815593232,
+      "loss": 1.2021,
+      "step": 160050
+    },
+    {
+      "epoch": 4.780103305168244,
+      "grad_norm": 3.8452155590057373,
+      "learning_rate": 0.00015425859576340437,
+      "loss": 1.254,
+      "step": 160100
+    },
+    {
+      "epoch": 4.781596154420327,
+      "grad_norm": 4.149327278137207,
+      "learning_rate": 0.00015424430996748554,
+      "loss": 1.235,
+      "step": 160150
+    },
+    {
+      "epoch": 4.783089003672409,
+      "grad_norm": 4.331029415130615,
+      "learning_rate": 0.0001542300241715667,
+      "loss": 1.2077,
+      "step": 160200
+    },
+    {
+      "epoch": 4.784581852924491,
+      "grad_norm": 4.8423662185668945,
+      "learning_rate": 0.00015421573837564787,
+      "loss": 1.2223,
+      "step": 160250
+    },
+    {
+      "epoch": 4.786074702176574,
+      "grad_norm": 4.183012008666992,
+      "learning_rate": 0.00015420145257972903,
+      "loss": 1.2438,
+      "step": 160300
+    },
+    {
+      "epoch": 4.787567551428657,
+      "grad_norm": 3.7776644229888916,
+      "learning_rate": 0.0001541871667838102,
+      "loss": 1.2093,
+      "step": 160350
+    },
+    {
+      "epoch": 4.789060400680739,
+      "grad_norm": 4.586690902709961,
+      "learning_rate": 0.0001541728809878914,
+      "loss": 1.2353,
+      "step": 160400
+    },
+    {
+      "epoch": 4.790553249932822,
+      "grad_norm": 4.448139190673828,
+      "learning_rate": 0.00015415859519197253,
+      "loss": 1.2149,
+      "step": 160450
+    },
+    {
+      "epoch": 4.792046099184905,
+      "grad_norm": 4.895113945007324,
+      "learning_rate": 0.00015414430939605372,
+      "loss": 1.2682,
+      "step": 160500
+    },
+    {
+      "epoch": 4.793538948436987,
+      "grad_norm": 6.31041955947876,
+      "learning_rate": 0.00015413002360013486,
+      "loss": 1.2296,
+      "step": 160550
+    },
+    {
+      "epoch": 4.795031797689069,
+      "grad_norm": 5.237212181091309,
+      "learning_rate": 0.00015411573780421602,
+      "loss": 1.2121,
+      "step": 160600
+    },
+    {
+      "epoch": 4.796524646941152,
+      "grad_norm": 5.017521381378174,
+      "learning_rate": 0.00015410145200829719,
+      "loss": 1.2413,
+      "step": 160650
+    },
+    {
+      "epoch": 4.798017496193235,
+      "grad_norm": 4.569025039672852,
+      "learning_rate": 0.00015408716621237835,
+      "loss": 1.2321,
+      "step": 160700
+    },
+    {
+      "epoch": 4.7995103454453165,
+      "grad_norm": 4.4759650230407715,
+      "learning_rate": 0.00015407288041645954,
+      "loss": 1.2579,
+      "step": 160750
+    },
+    {
+      "epoch": 4.801003194697399,
+      "grad_norm": 4.48403263092041,
+      "learning_rate": 0.00015405859462054068,
+      "loss": 1.2332,
+      "step": 160800
+    },
+    {
+      "epoch": 4.802496043949482,
+      "grad_norm": 4.603891849517822,
+      "learning_rate": 0.00015404430882462187,
+      "loss": 1.2004,
+      "step": 160850
+    },
+    {
+      "epoch": 4.803988893201565,
+      "grad_norm": 3.970634937286377,
+      "learning_rate": 0.000154030023028703,
+      "loss": 1.2121,
+      "step": 160900
+    },
+    {
+      "epoch": 4.805481742453647,
+      "grad_norm": 5.254501819610596,
+      "learning_rate": 0.0001540157372327842,
+      "loss": 1.3071,
+      "step": 160950
+    },
+    {
+      "epoch": 4.80697459170573,
+      "grad_norm": 3.6827309131622314,
+      "learning_rate": 0.00015400145143686534,
+      "loss": 1.2399,
+      "step": 161000
+    },
+    {
+      "epoch": 4.808467440957812,
+      "grad_norm": 4.880918502807617,
+      "learning_rate": 0.00015398716564094653,
+      "loss": 1.1842,
+      "step": 161050
+    },
+    {
+      "epoch": 4.809960290209895,
+      "grad_norm": 3.4792227745056152,
+      "learning_rate": 0.0001539728798450277,
+      "loss": 1.22,
+      "step": 161100
+    },
+    {
+      "epoch": 4.811453139461977,
+      "grad_norm": 4.746030807495117,
+      "learning_rate": 0.00015395859404910886,
+      "loss": 1.2068,
+      "step": 161150
+    },
+    {
+      "epoch": 4.81294598871406,
+      "grad_norm": 4.76997709274292,
+      "learning_rate": 0.00015394430825319002,
+      "loss": 1.2713,
+      "step": 161200
+    },
+    {
+      "epoch": 4.814438837966142,
+      "grad_norm": 3.699706792831421,
+      "learning_rate": 0.0001539300224572712,
+      "loss": 1.2059,
+      "step": 161250
+    },
+    {
+      "epoch": 4.8159316872182245,
+      "grad_norm": 5.2487664222717285,
+      "learning_rate": 0.00015391573666135235,
+      "loss": 1.2295,
+      "step": 161300
+    },
+    {
+      "epoch": 4.817424536470307,
+      "grad_norm": 4.549964904785156,
+      "learning_rate": 0.00015390145086543352,
+      "loss": 1.2362,
+      "step": 161350
+    },
+    {
+      "epoch": 4.81891738572239,
+      "grad_norm": 6.63731575012207,
+      "learning_rate": 0.00015388716506951468,
+      "loss": 1.2018,
+      "step": 161400
+    },
+    {
+      "epoch": 4.820410234974473,
+      "grad_norm": 4.054471969604492,
+      "learning_rate": 0.00015387287927359585,
+      "loss": 1.2234,
+      "step": 161450
+    },
+    {
+      "epoch": 4.8219030842265544,
+      "grad_norm": 4.350498199462891,
+      "learning_rate": 0.000153858593477677,
+      "loss": 1.2006,
+      "step": 161500
+    },
+    {
+      "epoch": 4.823395933478637,
+      "grad_norm": 4.074748992919922,
+      "learning_rate": 0.0001538443076817582,
+      "loss": 1.2353,
+      "step": 161550
+    },
+    {
+      "epoch": 4.82488878273072,
+      "grad_norm": 4.194019317626953,
+      "learning_rate": 0.00015383002188583934,
+      "loss": 1.2294,
+      "step": 161600
+    },
+    {
+      "epoch": 4.826381631982803,
+      "grad_norm": 4.11326789855957,
+      "learning_rate": 0.00015381573608992053,
+      "loss": 1.2195,
+      "step": 161650
+    },
+    {
+      "epoch": 4.827874481234885,
+      "grad_norm": 5.007305145263672,
+      "learning_rate": 0.00015380145029400167,
+      "loss": 1.221,
+      "step": 161700
+    },
+    {
+      "epoch": 4.829367330486967,
+      "grad_norm": 5.089259624481201,
+      "learning_rate": 0.00015378716449808286,
+      "loss": 1.1968,
+      "step": 161750
+    },
+    {
+      "epoch": 4.83086017973905,
+      "grad_norm": 5.502153396606445,
+      "learning_rate": 0.000153772878702164,
+      "loss": 1.2386,
+      "step": 161800
+    },
+    {
+      "epoch": 4.8323530289911325,
+      "grad_norm": 4.014642238616943,
+      "learning_rate": 0.0001537585929062452,
+      "loss": 1.2358,
+      "step": 161850
+    },
+    {
+      "epoch": 4.833845878243215,
+      "grad_norm": 4.9020915031433105,
+      "learning_rate": 0.00015374430711032636,
+      "loss": 1.1838,
+      "step": 161900
+    },
+    {
+      "epoch": 4.835338727495298,
+      "grad_norm": 3.0818114280700684,
+      "learning_rate": 0.00015373002131440752,
+      "loss": 1.3071,
+      "step": 161950
+    },
+    {
+      "epoch": 4.83683157674738,
+      "grad_norm": 4.447099208831787,
+      "learning_rate": 0.00015371573551848869,
+      "loss": 1.2656,
+      "step": 162000
+    },
+    {
+      "epoch": 4.838324425999462,
+      "grad_norm": 3.805255174636841,
+      "learning_rate": 0.00015370144972256985,
+      "loss": 1.233,
+      "step": 162050
+    },
+    {
+      "epoch": 4.839817275251545,
+      "grad_norm": 4.1598968505859375,
+      "learning_rate": 0.00015368716392665102,
+      "loss": 1.256,
+      "step": 162100
+    },
+    {
+      "epoch": 4.841310124503628,
+      "grad_norm": 4.9868364334106445,
+      "learning_rate": 0.00015367287813073218,
+      "loss": 1.1727,
+      "step": 162150
+    },
+    {
+      "epoch": 4.84280297375571,
+      "grad_norm": 5.927983283996582,
+      "learning_rate": 0.00015365859233481334,
+      "loss": 1.1832,
+      "step": 162200
+    },
+    {
+      "epoch": 4.844295823007792,
+      "grad_norm": 4.542764663696289,
+      "learning_rate": 0.0001536443065388945,
+      "loss": 1.2064,
+      "step": 162250
+    },
+    {
+      "epoch": 4.845788672259875,
+      "grad_norm": 5.132274627685547,
+      "learning_rate": 0.00015363002074297567,
+      "loss": 1.2295,
+      "step": 162300
+    },
+    {
+      "epoch": 4.847281521511958,
+      "grad_norm": 4.797247409820557,
+      "learning_rate": 0.00015361573494705687,
+      "loss": 1.1985,
+      "step": 162350
+    },
+    {
+      "epoch": 4.8487743707640405,
+      "grad_norm": 3.799029588699341,
+      "learning_rate": 0.000153601449151138,
+      "loss": 1.2029,
+      "step": 162400
+    },
+    {
+      "epoch": 4.850267220016123,
+      "grad_norm": 3.758556842803955,
+      "learning_rate": 0.0001535871633552192,
+      "loss": 1.2401,
+      "step": 162450
+    },
+    {
+      "epoch": 4.851760069268205,
+      "grad_norm": 4.282034397125244,
+      "learning_rate": 0.00015357287755930033,
+      "loss": 1.1842,
+      "step": 162500
+    },
+    {
+      "epoch": 4.853252918520288,
+      "grad_norm": 4.154748916625977,
+      "learning_rate": 0.00015355859176338152,
+      "loss": 1.2201,
+      "step": 162550
+    },
+    {
+      "epoch": 4.85474576777237,
+      "grad_norm": 4.305574893951416,
+      "learning_rate": 0.0001535443059674627,
+      "loss": 1.206,
+      "step": 162600
+    },
+    {
+      "epoch": 4.856238617024453,
+      "grad_norm": 4.58249568939209,
+      "learning_rate": 0.00015353002017154385,
+      "loss": 1.1851,
+      "step": 162650
+    },
+    {
+      "epoch": 4.857731466276535,
+      "grad_norm": 3.7399301528930664,
+      "learning_rate": 0.00015351573437562502,
+      "loss": 1.2155,
+      "step": 162700
+    },
+    {
+      "epoch": 4.859224315528618,
+      "grad_norm": 4.139153480529785,
+      "learning_rate": 0.00015350144857970618,
+      "loss": 1.2233,
+      "step": 162750
+    },
+    {
+      "epoch": 4.8607171647807,
+      "grad_norm": 4.756731986999512,
+      "learning_rate": 0.00015348716278378735,
+      "loss": 1.2671,
+      "step": 162800
+    },
+    {
+      "epoch": 4.862210014032783,
+      "grad_norm": 4.127963542938232,
+      "learning_rate": 0.0001534728769878685,
+      "loss": 1.2087,
+      "step": 162850
+    },
+    {
+      "epoch": 4.863702863284866,
+      "grad_norm": 5.56052303314209,
+      "learning_rate": 0.00015345859119194968,
+      "loss": 1.2257,
+      "step": 162900
+    },
+    {
+      "epoch": 4.8651957125369485,
+      "grad_norm": 6.622554302215576,
+      "learning_rate": 0.00015344430539603084,
+      "loss": 1.2274,
+      "step": 162950
+    },
+    {
+      "epoch": 4.86668856178903,
+      "grad_norm": 3.089730739593506,
+      "learning_rate": 0.000153430019600112,
+      "loss": 1.2615,
+      "step": 163000
+    },
+    {
+      "epoch": 4.868181411041113,
+      "grad_norm": 5.314934253692627,
+      "learning_rate": 0.0001534157338041932,
+      "loss": 1.2035,
+      "step": 163050
+    },
+    {
+      "epoch": 4.869674260293196,
+      "grad_norm": 4.352352619171143,
+      "learning_rate": 0.00015340144800827434,
+      "loss": 1.2113,
+      "step": 163100
+    },
+    {
+      "epoch": 4.871167109545278,
+      "grad_norm": 4.171778202056885,
+      "learning_rate": 0.00015338716221235553,
+      "loss": 1.1977,
+      "step": 163150
+    },
+    {
+      "epoch": 4.87265995879736,
+      "grad_norm": 3.6624767780303955,
+      "learning_rate": 0.00015337287641643666,
+      "loss": 1.1419,
+      "step": 163200
+    },
+    {
+      "epoch": 4.874152808049443,
+      "grad_norm": 3.778160810470581,
+      "learning_rate": 0.00015335859062051783,
+      "loss": 1.257,
+      "step": 163250
+    },
+    {
+      "epoch": 4.875645657301526,
+      "grad_norm": 4.410187244415283,
+      "learning_rate": 0.000153344304824599,
+      "loss": 1.1846,
+      "step": 163300
+    },
+    {
+      "epoch": 4.877138506553608,
+      "grad_norm": 4.407317161560059,
+      "learning_rate": 0.00015333001902868016,
+      "loss": 1.2594,
+      "step": 163350
+    },
+    {
+      "epoch": 4.878631355805691,
+      "grad_norm": 3.622882843017578,
+      "learning_rate": 0.00015331573323276135,
+      "loss": 1.1928,
+      "step": 163400
+    },
+    {
+      "epoch": 4.880124205057773,
+      "grad_norm": 4.268970012664795,
+      "learning_rate": 0.0001533014474368425,
+      "loss": 1.2575,
+      "step": 163450
+    },
+    {
+      "epoch": 4.881617054309856,
+      "grad_norm": 5.875527381896973,
+      "learning_rate": 0.00015328716164092368,
+      "loss": 1.1402,
+      "step": 163500
+    },
+    {
+      "epoch": 4.883109903561938,
+      "grad_norm": 4.78771448135376,
+      "learning_rate": 0.00015327287584500482,
+      "loss": 1.2003,
+      "step": 163550
+    },
+    {
+      "epoch": 4.884602752814021,
+      "grad_norm": 4.128746509552002,
+      "learning_rate": 0.000153258590049086,
+      "loss": 1.1964,
+      "step": 163600
+    },
+    {
+      "epoch": 4.886095602066104,
+      "grad_norm": 2.846057176589966,
+      "learning_rate": 0.00015324430425316715,
+      "loss": 1.2154,
+      "step": 163650
+    },
+    {
+      "epoch": 4.8875884513181855,
+      "grad_norm": 3.893822193145752,
+      "learning_rate": 0.00015323001845724834,
+      "loss": 1.1727,
+      "step": 163700
+    },
+    {
+      "epoch": 4.889081300570268,
+      "grad_norm": 3.671567916870117,
+      "learning_rate": 0.0001532157326613295,
+      "loss": 1.247,
+      "step": 163750
+    },
+    {
+      "epoch": 4.890574149822351,
+      "grad_norm": 4.6587066650390625,
+      "learning_rate": 0.00015320144686541067,
+      "loss": 1.2278,
+      "step": 163800
+    },
+    {
+      "epoch": 4.892066999074434,
+      "grad_norm": 4.010086536407471,
+      "learning_rate": 0.00015318716106949183,
+      "loss": 1.2153,
+      "step": 163850
+    },
+    {
+      "epoch": 4.893559848326516,
+      "grad_norm": 3.343904495239258,
+      "learning_rate": 0.000153172875273573,
+      "loss": 1.1807,
+      "step": 163900
+    },
+    {
+      "epoch": 4.895052697578598,
+      "grad_norm": 4.4615702629089355,
+      "learning_rate": 0.00015315858947765416,
+      "loss": 1.1961,
+      "step": 163950
+    },
+    {
+      "epoch": 4.896545546830681,
+      "grad_norm": 4.361425399780273,
+      "learning_rate": 0.00015314430368173533,
+      "loss": 1.2436,
+      "step": 164000
+    },
+    {
+      "epoch": 4.898038396082764,
+      "grad_norm": 4.161865234375,
+      "learning_rate": 0.0001531300178858165,
+      "loss": 1.2104,
+      "step": 164050
+    },
+    {
+      "epoch": 4.899531245334846,
+      "grad_norm": 5.719996929168701,
+      "learning_rate": 0.00015311573208989766,
+      "loss": 1.2363,
+      "step": 164100
+    },
+    {
+      "epoch": 4.901024094586928,
+      "grad_norm": 4.253158092498779,
+      "learning_rate": 0.00015310144629397882,
+      "loss": 1.2513,
+      "step": 164150
+    },
+    {
+      "epoch": 4.902516943839011,
+      "grad_norm": 2.575803518295288,
+      "learning_rate": 0.00015308716049806,
+      "loss": 1.1488,
+      "step": 164200
+    },
+    {
+      "epoch": 4.9040097930910935,
+      "grad_norm": 6.142227649688721,
+      "learning_rate": 0.00015307287470214115,
+      "loss": 1.2197,
+      "step": 164250
+    },
+    {
+      "epoch": 4.905502642343176,
+      "grad_norm": 5.662699222564697,
+      "learning_rate": 0.00015305858890622234,
+      "loss": 1.2282,
+      "step": 164300
+    },
+    {
+      "epoch": 4.906995491595259,
+      "grad_norm": 4.345313549041748,
+      "learning_rate": 0.00015304430311030348,
+      "loss": 1.263,
+      "step": 164350
+    },
+    {
+      "epoch": 4.908488340847342,
+      "grad_norm": 4.106235980987549,
+      "learning_rate": 0.00015303001731438467,
+      "loss": 1.2568,
+      "step": 164400
+    },
+    {
+      "epoch": 4.9099811900994235,
+      "grad_norm": 5.795431613922119,
+      "learning_rate": 0.0001530157315184658,
+      "loss": 1.1668,
+      "step": 164450
+    },
+    {
+      "epoch": 4.911474039351506,
+      "grad_norm": 4.605996608734131,
+      "learning_rate": 0.000153001445722547,
+      "loss": 1.2513,
+      "step": 164500
+    },
+    {
+      "epoch": 4.912966888603589,
+      "grad_norm": 6.11320686340332,
+      "learning_rate": 0.00015298715992662816,
+      "loss": 1.1922,
+      "step": 164550
+    },
+    {
+      "epoch": 4.914459737855672,
+      "grad_norm": 6.641072750091553,
+      "learning_rate": 0.00015297287413070933,
+      "loss": 1.2272,
+      "step": 164600
+    },
+    {
+      "epoch": 4.915952587107753,
+      "grad_norm": 3.955686092376709,
+      "learning_rate": 0.0001529585883347905,
+      "loss": 1.2089,
+      "step": 164650
+    },
+    {
+      "epoch": 4.917445436359836,
+      "grad_norm": 3.445519208908081,
+      "learning_rate": 0.00015294430253887166,
+      "loss": 1.2013,
+      "step": 164700
+    },
+    {
+      "epoch": 4.918938285611919,
+      "grad_norm": 5.487844944000244,
+      "learning_rate": 0.00015293001674295282,
+      "loss": 1.2818,
+      "step": 164750
+    },
+    {
+      "epoch": 4.9204311348640015,
+      "grad_norm": 5.098935127258301,
+      "learning_rate": 0.000152915730947034,
+      "loss": 1.2195,
+      "step": 164800
+    },
+    {
+      "epoch": 4.921923984116084,
+      "grad_norm": 3.2200589179992676,
+      "learning_rate": 0.00015290144515111515,
+      "loss": 1.2425,
+      "step": 164850
+    },
+    {
+      "epoch": 4.923416833368167,
+      "grad_norm": 5.7381205558776855,
+      "learning_rate": 0.00015288715935519632,
+      "loss": 1.2012,
+      "step": 164900
+    },
+    {
+      "epoch": 4.924909682620249,
+      "grad_norm": 4.34859037399292,
+      "learning_rate": 0.00015287287355927748,
+      "loss": 1.2595,
+      "step": 164950
+    },
+    {
+      "epoch": 4.9264025318723315,
+      "grad_norm": 4.303558349609375,
+      "learning_rate": 0.00015285858776335867,
+      "loss": 1.2802,
+      "step": 165000
+    },
+    {
+      "epoch": 4.927895381124414,
+      "grad_norm": 3.9082958698272705,
+      "learning_rate": 0.0001528443019674398,
+      "loss": 1.2252,
+      "step": 165050
+    },
+    {
+      "epoch": 4.929388230376497,
+      "grad_norm": 3.7715795040130615,
+      "learning_rate": 0.000152830016171521,
+      "loss": 1.2849,
+      "step": 165100
+    },
+    {
+      "epoch": 4.930881079628579,
+      "grad_norm": 5.810252666473389,
+      "learning_rate": 0.00015281573037560214,
+      "loss": 1.1903,
+      "step": 165150
+    },
+    {
+      "epoch": 4.932373928880661,
+      "grad_norm": 4.941216468811035,
+      "learning_rate": 0.00015280144457968333,
+      "loss": 1.199,
+      "step": 165200
+    },
+    {
+      "epoch": 4.933866778132744,
+      "grad_norm": 4.764664173126221,
+      "learning_rate": 0.0001527871587837645,
+      "loss": 1.2358,
+      "step": 165250
+    },
+    {
+      "epoch": 4.935359627384827,
+      "grad_norm": 4.884801387786865,
+      "learning_rate": 0.00015277287298784566,
+      "loss": 1.168,
+      "step": 165300
+    },
+    {
+      "epoch": 4.9368524766369095,
+      "grad_norm": 4.915900230407715,
+      "learning_rate": 0.00015275858719192683,
+      "loss": 1.2395,
+      "step": 165350
+    },
+    {
+      "epoch": 4.938345325888992,
+      "grad_norm": 4.347561836242676,
+      "learning_rate": 0.000152744301396008,
+      "loss": 1.2134,
+      "step": 165400
+    },
+    {
+      "epoch": 4.939838175141074,
+      "grad_norm": 5.786562919616699,
+      "learning_rate": 0.00015273001560008916,
+      "loss": 1.2413,
+      "step": 165450
+    },
+    {
+      "epoch": 4.941331024393157,
+      "grad_norm": 4.780857086181641,
+      "learning_rate": 0.00015271572980417032,
+      "loss": 1.2336,
+      "step": 165500
+    },
+    {
+      "epoch": 4.9428238736452395,
+      "grad_norm": 4.805835723876953,
+      "learning_rate": 0.00015270144400825148,
+      "loss": 1.2464,
+      "step": 165550
+    },
+    {
+      "epoch": 4.944316722897322,
+      "grad_norm": 4.181761264801025,
+      "learning_rate": 0.00015268715821233265,
+      "loss": 1.2486,
+      "step": 165600
+    },
+    {
+      "epoch": 4.945809572149404,
+      "grad_norm": 6.043191432952881,
+      "learning_rate": 0.00015267287241641381,
+      "loss": 1.2485,
+      "step": 165650
+    },
+    {
+      "epoch": 4.947302421401487,
+      "grad_norm": 3.7125275135040283,
+      "learning_rate": 0.00015265858662049498,
+      "loss": 1.2216,
+      "step": 165700
+    },
+    {
+      "epoch": 4.948795270653569,
+      "grad_norm": 4.299423694610596,
+      "learning_rate": 0.00015264430082457614,
+      "loss": 1.1784,
+      "step": 165750
+    },
+    {
+      "epoch": 4.950288119905652,
+      "grad_norm": 3.839174270629883,
+      "learning_rate": 0.00015263001502865734,
+      "loss": 1.2377,
+      "step": 165800
+    },
+    {
+      "epoch": 4.951780969157735,
+      "grad_norm": 4.8078837394714355,
+      "learning_rate": 0.00015261572923273847,
+      "loss": 1.2435,
+      "step": 165850
+    },
+    {
+      "epoch": 4.953273818409817,
+      "grad_norm": 4.367580413818359,
+      "learning_rate": 0.00015260144343681964,
+      "loss": 1.1683,
+      "step": 165900
+    },
+    {
+      "epoch": 4.954766667661899,
+      "grad_norm": 4.1319756507873535,
+      "learning_rate": 0.0001525871576409008,
+      "loss": 1.259,
+      "step": 165950
+    },
+    {
+      "epoch": 4.956259516913982,
+      "grad_norm": 4.9268293380737305,
+      "learning_rate": 0.00015257287184498197,
+      "loss": 1.1774,
+      "step": 166000
+    },
+    {
+      "epoch": 4.957752366166065,
+      "grad_norm": 3.851490020751953,
+      "learning_rate": 0.00015255858604906316,
+      "loss": 1.2051,
+      "step": 166050
+    },
+    {
+      "epoch": 4.9592452154181474,
+      "grad_norm": 6.583017826080322,
+      "learning_rate": 0.0001525443002531443,
+      "loss": 1.2744,
+      "step": 166100
+    },
+    {
+      "epoch": 4.960738064670229,
+      "grad_norm": 3.7507100105285645,
+      "learning_rate": 0.0001525300144572255,
+      "loss": 1.1927,
+      "step": 166150
+    },
+    {
+      "epoch": 4.962230913922312,
+      "grad_norm": 5.387413501739502,
+      "learning_rate": 0.00015251572866130663,
+      "loss": 1.203,
+      "step": 166200
+    },
+    {
+      "epoch": 4.963723763174395,
+      "grad_norm": 3.7446823120117188,
+      "learning_rate": 0.00015250144286538782,
+      "loss": 1.2062,
+      "step": 166250
+    },
+    {
+      "epoch": 4.965216612426477,
+      "grad_norm": 4.162785053253174,
+      "learning_rate": 0.00015248715706946895,
+      "loss": 1.1969,
+      "step": 166300
+    },
+    {
+      "epoch": 4.96670946167856,
+      "grad_norm": 4.15479040145874,
+      "learning_rate": 0.00015247287127355015,
+      "loss": 1.2164,
+      "step": 166350
+    },
+    {
+      "epoch": 4.968202310930642,
+      "grad_norm": 4.103176593780518,
+      "learning_rate": 0.0001524585854776313,
+      "loss": 1.2318,
+      "step": 166400
+    },
+    {
+      "epoch": 4.969695160182725,
+      "grad_norm": 4.903412818908691,
+      "learning_rate": 0.00015244429968171248,
+      "loss": 1.1984,
+      "step": 166450
+    },
+    {
+      "epoch": 4.971188009434807,
+      "grad_norm": 5.026261806488037,
+      "learning_rate": 0.00015243001388579364,
+      "loss": 1.2472,
+      "step": 166500
+    },
+    {
+      "epoch": 4.97268085868689,
+      "grad_norm": 4.777016639709473,
+      "learning_rate": 0.0001524157280898748,
+      "loss": 1.2363,
+      "step": 166550
+    },
+    {
+      "epoch": 4.974173707938972,
+      "grad_norm": 3.636580467224121,
+      "learning_rate": 0.00015240144229395597,
+      "loss": 1.2543,
+      "step": 166600
+    },
+    {
+      "epoch": 4.9756665571910546,
+      "grad_norm": 3.8117880821228027,
+      "learning_rate": 0.00015238715649803713,
+      "loss": 1.1817,
+      "step": 166650
+    },
+    {
+      "epoch": 4.977159406443137,
+      "grad_norm": 5.417634963989258,
+      "learning_rate": 0.0001523728707021183,
+      "loss": 1.2251,
+      "step": 166700
+    },
+    {
+      "epoch": 4.97865225569522,
+      "grad_norm": 4.955143451690674,
+      "learning_rate": 0.00015235858490619946,
+      "loss": 1.2164,
+      "step": 166750
+    },
+    {
+      "epoch": 4.980145104947303,
+      "grad_norm": 4.069868564605713,
+      "learning_rate": 0.00015234429911028063,
+      "loss": 1.2496,
+      "step": 166800
+    },
+    {
+      "epoch": 4.981637954199385,
+      "grad_norm": 3.862034320831299,
+      "learning_rate": 0.00015233001331436182,
+      "loss": 1.19,
+      "step": 166850
+    },
+    {
+      "epoch": 4.983130803451467,
+      "grad_norm": 5.259671211242676,
+      "learning_rate": 0.00015231572751844296,
+      "loss": 1.216,
+      "step": 166900
+    },
+    {
+      "epoch": 4.98462365270355,
+      "grad_norm": 4.1818742752075195,
+      "learning_rate": 0.00015230144172252415,
+      "loss": 1.2089,
+      "step": 166950
+    },
+    {
+      "epoch": 4.986116501955633,
+      "grad_norm": 4.551473617553711,
+      "learning_rate": 0.0001522871559266053,
+      "loss": 1.2218,
+      "step": 167000
+    },
+    {
+      "epoch": 4.987609351207715,
+      "grad_norm": 4.714193820953369,
+      "learning_rate": 0.00015227287013068648,
+      "loss": 1.2117,
+      "step": 167050
+    },
+    {
+      "epoch": 4.989102200459797,
+      "grad_norm": 5.234292030334473,
+      "learning_rate": 0.00015225858433476762,
+      "loss": 1.1861,
+      "step": 167100
+    },
+    {
+      "epoch": 4.99059504971188,
+      "grad_norm": 3.052459239959717,
+      "learning_rate": 0.0001522442985388488,
+      "loss": 1.1598,
+      "step": 167150
+    },
+    {
+      "epoch": 4.9920878989639625,
+      "grad_norm": 4.330234527587891,
+      "learning_rate": 0.00015223001274292997,
+      "loss": 1.2177,
+      "step": 167200
+    },
+    {
+      "epoch": 4.993580748216045,
+      "grad_norm": 4.3281636238098145,
+      "learning_rate": 0.00015221572694701114,
+      "loss": 1.291,
+      "step": 167250
+    },
+    {
+      "epoch": 4.995073597468128,
+      "grad_norm": 4.501579761505127,
+      "learning_rate": 0.0001522014411510923,
+      "loss": 1.215,
+      "step": 167300
+    },
+    {
+      "epoch": 4.996566446720211,
+      "grad_norm": 3.957705497741699,
+      "learning_rate": 0.00015218715535517347,
+      "loss": 1.2056,
+      "step": 167350
+    },
+    {
+      "epoch": 4.9980592959722925,
+      "grad_norm": 3.6151764392852783,
+      "learning_rate": 0.00015217286955925463,
+      "loss": 1.2531,
+      "step": 167400
+    },
+    {
+      "epoch": 4.999552145224375,
+      "grad_norm": 5.759849548339844,
+      "learning_rate": 0.0001521585837633358,
+      "loss": 1.233,
+      "step": 167450
+    },
+    {
+      "epoch": 5.001044994476458,
+      "grad_norm": 4.063636779785156,
+      "learning_rate": 0.00015214429796741696,
+      "loss": 1.0927,
+      "step": 167500
+    },
+    {
+      "epoch": 5.002537843728541,
+      "grad_norm": 5.337904930114746,
+      "learning_rate": 0.00015213001217149813,
+      "loss": 1.0732,
+      "step": 167550
+    },
+    {
+      "epoch": 5.004030692980622,
+      "grad_norm": 3.7725415229797363,
+      "learning_rate": 0.0001521157263755793,
+      "loss": 1.0745,
+      "step": 167600
+    },
+    {
+      "epoch": 5.005523542232705,
+      "grad_norm": 5.607946395874023,
+      "learning_rate": 0.00015210144057966048,
+      "loss": 1.0961,
+      "step": 167650
+    },
+    {
+      "epoch": 5.007016391484788,
+      "grad_norm": 5.750009059906006,
+      "learning_rate": 0.00015208715478374162,
+      "loss": 1.1483,
+      "step": 167700
+    },
+    {
+      "epoch": 5.0085092407368705,
+      "grad_norm": 4.2717180252075195,
+      "learning_rate": 0.0001520728689878228,
+      "loss": 1.1728,
+      "step": 167750
+    },
+    {
+      "epoch": 5.010002089988953,
+      "grad_norm": 4.048334121704102,
+      "learning_rate": 0.00015205858319190395,
+      "loss": 1.1091,
+      "step": 167800
+    },
+    {
+      "epoch": 5.011494939241035,
+      "grad_norm": 4.57917594909668,
+      "learning_rate": 0.00015204429739598514,
+      "loss": 1.1404,
+      "step": 167850
+    },
+    {
+      "epoch": 5.012987788493118,
+      "grad_norm": 5.619372367858887,
+      "learning_rate": 0.00015203001160006628,
+      "loss": 1.1803,
+      "step": 167900
+    },
+    {
+      "epoch": 5.0144806377452005,
+      "grad_norm": 4.788344383239746,
+      "learning_rate": 0.00015201572580414747,
+      "loss": 1.1131,
+      "step": 167950
+    },
+    {
+      "epoch": 5.015973486997283,
+      "grad_norm": 4.1209588050842285,
+      "learning_rate": 0.00015200144000822863,
+      "loss": 1.108,
+      "step": 168000
+    },
+    {
+      "epoch": 5.017466336249366,
+      "grad_norm": 3.88391375541687,
+      "learning_rate": 0.0001519871542123098,
+      "loss": 1.1692,
+      "step": 168050
+    },
+    {
+      "epoch": 5.018959185501448,
+      "grad_norm": 5.6544904708862305,
+      "learning_rate": 0.00015197286841639096,
+      "loss": 1.1457,
+      "step": 168100
+    },
+    {
+      "epoch": 5.02045203475353,
+      "grad_norm": 5.817676544189453,
+      "learning_rate": 0.00015195858262047213,
+      "loss": 1.124,
+      "step": 168150
+    },
+    {
+      "epoch": 5.021944884005613,
+      "grad_norm": 6.914028644561768,
+      "learning_rate": 0.0001519442968245533,
+      "loss": 1.0979,
+      "step": 168200
+    },
+    {
+      "epoch": 5.023437733257696,
+      "grad_norm": 4.256136417388916,
+      "learning_rate": 0.00015193001102863446,
+      "loss": 1.1524,
+      "step": 168250
+    },
+    {
+      "epoch": 5.0249305825097785,
+      "grad_norm": 4.776482582092285,
+      "learning_rate": 0.00015191572523271562,
+      "loss": 1.1005,
+      "step": 168300
+    },
+    {
+      "epoch": 5.02642343176186,
+      "grad_norm": 4.402430057525635,
+      "learning_rate": 0.0001519014394367968,
+      "loss": 1.1295,
+      "step": 168350
+    },
+    {
+      "epoch": 5.027916281013943,
+      "grad_norm": 4.203427314758301,
+      "learning_rate": 0.00015188715364087795,
+      "loss": 1.1377,
+      "step": 168400
+    },
+    {
+      "epoch": 5.029409130266026,
+      "grad_norm": 3.8063766956329346,
+      "learning_rate": 0.00015187286784495912,
+      "loss": 1.0963,
+      "step": 168450
+    },
+    {
+      "epoch": 5.0309019795181085,
+      "grad_norm": 4.944723129272461,
+      "learning_rate": 0.00015185858204904028,
+      "loss": 1.1298,
+      "step": 168500
+    },
+    {
+      "epoch": 5.032394828770191,
+      "grad_norm": 4.621240615844727,
+      "learning_rate": 0.00015184429625312145,
+      "loss": 1.1597,
+      "step": 168550
+    },
+    {
+      "epoch": 5.033887678022273,
+      "grad_norm": 5.244666576385498,
+      "learning_rate": 0.0001518300104572026,
+      "loss": 1.1266,
+      "step": 168600
+    },
+    {
+      "epoch": 5.035380527274356,
+      "grad_norm": 3.7527828216552734,
+      "learning_rate": 0.00015181572466128377,
+      "loss": 1.1254,
+      "step": 168650
+    },
+    {
+      "epoch": 5.036873376526438,
+      "grad_norm": 5.852657794952393,
+      "learning_rate": 0.00015180143886536497,
+      "loss": 1.1909,
+      "step": 168700
+    },
+    {
+      "epoch": 5.038366225778521,
+      "grad_norm": 4.197393417358398,
+      "learning_rate": 0.0001517871530694461,
+      "loss": 1.1742,
+      "step": 168750
+    },
+    {
+      "epoch": 5.039859075030604,
+      "grad_norm": 4.88790225982666,
+      "learning_rate": 0.0001517728672735273,
+      "loss": 1.1273,
+      "step": 168800
+    },
+    {
+      "epoch": 5.041351924282686,
+      "grad_norm": 5.1781134605407715,
+      "learning_rate": 0.00015175858147760843,
+      "loss": 1.1011,
+      "step": 168850
+    },
+    {
+      "epoch": 5.042844773534768,
+      "grad_norm": 6.221542835235596,
+      "learning_rate": 0.00015174429568168963,
+      "loss": 1.0775,
+      "step": 168900
+    },
+    {
+      "epoch": 5.044337622786851,
+      "grad_norm": 3.557209014892578,
+      "learning_rate": 0.00015173000988577076,
+      "loss": 1.1295,
+      "step": 168950
+    },
+    {
+      "epoch": 5.045830472038934,
+      "grad_norm": 4.908199310302734,
+      "learning_rate": 0.00015171572408985195,
+      "loss": 1.1428,
+      "step": 169000
+    },
+    {
+      "epoch": 5.0473233212910165,
+      "grad_norm": 3.2522714138031006,
+      "learning_rate": 0.00015170143829393312,
+      "loss": 1.1454,
+      "step": 169050
+    },
+    {
+      "epoch": 5.048816170543098,
+      "grad_norm": 4.945195198059082,
+      "learning_rate": 0.00015168715249801428,
+      "loss": 1.1541,
+      "step": 169100
+    },
+    {
+      "epoch": 5.050309019795181,
+      "grad_norm": 3.7441771030426025,
+      "learning_rate": 0.00015167286670209545,
+      "loss": 1.1223,
+      "step": 169150
+    },
+    {
+      "epoch": 5.051801869047264,
+      "grad_norm": 4.811500549316406,
+      "learning_rate": 0.0001516585809061766,
+      "loss": 1.1861,
+      "step": 169200
+    },
+    {
+      "epoch": 5.053294718299346,
+      "grad_norm": 4.175551414489746,
+      "learning_rate": 0.00015164429511025778,
+      "loss": 1.1655,
+      "step": 169250
+    },
+    {
+      "epoch": 5.054787567551428,
+      "grad_norm": 4.7304205894470215,
+      "learning_rate": 0.00015163000931433894,
+      "loss": 1.1238,
+      "step": 169300
+    },
+    {
+      "epoch": 5.056280416803511,
+      "grad_norm": 4.252381801605225,
+      "learning_rate": 0.0001516157235184201,
+      "loss": 1.1683,
+      "step": 169350
+    },
+    {
+      "epoch": 5.057773266055594,
+      "grad_norm": 3.368849039077759,
+      "learning_rate": 0.00015160143772250127,
+      "loss": 1.1584,
+      "step": 169400
+    },
+    {
+      "epoch": 5.059266115307676,
+      "grad_norm": 4.560303688049316,
+      "learning_rate": 0.00015158715192658244,
+      "loss": 1.1668,
+      "step": 169450
+    },
+    {
+      "epoch": 5.060758964559759,
+      "grad_norm": 4.084552764892578,
+      "learning_rate": 0.00015157286613066363,
+      "loss": 1.1385,
+      "step": 169500
+    },
+    {
+      "epoch": 5.062251813811841,
+      "grad_norm": 4.2204060554504395,
+      "learning_rate": 0.00015155858033474477,
+      "loss": 1.1486,
+      "step": 169550
+    },
+    {
+      "epoch": 5.063744663063924,
+      "grad_norm": 3.872606039047241,
+      "learning_rate": 0.00015154429453882596,
+      "loss": 1.1732,
+      "step": 169600
+    },
+    {
+      "epoch": 5.065237512316006,
+      "grad_norm": 6.434457302093506,
+      "learning_rate": 0.0001515300087429071,
+      "loss": 1.1658,
+      "step": 169650
+    },
+    {
+      "epoch": 5.066730361568089,
+      "grad_norm": 4.881868362426758,
+      "learning_rate": 0.0001515157229469883,
+      "loss": 1.1769,
+      "step": 169700
+    },
+    {
+      "epoch": 5.068223210820172,
+      "grad_norm": 4.622376918792725,
+      "learning_rate": 0.00015150143715106942,
+      "loss": 1.0902,
+      "step": 169750
+    },
+    {
+      "epoch": 5.0697160600722535,
+      "grad_norm": 3.6144649982452393,
+      "learning_rate": 0.00015148715135515062,
+      "loss": 1.1098,
+      "step": 169800
+    },
+    {
+      "epoch": 5.071208909324336,
+      "grad_norm": 4.620079517364502,
+      "learning_rate": 0.00015147286555923178,
+      "loss": 1.1716,
+      "step": 169850
+    },
+    {
+      "epoch": 5.072701758576419,
+      "grad_norm": 4.734360694885254,
+      "learning_rate": 0.00015145857976331295,
+      "loss": 1.1539,
+      "step": 169900
+    },
+    {
+      "epoch": 5.074194607828502,
+      "grad_norm": 4.301705360412598,
+      "learning_rate": 0.0001514442939673941,
+      "loss": 1.1533,
+      "step": 169950
+    },
+    {
+      "epoch": 5.075687457080584,
+      "grad_norm": 5.320747375488281,
+      "learning_rate": 0.00015143000817147527,
+      "loss": 1.1233,
+      "step": 170000
+    }
+   ],
+   "logging_steps": 50,
@@ -22433,7 +23833,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.2969393603480125e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
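A quick consistency check on the bookkeeping in this commit: the logged epoch advances by a fixed amount every 50 steps, so the implied number of optimizer steps per epoch can be recovered from any two log points. A minimal sketch in Python using two entries from the hunk above:

# Two adjacent log points from the diff: (step, epoch).
s0, e0 = 160050, 4.778610455916161
s1, e1 = 160100, 4.780103305168244

steps_per_epoch = (s1 - s0) / (e1 - e0)
print(round(steps_per_epoch))    # ~33493 optimizer steps per epoch
print(170000 / steps_per_epoch)  # ~5.0757, matching "epoch" at global_step 170000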