Training in progress, step 70000, checkpoint

- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step70000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step70000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
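The `global_step70000/` files follow DeepSpeed's ZeRO checkpoint naming (a per-rank bf16 optimizer-state shard plus the module states), and the `latest` tag file in this commit points at that directory. A minimal sketch of consolidating such a checkpoint into a single fp32 state dict, assuming the `deepspeed` package and its bundled `zero_to_fp32` utility are available; paths mirror this repository's layout:

```python
# Sketch: consolidate a DeepSpeed ZeRO checkpoint like the one committed here
# into one fp32 state dict. Assumes the deepspeed package is installed; the
# checkpoint directory and tag come from this commit's file list.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# "last-checkpoint/latest" names the tag directory, here "global_step70000".
state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step70000"
)
print(len(state_dict))  # number of consolidated parameter tensors
```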
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:79f20513fcff79f2b0c92ad2377c2fc7b68aa74ce9d28e0ccfad53373cfc3336
 size 42002584
last-checkpoint/global_step70000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdb5106e67302f2928be9d34b098b742057fdbcf2c739a3dee2a672bb3adf23e
+size 251710672
last-checkpoint/global_step70000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:765ed271ca2feb9ea662abbd68d3eda1f76642cc3e280e073ac3b2e499473da0
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step70000
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04933d8412a91ea6a45ca1451652f089c0380276afbe0f51f95ee48cf969406f
 size 14244
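Each binary file above is stored as a Git LFS pointer rather than the payload itself: a three-line text stub giving the spec version, a sha256 object id, and the size in bytes. A minimal sketch of reading one such pointer, assuming the three-line layout shown above; `parse_lfs_pointer` is a hypothetical helper written only to illustrate the format:

```python
# Sketch: parse a Git LFS pointer file like the ones in this commit.
# Assumes the standard "version / oid / size" three-line layout.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Return the version, sha256 oid, and byte size from an LFS pointer."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")  # e.g. "oid" -> "sha256:..."
        fields[key] = value
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

# Example: the rng_state.pth pointer above would yield size_bytes == 14244.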
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 2.0899889529155344,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 70000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8414,6 +8414,1406 @@
       "learning_rate": 0.0001828587591929097,
       "loss": 1.3149,
       "step": 60000
+    },
+    { "epoch": 1.7929119517511123, "grad_norm": 4.172428607940674, "learning_rate": 0.00018284447339699084, "loss": 1.2703, "step": 60050 },
+    { "epoch": 1.7944048010031945, "grad_norm": 5.1870903968811035, "learning_rate": 0.00018283018760107203, "loss": 1.3107, "step": 60100 },
+    { "epoch": 1.7958976502552773, "grad_norm": 4.141382694244385, "learning_rate": 0.00018281590180515317, "loss": 1.3252, "step": 60150 },
+    { "epoch": 1.7973904995073597, "grad_norm": 3.9119277000427246, "learning_rate": 0.00018280161600923436, "loss": 1.3875, "step": 60200 },
+    { "epoch": 1.7988833487594422, "grad_norm": 4.159456729888916, "learning_rate": 0.00018278733021331552, "loss": 1.3871, "step": 60250 },
+    { "epoch": 1.800376198011525, "grad_norm": 4.536618709564209, "learning_rate": 0.0001827730444173967, "loss": 1.2892, "step": 60300 },
+    { "epoch": 1.8018690472636072, "grad_norm": 7.839147090911865, "learning_rate": 0.00018275875862147785, "loss": 1.2953, "step": 60350 },
+    { "epoch": 1.80336189651569, "grad_norm": 3.6673262119293213, "learning_rate": 0.00018274447282555902, "loss": 1.3245, "step": 60400 },
+    { "epoch": 1.8048547457677724, "grad_norm": 4.275418281555176, "learning_rate": 0.00018273018702964018, "loss": 1.3061, "step": 60450 },
+    { "epoch": 1.8063475950198549, "grad_norm": 4.154748916625977, "learning_rate": 0.00018271590123372135, "loss": 1.3127, "step": 60500 },
+    { "epoch": 1.8078404442719376, "grad_norm": 4.65784215927124, "learning_rate": 0.0001827016154378025, "loss": 1.3717, "step": 60550 },
+    { "epoch": 1.8093332935240198, "grad_norm": 6.874159812927246, "learning_rate": 0.00018268732964188368, "loss": 1.3043, "step": 60600 },
+    { "epoch": 1.8108261427761025, "grad_norm": 5.431716442108154, "learning_rate": 0.00018267304384596484, "loss": 1.3138, "step": 60650 },
+    { "epoch": 1.812318992028185, "grad_norm": 4.631954669952393, "learning_rate": 0.00018265875805004603, "loss": 1.3241, "step": 60700 },
+    { "epoch": 1.8138118412802675, "grad_norm": 4.336903095245361, "learning_rate": 0.00018264447225412717, "loss": 1.2709, "step": 60750 },
+    { "epoch": 1.81530469053235, "grad_norm": 4.103146553039551, "learning_rate": 0.00018263018645820836, "loss": 1.2912, "step": 60800 },
+    { "epoch": 1.8167975397844325, "grad_norm": 4.709009170532227, "learning_rate": 0.0001826159006622895, "loss": 1.304, "step": 60850 },
+    { "epoch": 1.8182903890365152, "grad_norm": 5.0871148109436035, "learning_rate": 0.00018260161486637066, "loss": 1.301, "step": 60900 },
+    { "epoch": 1.8197832382885977, "grad_norm": 4.265575408935547, "learning_rate": 0.00018258732907045183, "loss": 1.3914, "step": 60950 },
+    { "epoch": 1.8212760875406802, "grad_norm": 4.705594539642334, "learning_rate": 0.000182573043274533, "loss": 1.3056, "step": 61000 },
+    { "epoch": 1.8227689367927626, "grad_norm": 3.86602783203125, "learning_rate": 0.00018255875747861418, "loss": 1.3394, "step": 61050 },
+    { "epoch": 1.8242617860448451, "grad_norm": 3.9397497177124023, "learning_rate": 0.00018254447168269532, "loss": 1.3625, "step": 61100 },
+    { "epoch": 1.8257546352969278, "grad_norm": 4.313930988311768, "learning_rate": 0.00018253018588677651, "loss": 1.3445, "step": 61150 },
+    { "epoch": 1.82724748454901, "grad_norm": 5.848763942718506, "learning_rate": 0.00018251590009085765, "loss": 1.3896, "step": 61200 },
+    { "epoch": 1.8287403338010928, "grad_norm": 4.323721885681152, "learning_rate": 0.00018250161429493884, "loss": 1.3616, "step": 61250 },
+    { "epoch": 1.8302331830531753, "grad_norm": 5.140015125274658, "learning_rate": 0.00018248732849901998, "loss": 1.2641, "step": 61300 },
+    { "epoch": 1.8317260323052578, "grad_norm": 5.685877799987793, "learning_rate": 0.00018247304270310117, "loss": 1.3396, "step": 61350 },
+    { "epoch": 1.8332188815573405, "grad_norm": 4.268862724304199, "learning_rate": 0.00018245875690718234, "loss": 1.3201, "step": 61400 },
+    { "epoch": 1.8347117308094227, "grad_norm": 5.0206732749938965, "learning_rate": 0.0001824444711112635, "loss": 1.372, "step": 61450 },
+    { "epoch": 1.8362045800615054, "grad_norm": 4.32529354095459, "learning_rate": 0.00018243018531534467, "loss": 1.3, "step": 61500 },
+    { "epoch": 1.837697429313588, "grad_norm": 5.349672317504883, "learning_rate": 0.00018241589951942583, "loss": 1.2957, "step": 61550 },
+    { "epoch": 1.8391902785656704, "grad_norm": 4.735100746154785, "learning_rate": 0.000182401613723507, "loss": 1.3546, "step": 61600 },
+    { "epoch": 1.8406831278177531, "grad_norm": 4.489027500152588, "learning_rate": 0.00018238732792758816, "loss": 1.2695, "step": 61650 },
+    { "epoch": 1.8421759770698354, "grad_norm": 3.2674996852874756, "learning_rate": 0.00018237304213166933, "loss": 1.3425, "step": 61700 },
+    { "epoch": 1.843668826321918, "grad_norm": 3.9815094470977783, "learning_rate": 0.0001823587563357505, "loss": 1.3396, "step": 61750 },
+    { "epoch": 1.8451616755740006, "grad_norm": 3.9253129959106445, "learning_rate": 0.00018234447053983165, "loss": 1.3641, "step": 61800 },
+    { "epoch": 1.846654524826083, "grad_norm": 6.536600112915039, "learning_rate": 0.00018233018474391285, "loss": 1.2578, "step": 61850 },
+    { "epoch": 1.8481473740781655, "grad_norm": 4.420536518096924, "learning_rate": 0.00018231589894799398, "loss": 1.3635, "step": 61900 },
+    { "epoch": 1.849640223330248, "grad_norm": 3.770279884338379, "learning_rate": 0.00018230161315207518, "loss": 1.2869, "step": 61950 },
+    { "epoch": 1.8511330725823307, "grad_norm": 4.154125213623047, "learning_rate": 0.0001822873273561563, "loss": 1.3246, "step": 62000 },
+    { "epoch": 1.8526259218344132, "grad_norm": 3.884587049484253, "learning_rate": 0.0001822730415602375, "loss": 1.329, "step": 62050 },
+    { "epoch": 1.8541187710864957, "grad_norm": 5.329457759857178, "learning_rate": 0.00018225875576431864, "loss": 1.3787, "step": 62100 },
+    { "epoch": 1.8556116203385782, "grad_norm": 5.966410160064697, "learning_rate": 0.00018224446996839983, "loss": 1.3574, "step": 62150 },
+    { "epoch": 1.8571044695906607, "grad_norm": 5.356328010559082, "learning_rate": 0.000182230184172481, "loss": 1.3113, "step": 62200 },
+    { "epoch": 1.8585973188427434, "grad_norm": 3.6018612384796143, "learning_rate": 0.00018221589837656216, "loss": 1.3296, "step": 62250 },
+    { "epoch": 1.8600901680948256, "grad_norm": 5.247616767883301, "learning_rate": 0.00018220161258064333, "loss": 1.3518, "step": 62300 },
+    { "epoch": 1.8615830173469083, "grad_norm": 5.828862190246582, "learning_rate": 0.0001821873267847245, "loss": 1.3027, "step": 62350 },
+    { "epoch": 1.8630758665989908, "grad_norm": 5.451898574829102, "learning_rate": 0.00018217304098880566, "loss": 1.3647, "step": 62400 },
+    { "epoch": 1.8645687158510733, "grad_norm": 3.905299663543701, "learning_rate": 0.00018215875519288682, "loss": 1.3528, "step": 62450 },
+    { "epoch": 1.866061565103156, "grad_norm": 4.312819004058838, "learning_rate": 0.000182144469396968, "loss": 1.3487, "step": 62500 },
+    { "epoch": 1.8675544143552383, "grad_norm": 4.3358073234558105, "learning_rate": 0.00018213018360104915, "loss": 1.2925, "step": 62550 },
+    { "epoch": 1.869047263607321, "grad_norm": 5.664048194885254, "learning_rate": 0.00018211589780513032, "loss": 1.3822, "step": 62600 },
+    { "epoch": 1.8705401128594035, "grad_norm": 4.992645263671875, "learning_rate": 0.0001821016120092115, "loss": 1.3819, "step": 62650 },
+    { "epoch": 1.872032962111486, "grad_norm": 4.036514759063721, "learning_rate": 0.00018208732621329265, "loss": 1.3193, "step": 62700 },
+    { "epoch": 1.8735258113635687, "grad_norm": 4.094419956207275, "learning_rate": 0.00018207304041737384, "loss": 1.3649, "step": 62750 },
+    { "epoch": 1.875018660615651, "grad_norm": 5.437168121337891, "learning_rate": 0.00018205875462145497, "loss": 1.3445, "step": 62800 },
+    { "epoch": 1.8765115098677336, "grad_norm": 4.937995433807373, "learning_rate": 0.00018204446882553617, "loss": 1.3588, "step": 62850 },
+    { "epoch": 1.8780043591198161, "grad_norm": 3.918588876724243, "learning_rate": 0.00018203018302961733, "loss": 1.3195, "step": 62900 },
+    { "epoch": 1.8794972083718986, "grad_norm": 4.650214672088623, "learning_rate": 0.0001820158972336985, "loss": 1.3812, "step": 62950 },
+    { "epoch": 1.880990057623981, "grad_norm": 3.844249725341797, "learning_rate": 0.00018200161143777966, "loss": 1.2877, "step": 63000 },
+    { "epoch": 1.8824829068760636, "grad_norm": 4.988340854644775, "learning_rate": 0.00018198732564186083, "loss": 1.2705, "step": 63050 },
+    { "epoch": 1.8839757561281463, "grad_norm": 4.229214668273926, "learning_rate": 0.000181973039845942, "loss": 1.3314, "step": 63100 },
+    { "epoch": 1.8854686053802288, "grad_norm": 4.095324993133545, "learning_rate": 0.00018195875405002315, "loss": 1.3779, "step": 63150 },
+    { "epoch": 1.8869614546323112, "grad_norm": 5.325993537902832, "learning_rate": 0.00018194446825410432, "loss": 1.3163, "step": 63200 },
+    { "epoch": 1.8884543038843937, "grad_norm": 5.301263809204102, "learning_rate": 0.00018193018245818548, "loss": 1.4235, "step": 63250 },
+    { "epoch": 1.8899471531364762, "grad_norm": 5.347028732299805, "learning_rate": 0.00018191589666226665, "loss": 1.356, "step": 63300 },
+    { "epoch": 1.891440002388559, "grad_norm": 4.957263469696045, "learning_rate": 0.00018190161086634784, "loss": 1.2915, "step": 63350 },
+    { "epoch": 1.8929328516406412, "grad_norm": 3.9531173706054688, "learning_rate": 0.00018188732507042898, "loss": 1.2905, "step": 63400 },
+    { "epoch": 1.8944257008927239, "grad_norm": 5.983827590942383, "learning_rate": 0.00018187303927451017, "loss": 1.3129, "step": 63450 },
+    { "epoch": 1.8959185501448064, "grad_norm": 4.4479522705078125, "learning_rate": 0.0001818587534785913, "loss": 1.3528, "step": 63500 },
+    { "epoch": 1.8974113993968889, "grad_norm": 4.956698417663574, "learning_rate": 0.00018184446768267247, "loss": 1.2675, "step": 63550 },
+    { "epoch": 1.8989042486489716, "grad_norm": 3.7053678035736084, "learning_rate": 0.00018183018188675364, "loss": 1.3316, "step": 63600 },
+    { "epoch": 1.9003970979010538, "grad_norm": 5.076517105102539, "learning_rate": 0.0001818158960908348, "loss": 1.3603, "step": 63650 },
+    { "epoch": 1.9018899471531365, "grad_norm": 4.377649307250977, "learning_rate": 0.000181801610294916, "loss": 1.3491, "step": 63700 },
+    { "epoch": 1.903382796405219, "grad_norm": 5.296698570251465, "learning_rate": 0.00018178732449899713, "loss": 1.3768, "step": 63750 },
+    { "epoch": 1.9048756456573015, "grad_norm": 4.674185752868652, "learning_rate": 0.00018177303870307832, "loss": 1.3599, "step": 63800 },
+    { "epoch": 1.9063684949093842, "grad_norm": 5.758728981018066, "learning_rate": 0.00018175875290715946, "loss": 1.3239, "step": 63850 },
+    { "epoch": 1.9078613441614665, "grad_norm": 4.195470333099365, "learning_rate": 0.00018174446711124065, "loss": 1.3132, "step": 63900 },
+    { "epoch": 1.9093541934135492, "grad_norm": 4.118239402770996, "learning_rate": 0.0001817301813153218, "loss": 1.2887, "step": 63950 },
+    { "epoch": 1.9108470426656317, "grad_norm": 3.598353147506714, "learning_rate": 0.00018171589551940298, "loss": 1.3341, "step": 64000 },
+    { "epoch": 1.9123398919177141, "grad_norm": 3.8190245628356934, "learning_rate": 0.00018170160972348415, "loss": 1.3011, "step": 64050 },
+    { "epoch": 1.9138327411697966, "grad_norm": 6.505800724029541, "learning_rate": 0.0001816873239275653, "loss": 1.3583, "step": 64100 },
+    { "epoch": 1.915325590421879, "grad_norm": 4.134885311126709, "learning_rate": 0.00018167303813164647, "loss": 1.3363, "step": 64150 },
+    { "epoch": 1.9168184396739618, "grad_norm": 3.753544807434082, "learning_rate": 0.00018165875233572764, "loss": 1.3415, "step": 64200 },
+    { "epoch": 1.9183112889260443, "grad_norm": 3.716143846511841, "learning_rate": 0.0001816444665398088, "loss": 1.3276, "step": 64250 },
+    { "epoch": 1.9198041381781268, "grad_norm": 4.619287490844727, "learning_rate": 0.00018163018074388997, "loss": 1.3624, "step": 64300 },
+    { "epoch": 1.9212969874302093, "grad_norm": 3.8095598220825195, "learning_rate": 0.00018161589494797113, "loss": 1.3488, "step": 64350 },
+    { "epoch": 1.9227898366822918, "grad_norm": 6.268500328063965, "learning_rate": 0.0001816016091520523, "loss": 1.3481, "step": 64400 },
+    { "epoch": 1.9242826859343745, "grad_norm": 6.642594814300537, "learning_rate": 0.00018158732335613346, "loss": 1.3392, "step": 64450 },
+    { "epoch": 1.9257755351864567, "grad_norm": 5.921909332275391, "learning_rate": 0.00018157303756021465, "loss": 1.3218, "step": 64500 },
+    { "epoch": 1.9272683844385394, "grad_norm": 4.467897415161133, "learning_rate": 0.0001815587517642958, "loss": 1.3087, "step": 64550 },
+    { "epoch": 1.928761233690622, "grad_norm": 6.775788307189941, "learning_rate": 0.00018154446596837698, "loss": 1.3712, "step": 64600 },
+    { "epoch": 1.9302540829427044, "grad_norm": 4.561954975128174, "learning_rate": 0.00018153018017245812, "loss": 1.3214, "step": 64650 },
+    { "epoch": 1.931746932194787, "grad_norm": 5.288097381591797, "learning_rate": 0.0001815158943765393, "loss": 1.3926, "step": 64700 },
+    { "epoch": 1.9332397814468694, "grad_norm": 4.883842945098877, "learning_rate": 0.00018150160858062045, "loss": 1.2545, "step": 64750 },
+    { "epoch": 1.934732630698952, "grad_norm": 4.528720378875732, "learning_rate": 0.00018148732278470164, "loss": 1.3733, "step": 64800 },
+    { "epoch": 1.9362254799510346, "grad_norm": 3.9458446502685547, "learning_rate": 0.0001814730369887828, "loss": 1.3685, "step": 64850 },
+    { "epoch": 1.937718329203117, "grad_norm": 5.117423057556152, "learning_rate": 0.00018145875119286397, "loss": 1.3712, "step": 64900 },
+    { "epoch": 1.9392111784551997, "grad_norm": 4.276913166046143, "learning_rate": 0.00018144446539694514, "loss": 1.2996, "step": 64950 },
+    { "epoch": 1.940704027707282, "grad_norm": 3.9713099002838135, "learning_rate": 0.0001814301796010263, "loss": 1.301, "step": 65000 },
+    { "epoch": 1.9421968769593647, "grad_norm": 4.240658760070801, "learning_rate": 0.00018141589380510747, "loss": 1.4013, "step": 65050 },
+    { "epoch": 1.9436897262114472, "grad_norm": 4.406320571899414, "learning_rate": 0.00018140160800918863, "loss": 1.3481, "step": 65100 },
+    { "epoch": 1.9451825754635297, "grad_norm": 4.139687538146973, "learning_rate": 0.0001813873222132698, "loss": 1.3179, "step": 65150 },
+    { "epoch": 1.9466754247156122, "grad_norm": 3.8167126178741455, "learning_rate": 0.00018137303641735096, "loss": 1.2714, "step": 65200 },
+    { "epoch": 1.9481682739676947, "grad_norm": 4.680548191070557, "learning_rate": 0.00018135875062143212, "loss": 1.3571, "step": 65250 },
+    { "epoch": 1.9496611232197774, "grad_norm": 8.530779838562012, "learning_rate": 0.00018134446482551332, "loss": 1.323, "step": 65300 },
+    { "epoch": 1.9511539724718598, "grad_norm": 4.576430320739746, "learning_rate": 0.00018133017902959445, "loss": 1.3709, "step": 65350 },
+    { "epoch": 1.9526468217239423, "grad_norm": 5.320089817047119, "learning_rate": 0.00018131589323367565, "loss": 1.3857, "step": 65400 },
+    { "epoch": 1.9541396709760248, "grad_norm": 4.247587203979492, "learning_rate": 0.00018130160743775678, "loss": 1.3637, "step": 65450 },
+    { "epoch": 1.9556325202281073, "grad_norm": 4.2958903312683105, "learning_rate": 0.00018128732164183797, "loss": 1.2861, "step": 65500 },
+    { "epoch": 1.95712536948019, "grad_norm": 4.818902969360352, "learning_rate": 0.00018127303584591914, "loss": 1.3264, "step": 65550 },
+    { "epoch": 1.9586182187322723, "grad_norm": 4.755589962005615, "learning_rate": 0.0001812587500500003, "loss": 1.3821, "step": 65600 },
+    { "epoch": 1.960111067984355, "grad_norm": 3.589580774307251, "learning_rate": 0.00018124446425408147, "loss": 1.2799, "step": 65650 },
+    { "epoch": 1.9616039172364375, "grad_norm": 5.7323431968688965, "learning_rate": 0.00018123017845816263, "loss": 1.2616, "step": 65700 },
+    { "epoch": 1.96309676648852, "grad_norm": 6.535925388336182, "learning_rate": 0.0001812158926622438, "loss": 1.3391, "step": 65750 },
+    { "epoch": 1.9645896157406026, "grad_norm": 3.783618688583374, "learning_rate": 0.00018120160686632496, "loss": 1.2743, "step": 65800 },
+    { "epoch": 1.966082464992685, "grad_norm": 4.055028438568115, "learning_rate": 0.00018118732107040613, "loss": 1.3319, "step": 65850 },
+    { "epoch": 1.9675753142447676, "grad_norm": 4.827310562133789, "learning_rate": 0.0001811730352744873, "loss": 1.3518, "step": 65900 },
+    { "epoch": 1.96906816349685, "grad_norm": 4.298334121704102, "learning_rate": 0.00018115874947856846, "loss": 1.3755, "step": 65950 },
+    { "epoch": 1.9705610127489326, "grad_norm": 4.718268871307373, "learning_rate": 0.00018114446368264962, "loss": 1.2615, "step": 66000 },
+    { "epoch": 1.9720538620010153, "grad_norm": 4.387974262237549, "learning_rate": 0.00018113017788673079, "loss": 1.3475, "step": 66050 },
+    { "epoch": 1.9735467112530976, "grad_norm": 4.896945476531982, "learning_rate": 0.00018111589209081198, "loss": 1.3145, "step": 66100 },
+    { "epoch": 1.9750395605051803, "grad_norm": 5.453362941741943, "learning_rate": 0.00018110160629489312, "loss": 1.3377, "step": 66150 },
+    { "epoch": 1.9765324097572627, "grad_norm": 6.6892499923706055, "learning_rate": 0.00018108732049897428, "loss": 1.2734, "step": 66200 },
+    { "epoch": 1.9780252590093452, "grad_norm": 4.2804765701293945, "learning_rate": 0.00018107303470305544, "loss": 1.3742, "step": 66250 },
+    { "epoch": 1.9795181082614277, "grad_norm": 4.235034465789795, "learning_rate": 0.0001810587489071366, "loss": 1.2921, "step": 66300 },
+    { "epoch": 1.9810109575135102, "grad_norm": 4.113801956176758, "learning_rate": 0.0001810444631112178, "loss": 1.3475, "step": 66350 },
+    { "epoch": 1.982503806765593, "grad_norm": 4.276202201843262, "learning_rate": 0.00018103017731529894, "loss": 1.2955, "step": 66400 },
+    { "epoch": 1.9839966560176754, "grad_norm": 4.72022008895874, "learning_rate": 0.00018101589151938013, "loss": 1.3676, "step": 66450 },
+    { "epoch": 1.9854895052697579, "grad_norm": 4.182514190673828, "learning_rate": 0.00018100160572346127, "loss": 1.3706, "step": 66500 },
+    { "epoch": 1.9869823545218404, "grad_norm": 3.8737592697143555, "learning_rate": 0.00018098731992754246, "loss": 1.3103, "step": 66550 },
+    { "epoch": 1.9884752037739228, "grad_norm": 6.266764163970947, "learning_rate": 0.0001809730341316236, "loss": 1.3175, "step": 66600 },
+    { "epoch": 1.9899680530260055, "grad_norm": 5.243070125579834, "learning_rate": 0.0001809587483357048, "loss": 1.3484, "step": 66650 },
+    { "epoch": 1.9914609022780878, "grad_norm": 4.289467811584473, "learning_rate": 0.00018094446253978595, "loss": 1.3806, "step": 66700 },
+    { "epoch": 1.9929537515301705, "grad_norm": 5.450198173522949, "learning_rate": 0.00018093017674386712, "loss": 1.3155, "step": 66750 },
+    { "epoch": 1.994446600782253, "grad_norm": 4.389066219329834, "learning_rate": 0.00018091589094794828, "loss": 1.293, "step": 66800 },
+    { "epoch": 1.9959394500343355, "grad_norm": 4.606175422668457, "learning_rate": 0.00018090160515202945, "loss": 1.3558, "step": 66850 },
+    { "epoch": 1.9974322992864182, "grad_norm": 4.8083319664001465, "learning_rate": 0.0001808873193561106, "loss": 1.2621, "step": 66900 },
+    { "epoch": 1.9989251485385005, "grad_norm": 3.760852813720703, "learning_rate": 0.00018087303356019178, "loss": 1.3539, "step": 66950 },
+    { "epoch": 2.000417997790583, "grad_norm": 3.6912026405334473, "learning_rate": 0.00018085874776427294, "loss": 1.2619, "step": 67000 },
+    { "epoch": 2.0019108470426654, "grad_norm": 4.681028842926025, "learning_rate": 0.0001808444619683541, "loss": 1.2457, "step": 67050 },
+    { "epoch": 2.003403696294748, "grad_norm": 4.194242477416992, "learning_rate": 0.00018083017617243527, "loss": 1.297, "step": 67100 },
+    { "epoch": 2.004896545546831, "grad_norm": 3.831331729888916, "learning_rate": 0.00018081589037651646, "loss": 1.2989, "step": 67150 },
+    { "epoch": 2.006389394798913, "grad_norm": 4.249265193939209, "learning_rate": 0.0001808016045805976, "loss": 1.2078, "step": 67200 },
+    { "epoch": 2.007882244050996, "grad_norm": 7.441315650939941, "learning_rate": 0.0001807873187846788, "loss": 1.2945, "step": 67250 },
+    { "epoch": 2.009375093303078, "grad_norm": 4.2568745613098145, "learning_rate": 0.00018077303298875993, "loss": 1.2295, "step": 67300 },
+    { "epoch": 2.0108679425551608, "grad_norm": 5.394452095031738, "learning_rate": 0.00018075874719284112, "loss": 1.2135, "step": 67350 },
+    { "epoch": 2.0123607918072435, "grad_norm": 3.199411630630493, "learning_rate": 0.00018074446139692226, "loss": 1.2314, "step": 67400 },
+    { "epoch": 2.0138536410593257, "grad_norm": 3.7905383110046387, "learning_rate": 0.00018073017560100345, "loss": 1.2227, "step": 67450 },
+    { "epoch": 2.0153464903114084, "grad_norm": 7.900835990905762, "learning_rate": 0.00018071588980508462, "loss": 1.2067, "step": 67500 },
+    { "epoch": 2.0168393395634907, "grad_norm": 4.803965091705322, "learning_rate": 0.00018070160400916578, "loss": 1.2894, "step": 67550 },
+    { "epoch": 2.0183321888155734, "grad_norm": 4.046397686004639, "learning_rate": 0.00018068731821324694, "loss": 1.2627, "step": 67600 },
+    { "epoch": 2.019825038067656, "grad_norm": 4.9588518142700195, "learning_rate": 0.0001806730324173281, "loss": 1.1931, "step": 67650 },
+    { "epoch": 2.0213178873197384, "grad_norm": 3.5710065364837646, "learning_rate": 0.00018065874662140927, "loss": 1.1937, "step": 67700 },
+    { "epoch": 2.022810736571821, "grad_norm": 4.107451915740967, "learning_rate": 0.00018064446082549044, "loss": 1.2236, "step": 67750 },
+    { "epoch": 2.0243035858239034, "grad_norm": 4.8303422927856445, "learning_rate": 0.0001806301750295716, "loss": 1.2223, "step": 67800 },
+    { "epoch": 2.025796435075986, "grad_norm": 6.539272308349609, "learning_rate": 0.00018061588923365277, "loss": 1.3199, "step": 67850 },
+    { "epoch": 2.0272892843280688, "grad_norm": 4.415231227874756, "learning_rate": 0.00018060160343773393, "loss": 1.2988, "step": 67900 },
+    { "epoch": 2.028782133580151, "grad_norm": 4.396175861358643, "learning_rate": 0.00018058731764181512, "loss": 1.1952, "step": 67950 },
+    { "epoch": 2.0302749828322337, "grad_norm": 3.2673914432525635, "learning_rate": 0.00018057303184589626, "loss": 1.2285, "step": 68000 },
+    { "epoch": 2.031767832084316, "grad_norm": 3.6252634525299072, "learning_rate": 0.00018055874604997745, "loss": 1.314, "step": 68050 },
+    { "epoch": 2.0332606813363987, "grad_norm": 4.550121307373047, "learning_rate": 0.0001805444602540586, "loss": 1.2846, "step": 68100 },
+    { "epoch": 2.034753530588481, "grad_norm": 4.809107303619385, "learning_rate": 0.00018053017445813978, "loss": 1.3165, "step": 68150 },
+    { "epoch": 2.0362463798405637, "grad_norm": 4.466585159301758, "learning_rate": 0.00018051588866222092, "loss": 1.2543, "step": 68200 },
+    { "epoch": 2.0377392290926464, "grad_norm": 4.243835926055908, "learning_rate": 0.0001805016028663021, "loss": 1.2908, "step": 68250 },
+    { "epoch": 2.0392320783447286, "grad_norm": 4.369659423828125, "learning_rate": 0.00018048731707038328, "loss": 1.2167, "step": 68300 },
+    { "epoch": 2.0407249275968113, "grad_norm": 4.6665496826171875, "learning_rate": 0.00018047303127446444, "loss": 1.2453, "step": 68350 },
+    { "epoch": 2.0422177768488936, "grad_norm": 4.3825507164001465, "learning_rate": 0.0001804587454785456, "loss": 1.2272, "step": 68400 },
+    { "epoch": 2.0437106261009763, "grad_norm": 4.299841403961182, "learning_rate": 0.00018044445968262677, "loss": 1.2198, "step": 68450 },
+    { "epoch": 2.045203475353059, "grad_norm": 3.937131881713867, "learning_rate": 0.00018043017388670794, "loss": 1.2763, "step": 68500 },
+    { "epoch": 2.0466963246051413, "grad_norm": 6.704258918762207, "learning_rate": 0.0001804158880907891, "loss": 1.3289, "step": 68550 },
+    { "epoch": 2.048189173857224, "grad_norm": 3.8973612785339355, "learning_rate": 0.00018040160229487026, "loss": 1.2907, "step": 68600 },
+    { "epoch": 2.0496820231093063, "grad_norm": 8.648911476135254, "learning_rate": 0.00018038731649895143, "loss": 1.2488, "step": 68650 },
+    { "epoch": 2.051174872361389, "grad_norm": 4.553196430206299, "learning_rate": 0.0001803730307030326, "loss": 1.2489, "step": 68700 },
+    { "epoch": 2.0526677216134717, "grad_norm": 4.379563808441162, "learning_rate": 0.00018035874490711376, "loss": 1.253, "step": 68750 },
+    { "epoch": 2.054160570865554, "grad_norm": 6.393334865570068, "learning_rate": 0.00018034445911119492, "loss": 1.1783, "step": 68800 },
+    { "epoch": 2.0556534201176366, "grad_norm": 4.846323013305664, "learning_rate": 0.0001803301733152761, "loss": 1.2583, "step": 68850 },
+    { "epoch": 2.057146269369719, "grad_norm": 5.24686861038208, "learning_rate": 0.00018031588751935725, "loss": 1.333, "step": 68900 },
+    { "epoch": 2.0586391186218016, "grad_norm": 4.824219226837158, "learning_rate": 0.00018030160172343842, "loss": 1.2931, "step": 68950 },
+    { "epoch": 2.0601319678738843, "grad_norm": 5.001037120819092, "learning_rate": 0.0001802873159275196, "loss": 1.2625, "step": 69000 },
+    { "epoch": 2.0616248171259666, "grad_norm": 4.036411762237549, "learning_rate": 0.00018027303013160075, "loss": 1.2585, "step": 69050 },
+    { "epoch": 2.0631176663780493, "grad_norm": 4.597596645355225, "learning_rate": 0.00018025874433568194, "loss": 1.2812, "step": 69100 },
+    { "epoch": 2.0646105156301315, "grad_norm": 7.785471439361572, "learning_rate": 0.00018024445853976308, "loss": 1.2749, "step": 69150 },
+    { "epoch": 2.0661033648822142, "grad_norm": 4.905806541442871, "learning_rate": 0.00018023017274384427, "loss": 1.2619, "step": 69200 },
+    { "epoch": 2.0675962141342965, "grad_norm": 5.271987438201904, "learning_rate": 0.0001802158869479254, "loss": 1.2769, "step": 69250 },
+    { "epoch": 2.069089063386379, "grad_norm": 4.129353046417236, "learning_rate": 0.0001802016011520066, "loss": 1.2552, "step": 69300 },
+    { "epoch": 2.070581912638462, "grad_norm": 4.855686187744141, "learning_rate": 0.00018018731535608776, "loss": 1.2452, "step": 69350 },
+    { "epoch": 2.072074761890544, "grad_norm": 4.6511383056640625, "learning_rate": 0.00018017302956016893, "loss": 1.2529, "step": 69400 },
+    { "epoch": 2.073567611142627, "grad_norm": 4.4711995124816895, "learning_rate": 0.0001801587437642501, "loss": 1.2721, "step": 69450 },
+    { "epoch": 2.075060460394709, "grad_norm": 4.466591835021973, "learning_rate": 0.00018014445796833126, "loss": 1.2654, "step": 69500 },
+    { "epoch": 2.076553309646792, "grad_norm": 5.165157318115234, "learning_rate": 0.00018013017217241242, "loss": 1.2522, "step": 69550 },
+    { "epoch": 2.0780461588988746, "grad_norm": 4.762765407562256, "learning_rate": 0.00018011588637649358, "loss": 1.2318, "step": 69600 },
+    { "epoch": 2.079539008150957, "grad_norm": 5.024138927459717, "learning_rate": 0.00018010160058057475, "loss": 1.2713, "step": 69650 },
+    { "epoch": 2.0810318574030395, "grad_norm": 5.054821014404297, "learning_rate": 0.00018008731478465591, "loss": 1.2714, "step": 69700 },
+    { "epoch": 2.082524706655122, "grad_norm": 5.052605152130127, "learning_rate": 0.00018007302898873708, "loss": 1.2001, "step": 69750 },
+    { "epoch": 2.0840175559072045, "grad_norm": 6.121365547180176, "learning_rate": 0.00018005874319281827, "loss": 1.2372, "step": 69800 },
+    { "epoch": 2.085510405159287, "grad_norm": 4.6751627922058105, "learning_rate": 0.0001800444573968994, "loss": 1.2366, "step": 69850 },
+    { "epoch": 2.0870032544113695, "grad_norm": 4.396819591522217, "learning_rate": 0.0001800301716009806, "loss": 1.2878, "step": 69900 },
+    { "epoch": 2.088496103663452, "grad_norm": 5.096658229827881, "learning_rate": 0.00018001588580506174, "loss": 1.2265, "step": 69950 },
+    { "epoch": 2.0899889529155344, "grad_norm": 4.45212459564209, "learning_rate": 0.00018000160000914293, "loss": 1.2261, "step": 70000
     }
   ],
   "logging_steps": 50,
@@ -8433,7 +9833,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.768650507627266e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
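The state file this commit updates is plain JSON, so the logged curve above can be recovered without any training framework. A minimal sketch under the layout shown in this diff ("log_history" entries carrying epoch, grad_norm, learning_rate, loss, and step); which records carry a "loss" key versus an "eval_loss" key is an assumption about the Trainer's logging:

```python
# Sketch: read the trainer_state.json from this checkpoint and extract
# the training-loss curve. The path mirrors this repository's layout.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])  # 70000 for this checkpoint
print(state["epoch"])        # 2.0899889529155344

# Keep only records that logged a training loss (eval records would
# carry "eval_loss" instead, under the assumption noted above).
curve = [(rec["step"], rec["loss"]) for rec in state["log_history"] if "loss" in rec]
print(curve[-1])             # (70000, 1.2261)
```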