Training in progress, step 150000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step150000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step150000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0f1db9b1208f9aad02434d0348acb114ab75299a4b84a2cc335a78a803ac68ef
 size 42002584
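The weights file itself lives in Git LFS, so the commit only rewrites the three-line pointer above (spec version, `oid sha256:` content hash, byte size). As an illustrative aside, a minimal sketch of parsing such a pointer into its fields, assuming a checked-out pointer file at the path shown in the diff:

```python
# Minimal sketch: split a Git LFS pointer file into its key/value fields.
# The path is the one from the diff; any LFS pointer file has this shape.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value  # e.g. "oid" -> "sha256:0f1db9b1..."
    return fields

pointer = parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(pointer["oid"], pointer["size"])  # content hash and byte size (42002584)
```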
last-checkpoint/global_step150000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d12b485fde7668144e6de39c328ef0a4b570e877297005a7c45a0a1b0f5896a1
+size 251710672
last-checkpoint/global_step150000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a165a4c9ceea37c1f322a10f77892c96688cd765187aa5e3537721295b8a8d04
+size 153747385
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
-
+global_step150000
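`latest` is the tag file DeepSpeed consults to find the newest checkpoint directory, here `global_step150000`, which holds the ZeRO-partitioned bf16 optimizer states and the module states added above. As a sketch, such a checkpoint can be consolidated into a single fp32 state dict with DeepSpeed's bundled helper; this assumes DeepSpeed is installed and all rank shards are present in the directory:

```python
# Sketch: merge the ZeRO shards under last-checkpoint/ into one fp32
# state dict. With the default tag=None the helper reads the `latest`
# file to pick the global_step150000 directory, as a resumed run would.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
print(f"consolidated {len(state_dict)} parameter tensors")
```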
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f5d5cfcac3fb4961ec3341f47db9ffc87dd5261aebb586ceb5e4d6c5a8068b65
 size 14244
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.
+  "epoch": 4.478547756247574,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 150000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19614,6 +19614,1406 @@
       "learning_rate": 0.00016000148572277556,
       "loss": 1.2099,
       "step": 140000
+    },
+    {
+      "epoch": 4.181470755083152,
+      "grad_norm": 4.736166954040527,
+      "learning_rate": 0.00015998719992685672,
+      "loss": 1.2101,
+      "step": 140050
+    },
+    {
+      "epoch": 4.182963604335234,
+      "grad_norm": 4.431340217590332,
+      "learning_rate": 0.0001599729141309379,
+      "loss": 1.1885,
+      "step": 140100
+    },
+    {
+      "epoch": 4.184456453587317,
+      "grad_norm": 4.86411190032959,
+      "learning_rate": 0.00015995862833501905,
+      "loss": 1.2,
+      "step": 140150
+    },
+    {
+      "epoch": 4.1859493028394,
+      "grad_norm": 4.4166951179504395,
+      "learning_rate": 0.00015994434253910022,
+      "loss": 1.1717,
+      "step": 140200
+    },
+    {
+      "epoch": 4.1874421520914815,
+      "grad_norm": 4.2804856300354,
+      "learning_rate": 0.0001599300567431814,
+      "loss": 1.1695,
+      "step": 140250
+    },
+    {
+      "epoch": 4.188935001343564,
+      "grad_norm": 3.3184804916381836,
+      "learning_rate": 0.00015991577094726255,
+      "loss": 1.1864,
+      "step": 140300
+    },
+    {
+      "epoch": 4.190427850595647,
+      "grad_norm": 3.712420701980591,
+      "learning_rate": 0.00015990148515134374,
+      "loss": 1.2254,
+      "step": 140350
+    },
+    {
+      "epoch": 4.19192069984773,
+      "grad_norm": 4.40815544128418,
+      "learning_rate": 0.00015988719935542488,
+      "loss": 1.1645,
+      "step": 140400
+    },
+    {
+      "epoch": 4.193413549099812,
+      "grad_norm": 4.534137725830078,
+      "learning_rate": 0.00015987291355950607,
+      "loss": 1.2137,
+      "step": 140450
+    },
+    {
+      "epoch": 4.194906398351894,
+      "grad_norm": 3.4060471057891846,
+      "learning_rate": 0.00015985862776358723,
+      "loss": 1.1804,
+      "step": 140500
+    },
+    {
+      "epoch": 4.196399247603977,
+      "grad_norm": 3.3433594703674316,
+      "learning_rate": 0.0001598443419676684,
+      "loss": 1.1801,
+      "step": 140550
+    },
+    {
+      "epoch": 4.19789209685606,
+      "grad_norm": 4.448024272918701,
+      "learning_rate": 0.00015983005617174956,
+      "loss": 1.2327,
+      "step": 140600
+    },
+    {
+      "epoch": 4.199384946108142,
+      "grad_norm": 4.073503494262695,
+      "learning_rate": 0.00015981577037583073,
+      "loss": 1.2058,
+      "step": 140650
+    },
+    {
+      "epoch": 4.200877795360224,
+      "grad_norm": 4.200987339019775,
+      "learning_rate": 0.0001598014845799119,
+      "loss": 1.166,
+      "step": 140700
+    },
+    {
+      "epoch": 4.202370644612307,
+      "grad_norm": 6.871157169342041,
+      "learning_rate": 0.00015978719878399306,
+      "loss": 1.1599,
+      "step": 140750
+    },
+    {
+      "epoch": 4.2038634938643895,
+      "grad_norm": 6.4246745109558105,
+      "learning_rate": 0.00015977291298807422,
+      "loss": 1.2119,
+      "step": 140800
+    },
+    {
+      "epoch": 4.205356343116472,
+      "grad_norm": 4.552119731903076,
+      "learning_rate": 0.0001597586271921554,
+      "loss": 1.2171,
+      "step": 140850
+    },
+    {
+      "epoch": 4.206849192368555,
+      "grad_norm": 4.876185894012451,
+      "learning_rate": 0.00015974434139623655,
+      "loss": 1.2087,
+      "step": 140900
+    },
+    {
+      "epoch": 4.208342041620637,
+      "grad_norm": 5.435263633728027,
+      "learning_rate": 0.00015973005560031774,
+      "loss": 1.1805,
+      "step": 140950
+    },
+    {
+      "epoch": 4.2098348908727194,
+      "grad_norm": 6.356777191162109,
+      "learning_rate": 0.00015971576980439888,
+      "loss": 1.1849,
+      "step": 141000
+    },
+    {
+      "epoch": 4.211327740124802,
+      "grad_norm": 5.153776168823242,
+      "learning_rate": 0.00015970148400848007,
+      "loss": 1.1769,
+      "step": 141050
+    },
+    {
+      "epoch": 4.212820589376885,
+      "grad_norm": 3.6175692081451416,
+      "learning_rate": 0.0001596871982125612,
+      "loss": 1.1857,
+      "step": 141100
+    },
+    {
+      "epoch": 4.214313438628968,
+      "grad_norm": 3.5825202465057373,
+      "learning_rate": 0.0001596729124166424,
+      "loss": 1.2036,
+      "step": 141150
+    },
+    {
+      "epoch": 4.215806287881049,
+      "grad_norm": 4.836036205291748,
+      "learning_rate": 0.00015965862662072354,
+      "loss": 1.2118,
+      "step": 141200
+    },
+    {
+      "epoch": 4.217299137133132,
+      "grad_norm": 4.852596759796143,
+      "learning_rate": 0.00015964434082480473,
+      "loss": 1.1825,
+      "step": 141250
+    },
+    {
+      "epoch": 4.218791986385215,
+      "grad_norm": 6.009531497955322,
+      "learning_rate": 0.0001596300550288859,
+      "loss": 1.2429,
+      "step": 141300
+    },
+    {
+      "epoch": 4.2202848356372975,
+      "grad_norm": 4.5213727951049805,
+      "learning_rate": 0.00015961576923296706,
+      "loss": 1.1961,
+      "step": 141350
+    },
+    {
+      "epoch": 4.22177768488938,
+      "grad_norm": 4.069146633148193,
+      "learning_rate": 0.00015960148343704822,
+      "loss": 1.214,
+      "step": 141400
+    },
+    {
+      "epoch": 4.223270534141462,
+      "grad_norm": 4.735863208770752,
+      "learning_rate": 0.0001595871976411294,
+      "loss": 1.2156,
+      "step": 141450
+    },
+    {
+      "epoch": 4.224763383393545,
+      "grad_norm": 4.679515361785889,
+      "learning_rate": 0.00015957291184521055,
+      "loss": 1.1919,
+      "step": 141500
+    },
+    {
+      "epoch": 4.226256232645627,
+      "grad_norm": 4.299509048461914,
+      "learning_rate": 0.00015955862604929172,
+      "loss": 1.198,
+      "step": 141550
+    },
+    {
+      "epoch": 4.22774908189771,
+      "grad_norm": 4.518818378448486,
+      "learning_rate": 0.00015954434025337288,
+      "loss": 1.1918,
+      "step": 141600
+    },
+    {
+      "epoch": 4.229241931149793,
+      "grad_norm": 2.8362114429473877,
+      "learning_rate": 0.00015953005445745405,
+      "loss": 1.1891,
+      "step": 141650
+    },
+    {
+      "epoch": 4.230734780401875,
+      "grad_norm": 5.10905647277832,
+      "learning_rate": 0.0001595157686615352,
+      "loss": 1.2033,
+      "step": 141700
+    },
+    {
+      "epoch": 4.232227629653957,
+      "grad_norm": 5.38252592086792,
+      "learning_rate": 0.0001595014828656164,
+      "loss": 1.2235,
+      "step": 141750
+    },
+    {
+      "epoch": 4.23372047890604,
+      "grad_norm": 4.693549156188965,
+      "learning_rate": 0.00015948719706969754,
+      "loss": 1.2876,
+      "step": 141800
+    },
+    {
+      "epoch": 4.235213328158123,
+      "grad_norm": 5.683078289031982,
+      "learning_rate": 0.00015947291127377873,
+      "loss": 1.175,
+      "step": 141850
+    },
+    {
+      "epoch": 4.2367061774102055,
+      "grad_norm": 4.098508358001709,
+      "learning_rate": 0.00015945862547785987,
+      "loss": 1.2489,
+      "step": 141900
+    },
+    {
+      "epoch": 4.238199026662287,
+      "grad_norm": 4.055385589599609,
+      "learning_rate": 0.00015944433968194106,
+      "loss": 1.1783,
+      "step": 141950
+    },
+    {
+      "epoch": 4.23969187591437,
+      "grad_norm": 4.988245010375977,
+      "learning_rate": 0.0001594300538860222,
+      "loss": 1.1549,
+      "step": 142000
+    },
+    {
+      "epoch": 4.241184725166453,
+      "grad_norm": 4.256673812866211,
+      "learning_rate": 0.0001594157680901034,
+      "loss": 1.2084,
+      "step": 142050
+    },
+    {
+      "epoch": 4.242677574418535,
+      "grad_norm": 5.259324550628662,
+      "learning_rate": 0.00015940148229418456,
+      "loss": 1.1815,
+      "step": 142100
+    },
+    {
+      "epoch": 4.244170423670618,
+      "grad_norm": 5.063663005828857,
+      "learning_rate": 0.00015938719649826572,
+      "loss": 1.1878,
+      "step": 142150
+    },
+    {
+      "epoch": 4.2456632729227,
+      "grad_norm": 3.863372325897217,
+      "learning_rate": 0.00015937291070234689,
+      "loss": 1.1603,
+      "step": 142200
+    },
+    {
+      "epoch": 4.247156122174783,
+      "grad_norm": 4.990203380584717,
+      "learning_rate": 0.00015935862490642802,
+      "loss": 1.2208,
+      "step": 142250
+    },
+    {
+      "epoch": 4.248648971426865,
+      "grad_norm": 3.7190535068511963,
+      "learning_rate": 0.00015934433911050922,
+      "loss": 1.2135,
+      "step": 142300
+    },
+    {
+      "epoch": 4.250141820678948,
+      "grad_norm": 6.787582874298096,
+      "learning_rate": 0.00015933005331459035,
+      "loss": 1.1978,
+      "step": 142350
+    },
+    {
+      "epoch": 4.251634669931031,
+      "grad_norm": 4.888009548187256,
+      "learning_rate": 0.00015931576751867155,
+      "loss": 1.2557,
+      "step": 142400
+    },
+    {
+      "epoch": 4.253127519183113,
+      "grad_norm": 4.288732528686523,
+      "learning_rate": 0.0001593014817227527,
+      "loss": 1.2057,
+      "step": 142450
+    },
+    {
+      "epoch": 4.254620368435195,
+      "grad_norm": 4.282674789428711,
+      "learning_rate": 0.00015928719592683387,
+      "loss": 1.1783,
+      "step": 142500
+    },
+    {
+      "epoch": 4.256113217687278,
+      "grad_norm": 4.2092719078063965,
+      "learning_rate": 0.00015927291013091504,
+      "loss": 1.214,
+      "step": 142550
+    },
+    {
+      "epoch": 4.257606066939361,
+      "grad_norm": 3.5486562252044678,
+      "learning_rate": 0.0001592586243349962,
+      "loss": 1.1579,
+      "step": 142600
+    },
+    {
+      "epoch": 4.2590989161914425,
+      "grad_norm": 4.74271821975708,
+      "learning_rate": 0.00015924433853907737,
+      "loss": 1.145,
+      "step": 142650
+    },
+    {
+      "epoch": 4.260591765443525,
+      "grad_norm": 5.327475070953369,
+      "learning_rate": 0.00015923005274315853,
+      "loss": 1.1631,
+      "step": 142700
+    },
+    {
+      "epoch": 4.262084614695608,
+      "grad_norm": 4.441630840301514,
+      "learning_rate": 0.0001592157669472397,
+      "loss": 1.1797,
+      "step": 142750
+    },
+    {
+      "epoch": 4.263577463947691,
+      "grad_norm": 4.510640621185303,
+      "learning_rate": 0.00015920148115132086,
+      "loss": 1.1697,
+      "step": 142800
+    },
+    {
+      "epoch": 4.265070313199773,
+      "grad_norm": 4.31403112411499,
+      "learning_rate": 0.00015918719535540203,
+      "loss": 1.2071,
+      "step": 142850
+    },
+    {
+      "epoch": 4.266563162451856,
+      "grad_norm": 4.649517059326172,
+      "learning_rate": 0.00015917290955948322,
+      "loss": 1.1932,
+      "step": 142900
+    },
+    {
+      "epoch": 4.268056011703938,
+      "grad_norm": 3.873218059539795,
+      "learning_rate": 0.00015915862376356436,
+      "loss": 1.1632,
+      "step": 142950
+    },
+    {
+      "epoch": 4.269548860956021,
+      "grad_norm": 4.308788776397705,
+      "learning_rate": 0.00015914433796764555,
+      "loss": 1.2257,
+      "step": 143000
+    },
+    {
+      "epoch": 4.271041710208103,
+      "grad_norm": 5.109141826629639,
+      "learning_rate": 0.00015913005217172669,
+      "loss": 1.2442,
+      "step": 143050
+    },
+    {
+      "epoch": 4.272534559460186,
+      "grad_norm": 3.967876434326172,
+      "learning_rate": 0.00015911576637580788,
+      "loss": 1.2319,
+      "step": 143100
+    },
+    {
+      "epoch": 4.274027408712268,
+      "grad_norm": 5.368978500366211,
+      "learning_rate": 0.00015910148057988904,
+      "loss": 1.1588,
+      "step": 143150
+    },
+    {
+      "epoch": 4.2755202579643505,
+      "grad_norm": 5.190979480743408,
+      "learning_rate": 0.0001590871947839702,
+      "loss": 1.2443,
+      "step": 143200
+    },
+    {
+      "epoch": 4.277013107216433,
+      "grad_norm": 3.6689977645874023,
+      "learning_rate": 0.00015907290898805137,
+      "loss": 1.2022,
+      "step": 143250
+    },
+    {
+      "epoch": 4.278505956468516,
+      "grad_norm": 4.946050643920898,
+      "learning_rate": 0.00015905862319213254,
+      "loss": 1.2022,
+      "step": 143300
+    },
+    {
+      "epoch": 4.279998805720599,
+      "grad_norm": 3.930406093597412,
+      "learning_rate": 0.0001590443373962137,
+      "loss": 1.2069,
+      "step": 143350
+    },
+    {
+      "epoch": 4.2814916549726805,
+      "grad_norm": 4.970086574554443,
+      "learning_rate": 0.00015903005160029487,
+      "loss": 1.1877,
+      "step": 143400
+    },
+    {
+      "epoch": 4.282984504224763,
+      "grad_norm": 4.933574199676514,
+      "learning_rate": 0.00015901576580437603,
+      "loss": 1.2037,
+      "step": 143450
+    },
+    {
+      "epoch": 4.284477353476846,
+      "grad_norm": 5.583881378173828,
+      "learning_rate": 0.0001590014800084572,
+      "loss": 1.2082,
+      "step": 143500
+    },
+    {
+      "epoch": 4.285970202728929,
+      "grad_norm": 3.963679552078247,
+      "learning_rate": 0.00015898719421253836,
+      "loss": 1.176,
+      "step": 143550
+    },
+    {
+      "epoch": 4.287463051981011,
+      "grad_norm": 3.982883930206299,
+      "learning_rate": 0.00015897290841661952,
+      "loss": 1.2513,
+      "step": 143600
+    },
+    {
+      "epoch": 4.288955901233093,
+      "grad_norm": 3.9288153648376465,
+      "learning_rate": 0.0001589586226207007,
+      "loss": 1.187,
+      "step": 143650
+    },
+    {
+      "epoch": 4.290448750485176,
+      "grad_norm": 5.109147071838379,
+      "learning_rate": 0.00015894433682478188,
+      "loss": 1.2322,
+      "step": 143700
+    },
+    {
+      "epoch": 4.2919415997372585,
+      "grad_norm": 7.246605396270752,
+      "learning_rate": 0.00015893005102886302,
+      "loss": 1.1822,
+      "step": 143750
+    },
+    {
+      "epoch": 4.293434448989341,
+      "grad_norm": 4.197552680969238,
+      "learning_rate": 0.0001589157652329442,
+      "loss": 1.2281,
+      "step": 143800
+    },
+    {
+      "epoch": 4.294927298241424,
+      "grad_norm": 6.113961696624756,
+      "learning_rate": 0.00015890147943702535,
+      "loss": 1.1814,
+      "step": 143850
+    },
+    {
+      "epoch": 4.296420147493506,
+      "grad_norm": 6.120628356933594,
+      "learning_rate": 0.00015888719364110654,
+      "loss": 1.1497,
+      "step": 143900
+    },
+    {
+      "epoch": 4.2979129967455885,
+      "grad_norm": 5.152296543121338,
+      "learning_rate": 0.0001588729078451877,
+      "loss": 1.2277,
+      "step": 143950
+    },
+    {
+      "epoch": 4.299405845997671,
+      "grad_norm": 4.7058000564575195,
+      "learning_rate": 0.00015885862204926887,
+      "loss": 1.1586,
+      "step": 144000
+    },
+    {
+      "epoch": 4.300898695249754,
+      "grad_norm": 4.757035732269287,
+      "learning_rate": 0.00015884433625335003,
+      "loss": 1.1701,
+      "step": 144050
+    },
+    {
+      "epoch": 4.302391544501837,
+      "grad_norm": 5.381356239318848,
+      "learning_rate": 0.0001588300504574312,
+      "loss": 1.2242,
+      "step": 144100
+    },
+    {
+      "epoch": 4.303884393753918,
+      "grad_norm": 4.9179511070251465,
+      "learning_rate": 0.00015881576466151236,
+      "loss": 1.2313,
+      "step": 144150
+    },
+    {
+      "epoch": 4.305377243006001,
+      "grad_norm": 4.601654529571533,
+      "learning_rate": 0.00015880147886559353,
+      "loss": 1.2054,
+      "step": 144200
+    },
+    {
+      "epoch": 4.306870092258084,
+      "grad_norm": 4.032548904418945,
+      "learning_rate": 0.0001587871930696747,
+      "loss": 1.2266,
+      "step": 144250
+    },
+    {
+      "epoch": 4.3083629415101665,
+      "grad_norm": 4.067842960357666,
+      "learning_rate": 0.00015877290727375586,
+      "loss": 1.1795,
+      "step": 144300
+    },
+    {
+      "epoch": 4.309855790762249,
+      "grad_norm": 4.99582576751709,
+      "learning_rate": 0.00015875862147783702,
+      "loss": 1.2216,
+      "step": 144350
+    },
+    {
+      "epoch": 4.311348640014331,
+      "grad_norm": 6.543945789337158,
+      "learning_rate": 0.0001587443356819182,
+      "loss": 1.1862,
+      "step": 144400
+    },
+    {
+      "epoch": 4.312841489266414,
+      "grad_norm": 4.526428699493408,
+      "learning_rate": 0.00015873004988599935,
+      "loss": 1.1575,
+      "step": 144450
+    },
+    {
+      "epoch": 4.3143343385184965,
+      "grad_norm": 5.83212947845459,
+      "learning_rate": 0.00015871576409008054,
+      "loss": 1.2166,
+      "step": 144500
+    },
+    {
+      "epoch": 4.315827187770579,
+      "grad_norm": 3.717582941055298,
+      "learning_rate": 0.00015870147829416168,
+      "loss": 1.2081,
+      "step": 144550
+    },
+    {
+      "epoch": 4.317320037022661,
+      "grad_norm": 4.536131381988525,
+      "learning_rate": 0.00015868719249824287,
+      "loss": 1.2142,
+      "step": 144600
+    },
+    {
+      "epoch": 4.318812886274744,
+      "grad_norm": 4.8904290199279785,
+      "learning_rate": 0.000158672906702324,
+      "loss": 1.2321,
+      "step": 144650
+    },
+    {
+      "epoch": 4.320305735526826,
+      "grad_norm": 3.6146597862243652,
+      "learning_rate": 0.0001586586209064052,
+      "loss": 1.1443,
+      "step": 144700
+    },
+    {
+      "epoch": 4.321798584778909,
+      "grad_norm": 4.939590930938721,
+      "learning_rate": 0.00015864433511048637,
+      "loss": 1.2164,
+      "step": 144750
+    },
+    {
+      "epoch": 4.323291434030992,
+      "grad_norm": 3.37874698638916,
+      "learning_rate": 0.00015863004931456753,
+      "loss": 1.2104,
+      "step": 144800
+    },
+    {
+      "epoch": 4.3247842832830745,
+      "grad_norm": 4.682936191558838,
+      "learning_rate": 0.0001586157635186487,
+      "loss": 1.2236,
+      "step": 144850
+    },
+    {
+      "epoch": 4.326277132535156,
+      "grad_norm": 3.446786880493164,
+      "learning_rate": 0.00015860147772272983,
+      "loss": 1.1452,
+      "step": 144900
+    },
+    {
+      "epoch": 4.327769981787239,
+      "grad_norm": 3.864423990249634,
+      "learning_rate": 0.00015858719192681102,
+      "loss": 1.1497,
+      "step": 144950
+    },
+    {
+      "epoch": 4.329262831039322,
+      "grad_norm": 5.804245471954346,
+      "learning_rate": 0.00015857290613089216,
+      "loss": 1.2297,
+      "step": 145000
+    },
+    {
+      "epoch": 4.3307556802914045,
+      "grad_norm": 6.271811485290527,
+      "learning_rate": 0.00015855862033497335,
+      "loss": 1.1358,
+      "step": 145050
+    },
+    {
+      "epoch": 4.332248529543486,
+      "grad_norm": 5.019343852996826,
+      "learning_rate": 0.00015854433453905452,
+      "loss": 1.2191,
+      "step": 145100
+    },
+    {
+      "epoch": 4.333741378795569,
+      "grad_norm": 7.10085916519165,
+      "learning_rate": 0.00015853004874313568,
+      "loss": 1.1678,
+      "step": 145150
+    },
+    {
+      "epoch": 4.335234228047652,
+      "grad_norm": 4.680956840515137,
+      "learning_rate": 0.00015851576294721685,
+      "loss": 1.155,
+      "step": 145200
+    },
+    {
+      "epoch": 4.336727077299734,
+      "grad_norm": 4.227858066558838,
+      "learning_rate": 0.000158501477151298,
+      "loss": 1.1563,
+      "step": 145250
+    },
+    {
+      "epoch": 4.338219926551817,
+      "grad_norm": 6.304920196533203,
+      "learning_rate": 0.00015848719135537918,
+      "loss": 1.2083,
+      "step": 145300
+    },
+    {
+      "epoch": 4.339712775803899,
+      "grad_norm": 3.7268805503845215,
+      "learning_rate": 0.00015847290555946034,
+      "loss": 1.2171,
+      "step": 145350
+    },
+    {
+      "epoch": 4.341205625055982,
+      "grad_norm": 3.7859396934509277,
+      "learning_rate": 0.0001584586197635415,
+      "loss": 1.198,
+      "step": 145400
+    },
+    {
+      "epoch": 4.342698474308064,
+      "grad_norm": 4.7007246017456055,
+      "learning_rate": 0.00015844433396762267,
+      "loss": 1.2418,
+      "step": 145450
+    },
+    {
+      "epoch": 4.344191323560147,
+      "grad_norm": 4.652912616729736,
+      "learning_rate": 0.00015843004817170383,
+      "loss": 1.2408,
+      "step": 145500
+    },
+    {
+      "epoch": 4.34568417281223,
+      "grad_norm": 5.060214519500732,
+      "learning_rate": 0.00015841576237578503,
+      "loss": 1.1922,
+      "step": 145550
+    },
+    {
+      "epoch": 4.347177022064312,
+      "grad_norm": 4.346338272094727,
+      "learning_rate": 0.00015840147657986616,
+      "loss": 1.223,
+      "step": 145600
+    },
+    {
+      "epoch": 4.348669871316394,
+      "grad_norm": 3.7870049476623535,
+      "learning_rate": 0.00015838719078394736,
+      "loss": 1.205,
+      "step": 145650
+    },
+    {
+      "epoch": 4.350162720568477,
+      "grad_norm": 4.397316932678223,
+      "learning_rate": 0.0001583729049880285,
+      "loss": 1.1928,
+      "step": 145700
+    },
+    {
+      "epoch": 4.35165556982056,
+      "grad_norm": 4.058841228485107,
+      "learning_rate": 0.00015835861919210969,
+      "loss": 1.2002,
+      "step": 145750
+    },
+    {
+      "epoch": 4.353148419072642,
+      "grad_norm": 4.240140914916992,
+      "learning_rate": 0.00015834433339619082,
+      "loss": 1.1746,
+      "step": 145800
+    },
+    {
+      "epoch": 4.354641268324724,
+      "grad_norm": 4.605095863342285,
+      "learning_rate": 0.00015833004760027201,
+      "loss": 1.1907,
+      "step": 145850
+    },
+    {
+      "epoch": 4.356134117576807,
+      "grad_norm": 4.193150997161865,
+      "learning_rate": 0.00015831576180435318,
+      "loss": 1.2754,
+      "step": 145900
+    },
+    {
+      "epoch": 4.35762696682889,
+      "grad_norm": 4.516674995422363,
+      "learning_rate": 0.00015830147600843434,
+      "loss": 1.2,
+      "step": 145950
+    },
+    {
+      "epoch": 4.359119816080972,
+      "grad_norm": 4.528346538543701,
+      "learning_rate": 0.0001582871902125155,
+      "loss": 1.2143,
+      "step": 146000
+    },
+    {
+      "epoch": 4.360612665333055,
+      "grad_norm": 4.620771408081055,
+      "learning_rate": 0.00015827290441659667,
+      "loss": 1.2008,
+      "step": 146050
+    },
+    {
+      "epoch": 4.362105514585137,
+      "grad_norm": 5.53831148147583,
+      "learning_rate": 0.00015825861862067784,
+      "loss": 1.1736,
+      "step": 146100
+    },
+    {
+      "epoch": 4.3635983638372196,
+      "grad_norm": 5.173369884490967,
+      "learning_rate": 0.000158244332824759,
+      "loss": 1.2242,
+      "step": 146150
+    },
+    {
+      "epoch": 4.365091213089302,
+      "grad_norm": 4.372010231018066,
+      "learning_rate": 0.00015823004702884017,
+      "loss": 1.1915,
+      "step": 146200
+    },
+    {
+      "epoch": 4.366584062341385,
+      "grad_norm": 6.224375247955322,
+      "learning_rate": 0.00015821576123292133,
+      "loss": 1.2133,
+      "step": 146250
+    },
+    {
+      "epoch": 4.368076911593468,
+      "grad_norm": 8.175760269165039,
+      "learning_rate": 0.0001582014754370025,
+      "loss": 1.208,
+      "step": 146300
+    },
+    {
+      "epoch": 4.3695697608455495,
+      "grad_norm": 4.286147594451904,
+      "learning_rate": 0.0001581871896410837,
+      "loss": 1.2283,
+      "step": 146350
+    },
+    {
+      "epoch": 4.371062610097632,
+      "grad_norm": 5.579278945922852,
+      "learning_rate": 0.00015817290384516483,
+      "loss": 1.1817,
+      "step": 146400
+    },
+    {
+      "epoch": 4.372555459349715,
+      "grad_norm": 3.485745668411255,
+      "learning_rate": 0.00015815861804924602,
+      "loss": 1.2079,
+      "step": 146450
+    },
+    {
+      "epoch": 4.374048308601798,
+      "grad_norm": 5.126974582672119,
+      "learning_rate": 0.00015814433225332716,
+      "loss": 1.1943,
+      "step": 146500
+    },
+    {
+      "epoch": 4.37554115785388,
+      "grad_norm": 5.43192720413208,
+      "learning_rate": 0.00015813004645740835,
+      "loss": 1.1759,
+      "step": 146550
+    },
+    {
+      "epoch": 4.377034007105962,
+      "grad_norm": 5.335154056549072,
+      "learning_rate": 0.0001581157606614895,
+      "loss": 1.2406,
+      "step": 146600
+    },
+    {
+      "epoch": 4.378526856358045,
+      "grad_norm": 4.4820756912231445,
+      "learning_rate": 0.00015810147486557068,
+      "loss": 1.2259,
+      "step": 146650
+    },
+    {
+      "epoch": 4.3800197056101275,
+      "grad_norm": 4.541141986846924,
+      "learning_rate": 0.00015808718906965184,
+      "loss": 1.2041,
+      "step": 146700
+    },
+    {
+      "epoch": 4.38151255486221,
+      "grad_norm": 5.367811679840088,
+      "learning_rate": 0.000158072903273733,
+      "loss": 1.1894,
+      "step": 146750
+    },
+    {
+      "epoch": 4.383005404114293,
+      "grad_norm": 3.965756893157959,
+      "learning_rate": 0.00015805861747781417,
+      "loss": 1.2322,
+      "step": 146800
+    },
+    {
+      "epoch": 4.384498253366375,
+      "grad_norm": 5.187005519866943,
+      "learning_rate": 0.00015804433168189533,
+      "loss": 1.2183,
+      "step": 146850
+    },
+    {
+      "epoch": 4.3859911026184575,
+      "grad_norm": 4.8554205894470215,
+      "learning_rate": 0.0001580300458859765,
+      "loss": 1.2169,
+      "step": 146900
+    },
+    {
+      "epoch": 4.38748395187054,
+      "grad_norm": 5.468644618988037,
+      "learning_rate": 0.00015801576009005766,
+      "loss": 1.2101,
+      "step": 146950
+    },
+    {
+      "epoch": 4.388976801122623,
+      "grad_norm": 3.7732479572296143,
+      "learning_rate": 0.00015800147429413883,
+      "loss": 1.226,
+      "step": 147000
+    },
+    {
+      "epoch": 4.390469650374705,
+      "grad_norm": 4.265769958496094,
+      "learning_rate": 0.00015798718849822,
+      "loss": 1.1803,
+      "step": 147050
+    },
+    {
+      "epoch": 4.391962499626787,
+      "grad_norm": 5.331263065338135,
+      "learning_rate": 0.00015797290270230116,
+      "loss": 1.1799,
+      "step": 147100
+    },
+    {
+      "epoch": 4.39345534887887,
+      "grad_norm": 4.761427879333496,
+      "learning_rate": 0.00015795861690638235,
+      "loss": 1.2091,
+      "step": 147150
+    },
+    {
+      "epoch": 4.394948198130953,
+      "grad_norm": 4.461631774902344,
+      "learning_rate": 0.0001579443311104635,
+      "loss": 1.2328,
+      "step": 147200
+    },
+    {
+      "epoch": 4.3964410473830355,
+      "grad_norm": 4.383672714233398,
+      "learning_rate": 0.00015793004531454468,
+      "loss": 1.2279,
+      "step": 147250
+    },
+    {
+      "epoch": 4.397933896635118,
+      "grad_norm": 6.641529560089111,
+      "learning_rate": 0.00015791575951862582,
+      "loss": 1.2231,
+      "step": 147300
+    },
+    {
+      "epoch": 4.3994267458872,
+      "grad_norm": 3.944716215133667,
+      "learning_rate": 0.000157901473722707,
+      "loss": 1.2308,
+      "step": 147350
+    },
+    {
+      "epoch": 4.400919595139283,
+      "grad_norm": 3.688462495803833,
+      "learning_rate": 0.00015788718792678817,
+      "loss": 1.252,
+      "step": 147400
+    },
+    {
+      "epoch": 4.4024124443913655,
+      "grad_norm": 5.539252281188965,
+      "learning_rate": 0.00015787290213086934,
+      "loss": 1.2468,
+      "step": 147450
+    },
+    {
+      "epoch": 4.403905293643448,
+      "grad_norm": 5.409310340881348,
+      "learning_rate": 0.0001578586163349505,
+      "loss": 1.2176,
+      "step": 147500
+    },
+    {
+      "epoch": 4.40539814289553,
+      "grad_norm": 4.958510875701904,
+      "learning_rate": 0.00015784433053903164,
+      "loss": 1.2498,
+      "step": 147550
+    },
+    {
+      "epoch": 4.406890992147613,
+      "grad_norm": 5.825056076049805,
+      "learning_rate": 0.00015783004474311283,
+      "loss": 1.2347,
+      "step": 147600
+    },
+    {
+      "epoch": 4.408383841399695,
+      "grad_norm": 6.798030853271484,
+      "learning_rate": 0.00015781575894719397,
+      "loss": 1.2459,
+      "step": 147650
+    },
+    {
+      "epoch": 4.409876690651778,
+      "grad_norm": 4.517495632171631,
+      "learning_rate": 0.00015780147315127516,
+      "loss": 1.2076,
+      "step": 147700
+    },
+    {
+      "epoch": 4.411369539903861,
+      "grad_norm": 4.233908653259277,
+      "learning_rate": 0.00015778718735535633,
+      "loss": 1.1905,
+      "step": 147750
+    },
+    {
+      "epoch": 4.412862389155943,
+      "grad_norm": 4.206421375274658,
+      "learning_rate": 0.0001577729015594375,
+      "loss": 1.2357,
+      "step": 147800
+    },
+    {
+      "epoch": 4.414355238408025,
+      "grad_norm": 4.130222797393799,
+      "learning_rate": 0.00015775861576351866,
+      "loss": 1.201,
+      "step": 147850
+    },
+    {
+      "epoch": 4.415848087660108,
+      "grad_norm": 3.8784375190734863,
+      "learning_rate": 0.00015774432996759982,
+      "loss": 1.2536,
+      "step": 147900
+    },
+    {
+      "epoch": 4.417340936912191,
+      "grad_norm": 5.533395290374756,
+      "learning_rate": 0.00015773004417168098,
+      "loss": 1.208,
+      "step": 147950
+    },
+    {
+      "epoch": 4.4188337861642735,
+      "grad_norm": 5.0397419929504395,
+      "learning_rate": 0.00015771575837576215,
+      "loss": 1.1951,
+      "step": 148000
+    },
+    {
+      "epoch": 4.420326635416355,
+      "grad_norm": 4.333613872528076,
+      "learning_rate": 0.00015770147257984331,
+      "loss": 1.196,
+      "step": 148050
+    },
+    {
+      "epoch": 4.421819484668438,
+      "grad_norm": 5.430739879608154,
+      "learning_rate": 0.00015768718678392448,
+      "loss": 1.1636,
+      "step": 148100
+    },
+    {
+      "epoch": 4.423312333920521,
+      "grad_norm": 4.669544219970703,
+      "learning_rate": 0.00015767290098800564,
+      "loss": 1.2302,
+      "step": 148150
+    },
+    {
+      "epoch": 4.424805183172603,
+      "grad_norm": 4.452166557312012,
+      "learning_rate": 0.00015765861519208683,
+      "loss": 1.1997,
+      "step": 148200
+    },
+    {
+      "epoch": 4.426298032424686,
+      "grad_norm": 3.695939779281616,
+      "learning_rate": 0.00015764432939616797,
+      "loss": 1.2327,
+      "step": 148250
+    },
+    {
+      "epoch": 4.427790881676768,
+      "grad_norm": 5.5830535888671875,
+      "learning_rate": 0.00015763004360024916,
+      "loss": 1.2306,
+      "step": 148300
+    },
+    {
+      "epoch": 4.429283730928851,
+      "grad_norm": 3.978583335876465,
+      "learning_rate": 0.0001576157578043303,
+      "loss": 1.1639,
+      "step": 148350
+    },
+    {
+      "epoch": 4.430776580180933,
+      "grad_norm": 3.8745334148406982,
+      "learning_rate": 0.0001576014720084115,
+      "loss": 1.22,
+      "step": 148400
+    },
+    {
+      "epoch": 4.432269429433016,
+      "grad_norm": 4.7659711837768555,
+      "learning_rate": 0.00015758718621249263,
+      "loss": 1.1871,
+      "step": 148450
+    },
+    {
+      "epoch": 4.433762278685099,
+      "grad_norm": 4.240323066711426,
+      "learning_rate": 0.00015757290041657382,
+      "loss": 1.1782,
+      "step": 148500
+    },
+    {
+      "epoch": 4.435255127937181,
+      "grad_norm": 4.188791751861572,
+      "learning_rate": 0.000157558614620655,
+      "loss": 1.2258,
+      "step": 148550
+    },
+    {
+      "epoch": 4.436747977189263,
+      "grad_norm": 4.706462860107422,
+      "learning_rate": 0.00015754432882473615,
+      "loss": 1.2095,
+      "step": 148600
+    },
+    {
+      "epoch": 4.438240826441346,
+      "grad_norm": 3.878901958465576,
+      "learning_rate": 0.00015753004302881732,
+      "loss": 1.2249,
+      "step": 148650
+    },
+    {
+      "epoch": 4.439733675693429,
+      "grad_norm": 3.91372013092041,
+      "learning_rate": 0.00015751575723289848,
+      "loss": 1.2382,
+      "step": 148700
+    },
+    {
+      "epoch": 4.441226524945511,
+      "grad_norm": 3.9671475887298584,
+      "learning_rate": 0.00015750147143697965,
+      "loss": 1.2729,
+      "step": 148750
+    },
+    {
+      "epoch": 4.442719374197593,
+      "grad_norm": 4.4688591957092285,
+      "learning_rate": 0.0001574871856410608,
+      "loss": 1.2032,
+      "step": 148800
+    },
+    {
+      "epoch": 4.444212223449676,
+      "grad_norm": 4.55964469909668,
+      "learning_rate": 0.00015747289984514198,
+      "loss": 1.2129,
+      "step": 148850
+    },
+    {
+      "epoch": 4.445705072701759,
+      "grad_norm": 5.789206027984619,
+      "learning_rate": 0.00015745861404922314,
+      "loss": 1.1947,
+      "step": 148900
+    },
+    {
+      "epoch": 4.447197921953841,
+      "grad_norm": 4.169732570648193,
+      "learning_rate": 0.0001574443282533043,
+      "loss": 1.2135,
+      "step": 148950
+    },
+    {
+      "epoch": 4.448690771205923,
+      "grad_norm": 4.305688858032227,
+      "learning_rate": 0.0001574300424573855,
+      "loss": 1.1931,
+      "step": 149000
+    },
+    {
+      "epoch": 4.450183620458006,
+      "grad_norm": 5.780201435089111,
+      "learning_rate": 0.00015741575666146663,
+      "loss": 1.2308,
+      "step": 149050
+    },
+    {
+      "epoch": 4.451676469710089,
+      "grad_norm": 5.42836332321167,
+      "learning_rate": 0.00015740147086554783,
+      "loss": 1.1898,
+      "step": 149100
+    },
+    {
+      "epoch": 4.453169318962171,
+      "grad_norm": 4.967898845672607,
+      "learning_rate": 0.00015738718506962896,
+      "loss": 1.1961,
+      "step": 149150
+    },
+    {
+      "epoch": 4.454662168214254,
+      "grad_norm": 4.566812038421631,
+      "learning_rate": 0.00015737289927371015,
+      "loss": 1.2633,
+      "step": 149200
+    },
+    {
+      "epoch": 4.456155017466337,
+      "grad_norm": 4.523815155029297,
+      "learning_rate": 0.0001573586134777913,
+      "loss": 1.2104,
+      "step": 149250
+    },
+    {
+      "epoch": 4.4576478667184185,
+      "grad_norm": 4.3855180740356445,
+      "learning_rate": 0.00015734432768187248,
+      "loss": 1.1904,
+      "step": 149300
+    },
+    {
+      "epoch": 4.459140715970501,
+      "grad_norm": 4.116260051727295,
+      "learning_rate": 0.00015733004188595365,
+      "loss": 1.2162,
+      "step": 149350
+    },
+    {
+      "epoch": 4.460633565222584,
+      "grad_norm": 4.964864253997803,
+      "learning_rate": 0.0001573157560900348,
+      "loss": 1.233,
+      "step": 149400
+    },
+    {
+      "epoch": 4.462126414474667,
+      "grad_norm": 4.709635257720947,
+      "learning_rate": 0.00015730147029411598,
+      "loss": 1.2226,
+      "step": 149450
+    },
+    {
+      "epoch": 4.463619263726748,
+      "grad_norm": 5.310390949249268,
+      "learning_rate": 0.00015728718449819714,
+      "loss": 1.1655,
+      "step": 149500
+    },
+    {
+      "epoch": 4.465112112978831,
+      "grad_norm": 5.156966209411621,
+      "learning_rate": 0.0001572728987022783,
+      "loss": 1.1738,
+      "step": 149550
+    },
+    {
+      "epoch": 4.466604962230914,
+      "grad_norm": 4.476001739501953,
+      "learning_rate": 0.00015725861290635947,
+      "loss": 1.188,
+      "step": 149600
+    },
+    {
+      "epoch": 4.468097811482997,
+      "grad_norm": 4.519705295562744,
+      "learning_rate": 0.00015724432711044064,
+      "loss": 1.1512,
+      "step": 149650
+    },
+    {
+      "epoch": 4.469590660735079,
+      "grad_norm": 5.426435470581055,
+      "learning_rate": 0.0001572300413145218,
+      "loss": 1.2137,
+      "step": 149700
+    },
+    {
+      "epoch": 4.471083509987161,
+      "grad_norm": 3.799715757369995,
+      "learning_rate": 0.00015721575551860297,
+      "loss": 1.2298,
+      "step": 149750
+    },
+    {
+      "epoch": 4.472576359239244,
+      "grad_norm": 3.640909194946289,
+      "learning_rate": 0.00015720146972268416,
+      "loss": 1.1837,
+      "step": 149800
+    },
+    {
+      "epoch": 4.4740692084913265,
+      "grad_norm": 3.8437955379486084,
+      "learning_rate": 0.0001571871839267653,
+      "loss": 1.2025,
+      "step": 149850
+    },
+    {
+      "epoch": 4.475562057743409,
+      "grad_norm": 3.9769036769866943,
+      "learning_rate": 0.0001571728981308465,
+      "loss": 1.1918,
+      "step": 149900
+    },
+    {
+      "epoch": 4.477054906995492,
+      "grad_norm": 4.485401630401611,
+      "learning_rate": 0.00015715861233492762,
+      "loss": 1.1588,
+      "step": 149950
+    },
+    {
+      "epoch": 4.478547756247574,
+      "grad_norm": 4.401375770568848,
+      "learning_rate": 0.00015714432653900882,
+      "loss": 1.2089,
+      "step": 150000
     }
   ],
   "logging_steps": 50,
@@ -19633,7 +21033,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.790073141417476e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
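The bulk of the commit is `trainer_state.json` growing by 1,400 lines: one `log_history` entry every 50 optimizer steps (`"logging_steps": 50`) from step 140050 through 150000. A small sketch of summarizing those entries after downloading the checkpoint, assuming the file layout shown above:

```python
# Sketch: summarize the log entries this commit appended to
# last-checkpoint/trainer_state.json (keys as shown in the diff).
import json

with open("last-checkpoint/trainer_state.json", encoding="utf-8") as fh:
    state = json.load(fh)

new_logs = [e for e in state["log_history"]
            if "loss" in e and e["step"] > 140000]
mean_loss = sum(e["loss"] for e in new_logs) / len(new_logs)
print(f"epoch {state['epoch']:.3f}, step {state['global_step']}: "
      f"mean loss over {len(new_logs)} new logs = {mean_loss:.4f}")
```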