Training in progress, step 180000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step180000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step180000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f0094ab9d915902b4990fc82d24f0fb8705d8c0471888307def672b6974759e
 size 42002584
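The two-line change above swaps the Git LFS pointer for the updated adapter weights: the repository itself stores only a spec version, a sha256 oid, and the byte size, while the binary blob lives in LFS storage. As a hedged illustration (the helper names and paths below are hypothetical, not part of this commit), a pointer can be checked against a downloaded blob like this:

# Minimal sketch: verify a downloaded blob against its Git LFS pointer.
# parse_lfs_pointer / verify_lfs_object and the paths are illustrative
# names, not part of this repository.
import hashlib

def parse_lfs_pointer(pointer_path):
    # Pointer files hold three "key value" lines: version, oid, size.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_lfs_object(pointer_path, blob_path):
    # The blob matches when its streamed sha256 and size equal the pointer's.
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size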
last-checkpoint/global_step180000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d7145c4cc4ad2c0b5237e0098e626e4bb431ec97ecddd9945381a495bfa8475
+size 251710672
last-checkpoint/global_step180000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a5d3cb88c9ca21665b4d6e6f747dc4e2efadfeef482ce91d09e04f5bb502f9f
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step180000
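global_step180000/ holds the DeepSpeed engine state for this step (the ZeRO-sharded bf16 optimizer states and the rank-0 module states added above), while latest is the one-line tag file DeepSpeed consults to find the newest step directory, updated here to global_step180000 (its previous value is elided in the diff view). A minimal sketch of consolidating such a checkpoint into a single fp32 state dict, assuming DeepSpeed is installed and the layout under last-checkpoint/ matches what its zero_to_fp32 utility expects:

# Minimal sketch: merge ZeRO-sharded checkpoint shards into one fp32
# state dict. Assumes a standard DeepSpeed layout under last-checkpoint/.
from deepspeed.utils.zero_to_fp32 import (
    get_fp32_state_dict_from_zero_checkpoint,
)

# With no explicit tag, the utility reads the 'latest' file,
# i.e. "global_step180000" after this commit.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
print(f"consolidated {len(state_dict)} parameter tensors")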
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d46e8f677db1f75dc63cd8cf94001df57ebe470c748bd8c570515fe8fe189eeb
 size 14244
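rng_state.pth snapshots the random-number-generator state so that dataloader shuffling and dropout are reproducible on resume. A hedged sketch of restoring it; the key names ("python", "numpy", "cpu", "cuda") follow the usual Hugging Face Trainer convention and are an assumption about this particular file:

# Minimal sketch: restore RNG state saved alongside the checkpoint.
# Key names are assumed to follow the HF Trainer convention.
import random
import numpy as np
import torch

rng = torch.load("last-checkpoint/rng_state.pth", weights_only=False)
random.setstate(rng["python"])
np.random.set_state(rng["numpy"])
torch.random.set_rng_state(rng["cpu"])
if torch.cuda.is_available() and "cuda" in rng:
    torch.cuda.random.set_rng_state_all(rng["cuda"])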
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.
+  "epoch": 5.374257307497089,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 180000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -23814,6 +23814,1406 @@
       "learning_rate": 0.00015143000817147527,
       "loss": 1.1233,
       "step": 170000
+    },
+    {
+      "epoch": 5.077180306332666,
+      "grad_norm": 4.841482639312744,
+      "learning_rate": 0.00015141572237555644,
+      "loss": 1.1201,
+      "step": 170050
+    },
+    {
+      "epoch": 5.078673155584749,
+      "grad_norm": 5.517151832580566,
+      "learning_rate": 0.0001514014365796376,
+      "loss": 1.0986,
+      "step": 170100
+    },
+    {
+      "epoch": 5.080166004836832,
+      "grad_norm": 4.323915004730225,
+      "learning_rate": 0.00015138715078371877,
+      "loss": 1.1362,
+      "step": 170150
+    },
+    {
+      "epoch": 5.081658854088914,
+      "grad_norm": 4.015235900878906,
+      "learning_rate": 0.00015137286498779993,
+      "loss": 1.1571,
+      "step": 170200
+    },
+    {
+      "epoch": 5.083151703340997,
+      "grad_norm": 5.928144454956055,
+      "learning_rate": 0.0001513585791918811,
+      "loss": 1.1672,
+      "step": 170250
+    },
+    {
+      "epoch": 5.084644552593079,
+      "grad_norm": 4.04873514175415,
+      "learning_rate": 0.0001513442933959623,
+      "loss": 1.1028,
+      "step": 170300
+    },
+    {
+      "epoch": 5.0861374018451615,
+      "grad_norm": 5.569055557250977,
+      "learning_rate": 0.00015133000760004343,
+      "loss": 1.1479,
+      "step": 170350
+    },
+    {
+      "epoch": 5.087630251097244,
+      "grad_norm": 5.128480911254883,
+      "learning_rate": 0.00015131572180412462,
+      "loss": 1.1567,
+      "step": 170400
+    },
+    {
+      "epoch": 5.089123100349327,
+      "grad_norm": 5.323390007019043,
+      "learning_rate": 0.00015130143600820576,
+      "loss": 1.1072,
+      "step": 170450
+    },
+    {
+      "epoch": 5.09061594960141,
+      "grad_norm": 3.7644312381744385,
+      "learning_rate": 0.00015128715021228695,
+      "loss": 1.1435,
+      "step": 170500
+    },
+    {
+      "epoch": 5.092108798853491,
+      "grad_norm": 4.050746917724609,
+      "learning_rate": 0.00015127286441636809,
+      "loss": 1.2082,
+      "step": 170550
+    },
+    {
+      "epoch": 5.093601648105574,
+      "grad_norm": 6.779710292816162,
+      "learning_rate": 0.00015125857862044928,
+      "loss": 1.1314,
+      "step": 170600
+    },
+    {
+      "epoch": 5.095094497357657,
+      "grad_norm": 4.560274124145508,
+      "learning_rate": 0.00015124429282453044,
+      "loss": 1.2157,
+      "step": 170650
+    },
+    {
+      "epoch": 5.0965873466097396,
+      "grad_norm": 5.227869987487793,
+      "learning_rate": 0.0001512300070286116,
+      "loss": 1.187,
+      "step": 170700
+    },
+    {
+      "epoch": 5.098080195861822,
+      "grad_norm": 5.666630744934082,
+      "learning_rate": 0.00015121572123269277,
+      "loss": 1.2005,
+      "step": 170750
+    },
+    {
+      "epoch": 5.099573045113904,
+      "grad_norm": 4.06814432144165,
+      "learning_rate": 0.00015120143543677394,
+      "loss": 1.1351,
+      "step": 170800
+    },
+    {
+      "epoch": 5.101065894365987,
+      "grad_norm": 5.10957145690918,
+      "learning_rate": 0.0001511871496408551,
+      "loss": 1.193,
+      "step": 170850
+    },
+    {
+      "epoch": 5.1025587436180695,
+      "grad_norm": 5.230775833129883,
+      "learning_rate": 0.00015117286384493627,
+      "loss": 1.2186,
+      "step": 170900
+    },
+    {
+      "epoch": 5.104051592870152,
+      "grad_norm": 4.77035665512085,
+      "learning_rate": 0.00015115857804901743,
+      "loss": 1.165,
+      "step": 170950
+    },
+    {
+      "epoch": 5.105544442122235,
+      "grad_norm": 3.762669801712036,
+      "learning_rate": 0.0001511442922530986,
+      "loss": 1.0899,
+      "step": 171000
+    },
+    {
+      "epoch": 5.107037291374317,
+      "grad_norm": 4.551312446594238,
+      "learning_rate": 0.00015113000645717976,
+      "loss": 1.1306,
+      "step": 171050
+    },
+    {
+      "epoch": 5.108530140626399,
+      "grad_norm": 3.9182794094085693,
+      "learning_rate": 0.00015111572066126092,
+      "loss": 1.1365,
+      "step": 171100
+    },
+    {
+      "epoch": 5.110022989878482,
+      "grad_norm": 4.611422538757324,
+      "learning_rate": 0.0001511014348653421,
+      "loss": 1.2169,
+      "step": 171150
+    },
+    {
+      "epoch": 5.111515839130565,
+      "grad_norm": 4.5237321853637695,
+      "learning_rate": 0.00015108714906942325,
+      "loss": 1.1373,
+      "step": 171200
+    },
+    {
+      "epoch": 5.1130086883826475,
+      "grad_norm": 4.893795967102051,
+      "learning_rate": 0.00015107286327350442,
+      "loss": 1.1307,
+      "step": 171250
+    },
+    {
+      "epoch": 5.114501537634729,
+      "grad_norm": 4.26010799407959,
+      "learning_rate": 0.00015105857747758558,
+      "loss": 1.1461,
+      "step": 171300
+    },
+    {
+      "epoch": 5.115994386886812,
+      "grad_norm": 5.365528583526611,
+      "learning_rate": 0.00015104429168166675,
+      "loss": 1.1156,
+      "step": 171350
+    },
+    {
+      "epoch": 5.117487236138895,
+      "grad_norm": 3.8632538318634033,
+      "learning_rate": 0.0001510300058857479,
+      "loss": 1.1354,
+      "step": 171400
+    },
+    {
+      "epoch": 5.1189800853909775,
+      "grad_norm": 5.570279598236084,
+      "learning_rate": 0.0001510157200898291,
+      "loss": 1.1758,
+      "step": 171450
+    },
+    {
+      "epoch": 5.120472934643059,
+      "grad_norm": 4.220273017883301,
+      "learning_rate": 0.00015100143429391024,
+      "loss": 1.149,
+      "step": 171500
+    },
+    {
+      "epoch": 5.121965783895142,
+      "grad_norm": 5.220869541168213,
+      "learning_rate": 0.00015098714849799143,
+      "loss": 1.208,
+      "step": 171550
+    },
+    {
+      "epoch": 5.123458633147225,
+      "grad_norm": 3.9469594955444336,
+      "learning_rate": 0.00015097286270207257,
+      "loss": 1.1431,
+      "step": 171600
+    },
+    {
+      "epoch": 5.124951482399307,
+      "grad_norm": 3.932490587234497,
+      "learning_rate": 0.00015095857690615376,
+      "loss": 1.1621,
+      "step": 171650
+    },
+    {
+      "epoch": 5.12644433165139,
+      "grad_norm": 5.017124176025391,
+      "learning_rate": 0.00015094429111023493,
+      "loss": 1.1341,
+      "step": 171700
+    },
+    {
+      "epoch": 5.127937180903472,
+      "grad_norm": 5.0581488609313965,
+      "learning_rate": 0.0001509300053143161,
+      "loss": 1.1756,
+      "step": 171750
+    },
+    {
+      "epoch": 5.129430030155555,
+      "grad_norm": 5.208496570587158,
+      "learning_rate": 0.00015091571951839726,
+      "loss": 1.1226,
+      "step": 171800
+    },
+    {
+      "epoch": 5.130922879407637,
+      "grad_norm": 3.9350638389587402,
+      "learning_rate": 0.00015090143372247842,
+      "loss": 1.1443,
+      "step": 171850
+    },
+    {
+      "epoch": 5.13241572865972,
+      "grad_norm": 3.7098276615142822,
+      "learning_rate": 0.00015088714792655959,
+      "loss": 1.1346,
+      "step": 171900
+    },
+    {
+      "epoch": 5.133908577911803,
+      "grad_norm": 4.124345779418945,
+      "learning_rate": 0.00015087286213064075,
+      "loss": 1.1955,
+      "step": 171950
+    },
+    {
+      "epoch": 5.135401427163885,
+      "grad_norm": 3.5282959938049316,
+      "learning_rate": 0.00015085857633472192,
+      "loss": 1.1457,
+      "step": 172000
+    },
+    {
+      "epoch": 5.136894276415967,
+      "grad_norm": 4.17992639541626,
+      "learning_rate": 0.00015084429053880308,
+      "loss": 1.161,
+      "step": 172050
+    },
+    {
+      "epoch": 5.13838712566805,
+      "grad_norm": 3.838498830795288,
+      "learning_rate": 0.00015083000474288424,
+      "loss": 1.1381,
+      "step": 172100
+    },
+    {
+      "epoch": 5.139879974920133,
+      "grad_norm": 5.275324821472168,
+      "learning_rate": 0.00015081571894696544,
+      "loss": 1.0803,
+      "step": 172150
+    },
+    {
+      "epoch": 5.141372824172215,
+      "grad_norm": 4.769461154937744,
+      "learning_rate": 0.00015080143315104657,
+      "loss": 1.1494,
+      "step": 172200
+    },
+    {
+      "epoch": 5.142865673424297,
+      "grad_norm": 4.681296348571777,
+      "learning_rate": 0.00015078714735512777,
+      "loss": 1.1726,
+      "step": 172250
+    },
+    {
+      "epoch": 5.14435852267638,
+      "grad_norm": 4.308079719543457,
+      "learning_rate": 0.0001507728615592089,
+      "loss": 1.1331,
+      "step": 172300
+    },
+    {
+      "epoch": 5.145851371928463,
+      "grad_norm": 4.144331455230713,
+      "learning_rate": 0.0001507585757632901,
+      "loss": 1.1579,
+      "step": 172350
+    },
+    {
+      "epoch": 5.147344221180545,
+      "grad_norm": 4.138561725616455,
+      "learning_rate": 0.00015074428996737123,
+      "loss": 1.1462,
+      "step": 172400
+    },
+    {
+      "epoch": 5.148837070432628,
+      "grad_norm": 4.038719177246094,
+      "learning_rate": 0.00015073000417145242,
+      "loss": 1.1604,
+      "step": 172450
+    },
+    {
+      "epoch": 5.15032991968471,
+      "grad_norm": 3.903709888458252,
+      "learning_rate": 0.0001507157183755336,
+      "loss": 1.1504,
+      "step": 172500
+    },
+    {
+      "epoch": 5.151822768936793,
+      "grad_norm": 5.332316875457764,
+      "learning_rate": 0.00015070143257961475,
+      "loss": 1.1616,
+      "step": 172550
+    },
+    {
+      "epoch": 5.153315618188875,
+      "grad_norm": 4.094432353973389,
+      "learning_rate": 0.00015068714678369592,
+      "loss": 1.1771,
+      "step": 172600
+    },
+    {
+      "epoch": 5.154808467440958,
+      "grad_norm": 4.575977325439453,
+      "learning_rate": 0.00015067286098777708,
+      "loss": 1.1587,
+      "step": 172650
+    },
+    {
+      "epoch": 5.156301316693041,
+      "grad_norm": 4.0832414627075195,
+      "learning_rate": 0.00015065857519185825,
+      "loss": 1.1432,
+      "step": 172700
+    },
+    {
+      "epoch": 5.1577941659451225,
+      "grad_norm": 3.969329595565796,
+      "learning_rate": 0.0001506442893959394,
+      "loss": 1.1735,
+      "step": 172750
+    },
+    {
+      "epoch": 5.159287015197205,
+      "grad_norm": 4.063047885894775,
+      "learning_rate": 0.00015063000360002058,
+      "loss": 1.1071,
+      "step": 172800
+    },
+    {
+      "epoch": 5.160779864449288,
+      "grad_norm": 4.360136985778809,
+      "learning_rate": 0.00015061571780410174,
+      "loss": 1.154,
+      "step": 172850
+    },
+    {
+      "epoch": 5.162272713701371,
+      "grad_norm": 3.850008010864258,
+      "learning_rate": 0.0001506014320081829,
+      "loss": 1.2022,
+      "step": 172900
+    },
+    {
+      "epoch": 5.163765562953453,
+      "grad_norm": 4.514410972595215,
+      "learning_rate": 0.0001505871462122641,
+      "loss": 1.1265,
+      "step": 172950
+    },
+    {
+      "epoch": 5.165258412205535,
+      "grad_norm": 4.641385555267334,
+      "learning_rate": 0.00015057286041634524,
+      "loss": 1.1373,
+      "step": 173000
+    },
+    {
+      "epoch": 5.166751261457618,
+      "grad_norm": 4.439742088317871,
+      "learning_rate": 0.00015055857462042643,
+      "loss": 1.1792,
+      "step": 173050
+    },
+    {
+      "epoch": 5.168244110709701,
+      "grad_norm": 4.819581985473633,
+      "learning_rate": 0.00015054428882450756,
+      "loss": 1.1055,
+      "step": 173100
+    },
+    {
+      "epoch": 5.169736959961783,
+      "grad_norm": 4.719875335693359,
+      "learning_rate": 0.00015053000302858876,
+      "loss": 1.1097,
+      "step": 173150
+    },
+    {
+      "epoch": 5.171229809213866,
+      "grad_norm": 3.7248473167419434,
+      "learning_rate": 0.0001505157172326699,
+      "loss": 1.1809,
+      "step": 173200
+    },
+    {
+      "epoch": 5.172722658465948,
+      "grad_norm": 4.468180179595947,
+      "learning_rate": 0.00015050143143675109,
+      "loss": 1.1218,
+      "step": 173250
+    },
+    {
+      "epoch": 5.1742155077180305,
+      "grad_norm": 5.018815994262695,
+      "learning_rate": 0.00015048714564083225,
+      "loss": 1.1562,
+      "step": 173300
+    },
+    {
+      "epoch": 5.175708356970113,
+      "grad_norm": 5.088767051696777,
+      "learning_rate": 0.00015047285984491342,
+      "loss": 1.0996,
+      "step": 173350
+    },
+    {
+      "epoch": 5.177201206222196,
+      "grad_norm": 5.453111171722412,
+      "learning_rate": 0.00015045857404899458,
+      "loss": 1.2058,
+      "step": 173400
+    },
+    {
+      "epoch": 5.178694055474279,
+      "grad_norm": 5.479541778564453,
+      "learning_rate": 0.00015044428825307574,
+      "loss": 1.1409,
+      "step": 173450
+    },
+    {
+      "epoch": 5.1801869047263605,
+      "grad_norm": 5.977447509765625,
+      "learning_rate": 0.0001504300024571569,
+      "loss": 1.1197,
+      "step": 173500
+    },
+    {
+      "epoch": 5.181679753978443,
+      "grad_norm": 5.65791130065918,
+      "learning_rate": 0.00015041571666123807,
+      "loss": 1.1486,
+      "step": 173550
+    },
+    {
+      "epoch": 5.183172603230526,
+      "grad_norm": 6.653776168823242,
+      "learning_rate": 0.00015040143086531924,
+      "loss": 1.1734,
+      "step": 173600
+    },
+    {
+      "epoch": 5.184665452482609,
+      "grad_norm": 4.445573806762695,
+      "learning_rate": 0.0001503871450694004,
+      "loss": 1.1776,
+      "step": 173650
+    },
+    {
+      "epoch": 5.186158301734691,
+      "grad_norm": 4.370992183685303,
+      "learning_rate": 0.00015037285927348157,
+      "loss": 1.1274,
+      "step": 173700
+    },
+    {
+      "epoch": 5.187651150986773,
+      "grad_norm": 4.479329586029053,
+      "learning_rate": 0.00015035857347756273,
+      "loss": 1.1387,
+      "step": 173750
+    },
+    {
+      "epoch": 5.189144000238856,
+      "grad_norm": 4.528141498565674,
+      "learning_rate": 0.0001503442876816439,
+      "loss": 1.1586,
+      "step": 173800
+    },
+    {
+      "epoch": 5.1906368494909385,
+      "grad_norm": 4.396657943725586,
+      "learning_rate": 0.00015033000188572506,
+      "loss": 1.1918,
+      "step": 173850
+    },
+    {
+      "epoch": 5.192129698743021,
+      "grad_norm": 4.700211048126221,
+      "learning_rate": 0.00015031571608980623,
+      "loss": 1.144,
+      "step": 173900
+    },
+    {
+      "epoch": 5.193622547995103,
+      "grad_norm": 3.748830795288086,
+      "learning_rate": 0.0001503014302938874,
+      "loss": 1.1584,
+      "step": 173950
+    },
+    {
+      "epoch": 5.195115397247186,
+      "grad_norm": 4.841403484344482,
+      "learning_rate": 0.00015028714449796856,
+      "loss": 1.1419,
+      "step": 174000
+    },
+    {
+      "epoch": 5.1966082464992684,
+      "grad_norm": 5.811513423919678,
+      "learning_rate": 0.00015027285870204972,
+      "loss": 1.1155,
+      "step": 174050
+    },
+    {
+      "epoch": 5.198101095751351,
+      "grad_norm": 4.072244167327881,
+      "learning_rate": 0.0001502585729061309,
+      "loss": 1.0956,
+      "step": 174100
+    },
+    {
+      "epoch": 5.199593945003434,
+      "grad_norm": 4.929732799530029,
+      "learning_rate": 0.00015024428711021205,
+      "loss": 1.1761,
+      "step": 174150
+    },
+    {
+      "epoch": 5.201086794255516,
+      "grad_norm": 6.669888019561768,
+      "learning_rate": 0.00015023000131429324,
+      "loss": 1.1291,
+      "step": 174200
+    },
+    {
+      "epoch": 5.202579643507598,
+      "grad_norm": 4.264486789703369,
+      "learning_rate": 0.00015021571551837438,
+      "loss": 1.1596,
+      "step": 174250
+    },
+    {
+      "epoch": 5.204072492759681,
+      "grad_norm": 4.881564140319824,
+      "learning_rate": 0.00015020142972245557,
+      "loss": 1.1884,
+      "step": 174300
+    },
+    {
+      "epoch": 5.205565342011764,
+      "grad_norm": 5.406867504119873,
+      "learning_rate": 0.00015018714392653674,
+      "loss": 1.1987,
+      "step": 174350
+    },
+    {
+      "epoch": 5.2070581912638465,
+      "grad_norm": 5.110748767852783,
+      "learning_rate": 0.0001501728581306179,
+      "loss": 1.1758,
+      "step": 174400
+    },
+    {
+      "epoch": 5.208551040515928,
+      "grad_norm": 4.510517120361328,
+      "learning_rate": 0.00015015857233469906,
+      "loss": 1.1746,
+      "step": 174450
+    },
+    {
+      "epoch": 5.210043889768011,
+      "grad_norm": 4.028598308563232,
+      "learning_rate": 0.00015014428653878023,
+      "loss": 1.1727,
+      "step": 174500
+    },
+    {
+      "epoch": 5.211536739020094,
+      "grad_norm": 5.600837707519531,
+      "learning_rate": 0.0001501300007428614,
+      "loss": 1.2241,
+      "step": 174550
+    },
+    {
+      "epoch": 5.213029588272176,
+      "grad_norm": 5.198084831237793,
+      "learning_rate": 0.00015011571494694256,
+      "loss": 1.1599,
+      "step": 174600
+    },
+    {
+      "epoch": 5.214522437524259,
+      "grad_norm": 4.601158142089844,
+      "learning_rate": 0.00015010142915102372,
+      "loss": 1.1764,
+      "step": 174650
+    },
+    {
+      "epoch": 5.216015286776341,
+      "grad_norm": 5.581248760223389,
+      "learning_rate": 0.0001500871433551049,
+      "loss": 1.174,
+      "step": 174700
+    },
+    {
+      "epoch": 5.217508136028424,
+      "grad_norm": 6.802553176879883,
+      "learning_rate": 0.00015007285755918605,
+      "loss": 1.1922,
+      "step": 174750
+    },
+    {
+      "epoch": 5.219000985280506,
+      "grad_norm": 4.5715651512146,
+      "learning_rate": 0.00015005857176326722,
+      "loss": 1.1835,
+      "step": 174800
+    },
+    {
+      "epoch": 5.220493834532589,
+      "grad_norm": 3.920050621032715,
+      "learning_rate": 0.00015004428596734838,
+      "loss": 1.1529,
+      "step": 174850
+    },
+    {
+      "epoch": 5.221986683784672,
+      "grad_norm": 5.379642009735107,
+      "learning_rate": 0.00015003000017142957,
+      "loss": 1.1016,
+      "step": 174900
+    },
+    {
+      "epoch": 5.223479533036754,
+      "grad_norm": 5.268077373504639,
+      "learning_rate": 0.0001500157143755107,
+      "loss": 1.1792,
+      "step": 174950
+    },
+    {
+      "epoch": 5.224972382288836,
+      "grad_norm": 4.864146709442139,
+      "learning_rate": 0.0001500014285795919,
+      "loss": 1.1511,
+      "step": 175000
+    },
+    {
+      "epoch": 5.226465231540919,
+      "grad_norm": 3.050143241882324,
+      "learning_rate": 0.00014998714278367304,
+      "loss": 1.2307,
+      "step": 175050
+    },
+    {
+      "epoch": 5.227958080793002,
+      "grad_norm": 4.5110764503479,
+      "learning_rate": 0.00014997285698775423,
+      "loss": 1.1558,
+      "step": 175100
+    },
+    {
+      "epoch": 5.229450930045084,
+      "grad_norm": 4.925455093383789,
+      "learning_rate": 0.0001499585711918354,
+      "loss": 1.194,
+      "step": 175150
+    },
+    {
+      "epoch": 5.230943779297166,
+      "grad_norm": 5.321854591369629,
+      "learning_rate": 0.00014994428539591656,
+      "loss": 1.1927,
+      "step": 175200
+    },
+    {
+      "epoch": 5.232436628549249,
+      "grad_norm": 3.1668155193328857,
+      "learning_rate": 0.00014992999959999773,
+      "loss": 1.1243,
+      "step": 175250
+    },
+    {
+      "epoch": 5.233929477801332,
+      "grad_norm": 5.259143829345703,
+      "learning_rate": 0.0001499157138040789,
+      "loss": 1.181,
+      "step": 175300
+    },
+    {
+      "epoch": 5.235422327053414,
+      "grad_norm": 5.783114910125732,
+      "learning_rate": 0.00014990142800816006,
+      "loss": 1.1156,
+      "step": 175350
+    },
+    {
+      "epoch": 5.236915176305497,
+      "grad_norm": 5.6330952644348145,
+      "learning_rate": 0.00014988714221224122,
+      "loss": 1.1513,
+      "step": 175400
+    },
+    {
+      "epoch": 5.238408025557579,
+      "grad_norm": 4.543177127838135,
+      "learning_rate": 0.00014987285641632238,
+      "loss": 1.1898,
+      "step": 175450
+    },
+    {
+      "epoch": 5.239900874809662,
+      "grad_norm": 3.058159828186035,
+      "learning_rate": 0.00014985857062040355,
+      "loss": 1.2119,
+      "step": 175500
+    },
+    {
+      "epoch": 5.241393724061744,
+      "grad_norm": 5.873176097869873,
+      "learning_rate": 0.00014984428482448471,
+      "loss": 1.1351,
+      "step": 175550
+    },
+    {
+      "epoch": 5.242886573313827,
+      "grad_norm": 4.126943588256836,
+      "learning_rate": 0.0001498299990285659,
+      "loss": 1.2393,
+      "step": 175600
+    },
+    {
+      "epoch": 5.24437942256591,
+      "grad_norm": 5.249665260314941,
+      "learning_rate": 0.00014981571323264704,
+      "loss": 1.2033,
+      "step": 175650
+    },
+    {
+      "epoch": 5.2458722718179915,
+      "grad_norm": 6.529717445373535,
+      "learning_rate": 0.00014980142743672824,
+      "loss": 1.1383,
+      "step": 175700
+    },
+    {
+      "epoch": 5.247365121070074,
+      "grad_norm": 4.902076244354248,
+      "learning_rate": 0.00014978714164080937,
+      "loss": 1.1252,
+      "step": 175750
+    },
+    {
+      "epoch": 5.248857970322157,
+      "grad_norm": 5.3138017654418945,
+      "learning_rate": 0.00014977285584489056,
+      "loss": 1.1629,
+      "step": 175800
+    },
+    {
+      "epoch": 5.25035081957424,
+      "grad_norm": 4.891197204589844,
+      "learning_rate": 0.0001497585700489717,
+      "loss": 1.144,
+      "step": 175850
+    },
+    {
+      "epoch": 5.2518436688263215,
+      "grad_norm": 5.521958827972412,
+      "learning_rate": 0.0001497442842530529,
+      "loss": 1.2136,
+      "step": 175900
+    },
+    {
+      "epoch": 5.253336518078404,
+      "grad_norm": 3.749346971511841,
+      "learning_rate": 0.00014972999845713406,
+      "loss": 1.1887,
+      "step": 175950
+    },
+    {
+      "epoch": 5.254829367330487,
+      "grad_norm": 3.947131633758545,
+      "learning_rate": 0.00014971571266121522,
+      "loss": 1.1959,
+      "step": 176000
+    },
+    {
+      "epoch": 5.25632221658257,
+      "grad_norm": 3.47645902633667,
+      "learning_rate": 0.0001497014268652964,
+      "loss": 1.1597,
+      "step": 176050
+    },
+    {
+      "epoch": 5.257815065834652,
+      "grad_norm": 3.3379621505737305,
+      "learning_rate": 0.00014968714106937755,
+      "loss": 1.1643,
+      "step": 176100
+    },
+    {
+      "epoch": 5.259307915086735,
+      "grad_norm": 3.426164388656616,
+      "learning_rate": 0.00014967285527345872,
+      "loss": 1.131,
+      "step": 176150
+    },
+    {
+      "epoch": 5.260800764338817,
+      "grad_norm": 4.857641696929932,
+      "learning_rate": 0.00014965856947753988,
+      "loss": 1.1171,
+      "step": 176200
+    },
+    {
+      "epoch": 5.2622936135908995,
+      "grad_norm": 3.9440646171569824,
+      "learning_rate": 0.00014964428368162105,
+      "loss": 1.1529,
+      "step": 176250
+    },
+    {
+      "epoch": 5.263786462842982,
+      "grad_norm": 4.392827987670898,
+      "learning_rate": 0.0001496299978857022,
+      "loss": 1.1946,
+      "step": 176300
+    },
+    {
+      "epoch": 5.265279312095065,
+      "grad_norm": 4.131694316864014,
+      "learning_rate": 0.00014961571208978338,
+      "loss": 1.1702,
+      "step": 176350
+    },
+    {
+      "epoch": 5.266772161347147,
+      "grad_norm": 4.955690383911133,
+      "learning_rate": 0.00014960142629386454,
+      "loss": 1.1059,
+      "step": 176400
+    },
+    {
+      "epoch": 5.2682650105992295,
+      "grad_norm": 3.52262544631958,
+      "learning_rate": 0.0001495871404979457,
+      "loss": 1.1873,
+      "step": 176450
+    },
+    {
+      "epoch": 5.269757859851312,
+      "grad_norm": 5.748971939086914,
+      "learning_rate": 0.00014957285470202687,
+      "loss": 1.1285,
+      "step": 176500
+    },
+    {
+      "epoch": 5.271250709103395,
+      "grad_norm": 5.502897262573242,
+      "learning_rate": 0.00014955856890610803,
+      "loss": 1.1292,
+      "step": 176550
+    },
+    {
+      "epoch": 5.272743558355478,
+      "grad_norm": 3.8092453479766846,
+      "learning_rate": 0.0001495442831101892,
+      "loss": 1.1545,
+      "step": 176600
+    },
+    {
+      "epoch": 5.274236407607559,
+      "grad_norm": 4.742307186126709,
+      "learning_rate": 0.00014952999731427036,
+      "loss": 1.1732,
+      "step": 176650
+    },
+    {
+      "epoch": 5.275729256859642,
+      "grad_norm": 5.108506202697754,
+      "learning_rate": 0.00014951571151835153,
+      "loss": 1.2033,
+      "step": 176700
+    },
+    {
+      "epoch": 5.277222106111725,
+      "grad_norm": 3.655372381210327,
+      "learning_rate": 0.00014950142572243272,
+      "loss": 1.1837,
+      "step": 176750
+    },
+    {
+      "epoch": 5.2787149553638075,
+      "grad_norm": 4.246975898742676,
+      "learning_rate": 0.00014948713992651386,
+      "loss": 1.1572,
+      "step": 176800
+    },
+    {
+      "epoch": 5.28020780461589,
+      "grad_norm": 3.185184955596924,
+      "learning_rate": 0.00014947285413059505,
+      "loss": 1.1773,
+      "step": 176850
+    },
+    {
+      "epoch": 5.281700653867972,
+      "grad_norm": 4.376567363739014,
+      "learning_rate": 0.0001494585683346762,
+      "loss": 1.1662,
+      "step": 176900
+    },
+    {
+      "epoch": 5.283193503120055,
+      "grad_norm": 4.7851457595825195,
+      "learning_rate": 0.00014944428253875738,
+      "loss": 1.2018,
+      "step": 176950
+    },
+    {
+      "epoch": 5.2846863523721375,
+      "grad_norm": 4.13614559173584,
+      "learning_rate": 0.00014942999674283852,
+      "loss": 1.1137,
+      "step": 177000
+    },
+    {
+      "epoch": 5.28617920162422,
+      "grad_norm": 5.2566609382629395,
+      "learning_rate": 0.0001494157109469197,
+      "loss": 1.1698,
+      "step": 177050
+    },
+    {
+      "epoch": 5.287672050876303,
+      "grad_norm": 4.4063825607299805,
+      "learning_rate": 0.00014940142515100087,
+      "loss": 1.1507,
+      "step": 177100
+    },
+    {
+      "epoch": 5.289164900128385,
+      "grad_norm": 6.457488059997559,
+      "learning_rate": 0.00014938713935508204,
+      "loss": 1.2139,
+      "step": 177150
+    },
+    {
+      "epoch": 5.290657749380467,
+      "grad_norm": 3.639181137084961,
+      "learning_rate": 0.0001493728535591632,
+      "loss": 1.146,
+      "step": 177200
+    },
+    {
+      "epoch": 5.29215059863255,
+      "grad_norm": 4.316925525665283,
+      "learning_rate": 0.00014935856776324437,
+      "loss": 1.1696,
+      "step": 177250
+    },
+    {
+      "epoch": 5.293643447884633,
+      "grad_norm": 3.8231780529022217,
+      "learning_rate": 0.00014934428196732553,
+      "loss": 1.2053,
+      "step": 177300
+    },
+    {
+      "epoch": 5.2951362971367155,
+      "grad_norm": 5.597898483276367,
+      "learning_rate": 0.0001493299961714067,
+      "loss": 1.2097,
+      "step": 177350
+    },
+    {
+      "epoch": 5.296629146388797,
+      "grad_norm": 5.123006820678711,
+      "learning_rate": 0.00014931571037548786,
+      "loss": 1.1213,
+      "step": 177400
+    },
+    {
+      "epoch": 5.29812199564088,
+      "grad_norm": 4.827045917510986,
+      "learning_rate": 0.00014930142457956903,
+      "loss": 1.1784,
+      "step": 177450
+    },
+    {
+      "epoch": 5.299614844892963,
+      "grad_norm": 4.859231472015381,
+      "learning_rate": 0.0001492871387836502,
+      "loss": 1.1418,
+      "step": 177500
+    },
+    {
+      "epoch": 5.3011076941450455,
+      "grad_norm": 5.134034633636475,
+      "learning_rate": 0.00014927285298773138,
+      "loss": 1.1989,
+      "step": 177550
+    },
+    {
+      "epoch": 5.302600543397128,
+      "grad_norm": 6.3303327560424805,
+      "learning_rate": 0.00014925856719181252,
+      "loss": 1.1866,
+      "step": 177600
+    },
+    {
+      "epoch": 5.30409339264921,
+      "grad_norm": 4.182743549346924,
+      "learning_rate": 0.0001492442813958937,
+      "loss": 1.1781,
+      "step": 177650
+    },
+    {
+      "epoch": 5.305586241901293,
+      "grad_norm": 4.055047512054443,
+      "learning_rate": 0.00014922999559997485,
+      "loss": 1.1735,
+      "step": 177700
+    },
+    {
+      "epoch": 5.307079091153375,
+      "grad_norm": 5.255928993225098,
+      "learning_rate": 0.00014921570980405604,
+      "loss": 1.1627,
+      "step": 177750
+    },
+    {
+      "epoch": 5.308571940405458,
+      "grad_norm": 4.9416584968566895,
+      "learning_rate": 0.0001492014240081372,
+      "loss": 1.0982,
+      "step": 177800
+    },
+    {
+      "epoch": 5.31006478965754,
+      "grad_norm": 6.426959037780762,
+      "learning_rate": 0.00014918713821221837,
+      "loss": 1.1585,
+      "step": 177850
+    },
+    {
+      "epoch": 5.311557638909623,
+      "grad_norm": 3.433701753616333,
+      "learning_rate": 0.00014917285241629953,
+      "loss": 1.1728,
+      "step": 177900
+    },
+    {
+      "epoch": 5.313050488161705,
+      "grad_norm": 4.7097320556640625,
+      "learning_rate": 0.0001491585666203807,
+      "loss": 1.1183,
+      "step": 177950
+    },
+    {
+      "epoch": 5.314543337413788,
+      "grad_norm": 5.154112815856934,
+      "learning_rate": 0.00014914428082446186,
+      "loss": 1.2097,
+      "step": 178000
+    },
+    {
+      "epoch": 5.316036186665871,
+      "grad_norm": 4.5182929039001465,
+      "learning_rate": 0.00014912999502854303,
+      "loss": 1.2144,
+      "step": 178050
+    },
+    {
+      "epoch": 5.3175290359179535,
+      "grad_norm": 4.899813175201416,
+      "learning_rate": 0.0001491157092326242,
+      "loss": 1.1513,
+      "step": 178100
+    },
+    {
+      "epoch": 5.319021885170035,
+      "grad_norm": 4.790624141693115,
+      "learning_rate": 0.00014910142343670536,
+      "loss": 1.1579,
+      "step": 178150
+    },
+    {
+      "epoch": 5.320514734422118,
+      "grad_norm": 4.103714466094971,
+      "learning_rate": 0.00014908713764078652,
+      "loss": 1.1407,
+      "step": 178200
+    },
+    {
+      "epoch": 5.322007583674201,
+      "grad_norm": 3.641630172729492,
+      "learning_rate": 0.0001490728518448677,
+      "loss": 1.1401,
+      "step": 178250
+    },
+    {
+      "epoch": 5.323500432926283,
+      "grad_norm": 4.197120666503906,
+      "learning_rate": 0.00014905856604894885,
+      "loss": 1.151,
+      "step": 178300
+    },
+    {
+      "epoch": 5.324993282178365,
+      "grad_norm": 6.3722405433654785,
+      "learning_rate": 0.00014904428025303004,
+      "loss": 1.2082,
+      "step": 178350
+    },
+    {
+      "epoch": 5.326486131430448,
+      "grad_norm": 6.299606800079346,
+      "learning_rate": 0.00014902999445711118,
+      "loss": 1.2194,
+      "step": 178400
+    },
+    {
+      "epoch": 5.327978980682531,
+      "grad_norm": 4.542901992797852,
+      "learning_rate": 0.00014901570866119237,
+      "loss": 1.199,
+      "step": 178450
+    },
+    {
+      "epoch": 5.329471829934613,
+      "grad_norm": 5.075666904449463,
+      "learning_rate": 0.0001490014228652735,
+      "loss": 1.1677,
+      "step": 178500
+    },
+    {
+      "epoch": 5.330964679186696,
+      "grad_norm": 4.868073463439941,
+      "learning_rate": 0.0001489871370693547,
+      "loss": 1.1416,
+      "step": 178550
+    },
+    {
+      "epoch": 5.332457528438778,
+      "grad_norm": 4.414193630218506,
+      "learning_rate": 0.00014897285127343587,
+      "loss": 1.1757,
+      "step": 178600
+    },
+    {
+      "epoch": 5.3339503776908606,
+      "grad_norm": 4.902929306030273,
+      "learning_rate": 0.00014895856547751703,
+      "loss": 1.1555,
+      "step": 178650
+    },
+    {
+      "epoch": 5.335443226942943,
+      "grad_norm": 3.7560372352600098,
+      "learning_rate": 0.0001489442796815982,
+      "loss": 1.1768,
+      "step": 178700
+    },
+    {
+      "epoch": 5.336936076195026,
+      "grad_norm": 4.610017776489258,
+      "learning_rate": 0.00014892999388567936,
+      "loss": 1.1467,
+      "step": 178750
+    },
+    {
+      "epoch": 5.338428925447109,
+      "grad_norm": 3.9438891410827637,
+      "learning_rate": 0.00014891570808976053,
+      "loss": 1.1995,
+      "step": 178800
+    },
+    {
+      "epoch": 5.3399217746991905,
+      "grad_norm": 4.497828483581543,
+      "learning_rate": 0.0001489014222938417,
+      "loss": 1.1962,
+      "step": 178850
+    },
+    {
+      "epoch": 5.341414623951273,
+      "grad_norm": 6.109000205993652,
+      "learning_rate": 0.00014888713649792285,
+      "loss": 1.1826,
+      "step": 178900
+    },
+    {
+      "epoch": 5.342907473203356,
+      "grad_norm": 4.150059223175049,
+      "learning_rate": 0.00014887285070200402,
+      "loss": 1.1613,
+      "step": 178950
+    },
+    {
+      "epoch": 5.344400322455439,
+      "grad_norm": 4.3024492263793945,
+      "learning_rate": 0.00014885856490608518,
+      "loss": 1.1438,
+      "step": 179000
+    },
+    {
+      "epoch": 5.345893171707521,
+      "grad_norm": 4.783211708068848,
+      "learning_rate": 0.00014884427911016635,
+      "loss": 1.1864,
+      "step": 179050
+    },
+    {
+      "epoch": 5.347386020959603,
+      "grad_norm": 5.038503646850586,
+      "learning_rate": 0.0001488299933142475,
+      "loss": 1.2297,
+      "step": 179100
+    },
+    {
+      "epoch": 5.348878870211686,
+      "grad_norm": 5.009586334228516,
+      "learning_rate": 0.00014881570751832868,
+      "loss": 1.1509,
+      "step": 179150
+    },
+    {
+      "epoch": 5.3503717194637685,
+      "grad_norm": 5.057339191436768,
+      "learning_rate": 0.00014880142172240984,
+      "loss": 1.162,
+      "step": 179200
+    },
+    {
+      "epoch": 5.351864568715851,
+      "grad_norm": 7.249011993408203,
+      "learning_rate": 0.000148787135926491,
+      "loss": 1.1634,
+      "step": 179250
+    },
+    {
+      "epoch": 5.353357417967934,
+      "grad_norm": 4.474646091461182,
+      "learning_rate": 0.00014877285013057217,
+      "loss": 1.1652,
+      "step": 179300
+    },
+    {
+      "epoch": 5.354850267220016,
+      "grad_norm": 4.699128150939941,
+      "learning_rate": 0.00014875856433465334,
+      "loss": 1.1875,
+      "step": 179350
+    },
+    {
+      "epoch": 5.3563431164720985,
+      "grad_norm": 3.6825220584869385,
+      "learning_rate": 0.00014874427853873453,
+      "loss": 1.1896,
+      "step": 179400
+    },
+    {
+      "epoch": 5.357835965724181,
+      "grad_norm": 5.787204742431641,
+      "learning_rate": 0.00014872999274281567,
+      "loss": 1.1793,
+      "step": 179450
+    },
+    {
+      "epoch": 5.359328814976264,
+      "grad_norm": 6.388000011444092,
+      "learning_rate": 0.00014871570694689686,
+      "loss": 1.2219,
+      "step": 179500
+    },
+    {
+      "epoch": 5.360821664228347,
+      "grad_norm": 4.790463924407959,
+      "learning_rate": 0.000148701421150978,
+      "loss": 1.1739,
+      "step": 179550
+    },
+    {
+      "epoch": 5.362314513480428,
+      "grad_norm": 2.8773367404937744,
+      "learning_rate": 0.0001486871353550592,
+      "loss": 1.1894,
+      "step": 179600
+    },
+    {
+      "epoch": 5.363807362732511,
+      "grad_norm": 3.305040121078491,
+      "learning_rate": 0.00014867284955914032,
+      "loss": 1.2278,
+      "step": 179650
+    },
+    {
+      "epoch": 5.365300211984594,
+      "grad_norm": 4.368061065673828,
+      "learning_rate": 0.00014865856376322152,
+      "loss": 1.214,
+      "step": 179700
+    },
+    {
+      "epoch": 5.3667930612366765,
+      "grad_norm": 5.629176139831543,
+      "learning_rate": 0.00014864427796730268,
+      "loss": 1.1611,
+      "step": 179750
+    },
+    {
+      "epoch": 5.368285910488759,
+      "grad_norm": 5.811650276184082,
+      "learning_rate": 0.00014862999217138385,
+      "loss": 1.1861,
+      "step": 179800
+    },
+    {
+      "epoch": 5.369778759740841,
+      "grad_norm": 3.6000754833221436,
+      "learning_rate": 0.000148615706375465,
+      "loss": 1.1636,
+      "step": 179850
+    },
+    {
+      "epoch": 5.371271608992924,
+      "grad_norm": 4.922330379486084,
+      "learning_rate": 0.00014860142057954617,
+      "loss": 1.1534,
+      "step": 179900
+    },
+    {
+      "epoch": 5.3727644582450065,
+      "grad_norm": 5.780492305755615,
+      "learning_rate": 0.00014858713478362734,
+      "loss": 1.2066,
+      "step": 179950
+    },
+    {
+      "epoch": 5.374257307497089,
+      "grad_norm": 4.670080184936523,
+      "learning_rate": 0.0001485728489877085,
+      "loss": 1.1812,
+      "step": 180000
     }
   ],
   "logging_steps": 50,
@@ -23833,7 +25233,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.5513784407443374e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
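The trainer_state.json diff appends 200 log_history entries (one every 50 steps, from step 170050 through 180000) and bumps epoch, global_step, and total_flos accordingly. A minimal sketch of summarizing the newly logged window from the saved file; the path is assumed to be the checkpoint directory shown above:

# Minimal sketch: summarize the log_history entries added by this commit.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

window = [e for e in state["log_history"]
          if "loss" in e and 170000 < e["step"] <= 180000]
losses = [e["loss"] for e in window]
print(f"{len(window)} entries, mean loss {sum(losses) / len(losses):.4f}, "
      f"final lr {window[-1]['learning_rate']:.6g} at step {window[-1]['step']}")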