Training in progress, step 60000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step60000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step60000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ff7f352959a4d95b5b3caf7854233c45faf1f1fc2d270761c458e87a9761a568
 size 42002584
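Each binary file in this commit is stored as a Git LFS pointer like the one above (three `key value` lines). A minimal sketch of parsing such a pointer, assuming the repo was cloned without fetching LFS objects so the pointer text is still on disk:

```python
# Minimal sketch: parse a Git LFS pointer file into a dict of its fields.
# Assumes the file on disk still holds the pointer text shown above
# (i.e. the repo was cloned without `git lfs pull`).
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            key, _, value = line.partition(" ")
            fields[key] = value
    return fields

# Hypothetical usage against this checkpoint's adapter weights:
# parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
# -> {"version": "https://git-lfs.github.com/spec/v1",
#     "oid": "sha256:ff7f3529...",
#     "size": "42002584"}
```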
last-checkpoint/global_step60000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4eda03d8d38d325e3fa1c9ac3db0bfa575f928344f48c69f667f444838fcf0
+size 251710672
last-checkpoint/global_step60000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5a921a4e10245f0928f09959bb57c399dc1008e5295bdc949df4bf3ca71d9af
+size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-
+global_step60000
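The `latest` file is the small text file DeepSpeed uses to record the most recent checkpoint tag; resolving it to the tagged directory is plain file IO. A minimal sketch, assuming a local checkout of this repo's `last-checkpoint` folder:

```python
from pathlib import Path

# Minimal sketch: resolve DeepSpeed's `latest` tag file to the
# checkpoint directory it names (paths assumed, not part of any API).
ckpt_root = Path("last-checkpoint")
tag = (ckpt_root / "latest").read_text().strip()  # "global_step60000"
step_dir = ckpt_root / tag
print(sorted(p.name for p in step_dir.iterdir()))
# expected here: the optimizer/model state files added in this commit
```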
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aa7eb5ddbfbe8c21bb6f5ec0c848d508578d6f2feee3ccad23d775a2b907625c
 size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.7914191024990296,
   "eval_steps": 1000,
-  "global_step": 
+  "global_step": 60000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7014,6 +7014,1406 @@
       "learning_rate": 0.00018571591837667644,
       "loss": 1.314,
       "step": 50000
+    },
+    {"epoch": 1.4943421013346072, "grad_norm": 5.21511697769165, "learning_rate": 0.0001857016325807576, "loss": 1.363, "step": 50050},
+    {"epoch": 1.4958349505866897, "grad_norm": 4.303940296173096, "learning_rate": 0.0001856873467848388, "loss": 1.2926, "step": 50100},
+    {"epoch": 1.4973277998387724, "grad_norm": 4.7364020347595215, "learning_rate": 0.00018567306098891994, "loss": 1.3325, "step": 50150},
+    {"epoch": 1.4988206490908549, "grad_norm": 4.325379371643066, "learning_rate": 0.00018565877519300113, "loss": 1.31, "step": 50200},
+    {"epoch": 1.5003134983429374, "grad_norm": 5.442165374755859, "learning_rate": 0.00018564448939708227, "loss": 1.3445, "step": 50250},
+    {"epoch": 1.5018063475950199, "grad_norm": 5.861702919006348, "learning_rate": 0.00018563020360116346, "loss": 1.3392, "step": 50300},
+    {"epoch": 1.5032991968471023, "grad_norm": 4.277650356292725, "learning_rate": 0.00018561591780524462, "loss": 1.3127, "step": 50350},
+    {"epoch": 1.504792046099185, "grad_norm": 5.670056343078613, "learning_rate": 0.00018560163200932576, "loss": 1.346, "step": 50400},
+    {"epoch": 1.5062848953512673, "grad_norm": 4.479572772979736, "learning_rate": 0.00018558734621340695, "loss": 1.3254, "step": 50450},
+    {"epoch": 1.50777774460335, "grad_norm": 4.820215225219727, "learning_rate": 0.0001855730604174881, "loss": 1.3043, "step": 50500},
+    {"epoch": 1.5092705938554325, "grad_norm": 5.21779203414917, "learning_rate": 0.00018555877462156928, "loss": 1.2953, "step": 50550},
+    {"epoch": 1.510763443107515, "grad_norm": 4.987244129180908, "learning_rate": 0.00018554448882565042, "loss": 1.3008, "step": 50600},
+    {"epoch": 1.5122562923595977, "grad_norm": 4.658651828765869, "learning_rate": 0.0001855302030297316, "loss": 1.3146, "step": 50650},
+    {"epoch": 1.51374914161168, "grad_norm": 4.724532604217529, "learning_rate": 0.00018551591723381278, "loss": 1.3467, "step": 50700},
+    {"epoch": 1.5152419908637627, "grad_norm": 6.199383735656738, "learning_rate": 0.00018550163143789394, "loss": 1.3244, "step": 50750},
+    {"epoch": 1.5167348401158451, "grad_norm": 4.255542278289795, "learning_rate": 0.0001854873456419751, "loss": 1.3373, "step": 50800},
+    {"epoch": 1.5182276893679276, "grad_norm": 5.011137962341309, "learning_rate": 0.00018547305984605627, "loss": 1.2945, "step": 50850},
+    {"epoch": 1.51972053862001, "grad_norm": 4.276299476623535, "learning_rate": 0.00018545877405013743, "loss": 1.3503, "step": 50900},
+    {"epoch": 1.5212133878720926, "grad_norm": 5.867751598358154, "learning_rate": 0.0001854444882542186, "loss": 1.345, "step": 50950},
+    {"epoch": 1.5227062371241753, "grad_norm": 3.6653764247894287, "learning_rate": 0.00018543020245829976, "loss": 1.3353, "step": 51000},
+    {"epoch": 1.5241990863762576, "grad_norm": 4.758911609649658, "learning_rate": 0.00018541591666238093, "loss": 1.3488, "step": 51050},
+    {"epoch": 1.5256919356283403, "grad_norm": 4.028106689453125, "learning_rate": 0.0001854016308664621, "loss": 1.3215, "step": 51100},
+    {"epoch": 1.5271847848804228, "grad_norm": 4.353278636932373, "learning_rate": 0.00018538734507054329, "loss": 1.2984, "step": 51150},
+    {"epoch": 1.5286776341325052, "grad_norm": 7.112466812133789, "learning_rate": 0.00018537305927462442, "loss": 1.3057, "step": 51200},
+    {"epoch": 1.530170483384588, "grad_norm": 4.703035354614258, "learning_rate": 0.00018535877347870561, "loss": 1.3289, "step": 51250},
+    {"epoch": 1.5316633326366702, "grad_norm": 4.110894203186035, "learning_rate": 0.00018534448768278675, "loss": 1.3587, "step": 51300},
+    {"epoch": 1.533156181888753, "grad_norm": 5.426900863647461, "learning_rate": 0.00018533020188686794, "loss": 1.316, "step": 51350},
+    {"epoch": 1.5346490311408354, "grad_norm": 4.927971839904785, "learning_rate": 0.00018531591609094908, "loss": 1.3775, "step": 51400},
+    {"epoch": 1.5361418803929179, "grad_norm": 4.867948055267334, "learning_rate": 0.00018530163029503027, "loss": 1.3038, "step": 51450},
+    {"epoch": 1.5376347296450006, "grad_norm": 4.8490471839904785, "learning_rate": 0.00018528734449911144, "loss": 1.3851, "step": 51500},
+    {"epoch": 1.5391275788970828, "grad_norm": 4.634105682373047, "learning_rate": 0.0001852730587031926, "loss": 1.3215, "step": 51550},
+    {"epoch": 1.5406204281491656, "grad_norm": 3.7799713611602783, "learning_rate": 0.00018525877290727377, "loss": 1.3371, "step": 51600},
+    {"epoch": 1.542113277401248, "grad_norm": 3.9134316444396973, "learning_rate": 0.00018524448711135493, "loss": 1.3073, "step": 51650},
+    {"epoch": 1.5436061266533305, "grad_norm": 3.5114309787750244, "learning_rate": 0.0001852302013154361, "loss": 1.3859, "step": 51700},
+    {"epoch": 1.5450989759054132, "grad_norm": 3.7832348346710205, "learning_rate": 0.00018521591551951726, "loss": 1.369, "step": 51750},
+    {"epoch": 1.5465918251574955, "grad_norm": 3.666222333908081, "learning_rate": 0.00018520162972359843, "loss": 1.3552, "step": 51800},
+    {"epoch": 1.5480846744095782, "grad_norm": 4.288083076477051, "learning_rate": 0.0001851873439276796, "loss": 1.3139, "step": 51850},
+    {"epoch": 1.5495775236616607, "grad_norm": 6.1794939041137695, "learning_rate": 0.00018517305813176075, "loss": 1.2984, "step": 51900},
+    {"epoch": 1.5510703729137432, "grad_norm": 4.89127779006958, "learning_rate": 0.00018515877233584195, "loss": 1.3251, "step": 51950},
+    {"epoch": 1.5525632221658257, "grad_norm": 3.1737000942230225, "learning_rate": 0.00018514448653992308, "loss": 1.342, "step": 52000},
+    {"epoch": 1.5540560714179081, "grad_norm": 5.618998050689697, "learning_rate": 0.00018513020074400428, "loss": 1.3234, "step": 52050},
+    {"epoch": 1.5555489206699908, "grad_norm": 4.3047685623168945, "learning_rate": 0.0001851159149480854, "loss": 1.3305, "step": 52100},
+    {"epoch": 1.557041769922073, "grad_norm": 5.245848178863525, "learning_rate": 0.0001851016291521666, "loss": 1.3052, "step": 52150},
+    {"epoch": 1.5585346191741558, "grad_norm": 5.337231636047363, "learning_rate": 0.00018508734335624774, "loss": 1.3353, "step": 52200},
+    {"epoch": 1.5600274684262383, "grad_norm": 4.924198627471924, "learning_rate": 0.00018507305756032893, "loss": 1.3022, "step": 52250},
+    {"epoch": 1.5615203176783208, "grad_norm": 4.464673042297363, "learning_rate": 0.0001850587717644101, "loss": 1.3168, "step": 52300},
+    {"epoch": 1.5630131669304035, "grad_norm": 3.7909131050109863, "learning_rate": 0.00018504448596849126, "loss": 1.3307, "step": 52350},
+    {"epoch": 1.5645060161824857, "grad_norm": 4.331950664520264, "learning_rate": 0.00018503020017257243, "loss": 1.3574, "step": 52400},
+    {"epoch": 1.5659988654345685, "grad_norm": 4.650835990905762, "learning_rate": 0.0001850159143766536, "loss": 1.3356, "step": 52450},
+    {"epoch": 1.567491714686651, "grad_norm": 5.968230247497559, "learning_rate": 0.00018500162858073476, "loss": 1.3515, "step": 52500},
+    {"epoch": 1.5689845639387334, "grad_norm": 3.9560632705688477, "learning_rate": 0.00018498734278481592, "loss": 1.3262, "step": 52550},
+    {"epoch": 1.5704774131908161, "grad_norm": 3.6332058906555176, "learning_rate": 0.0001849730569888971, "loss": 1.3708, "step": 52600},
+    {"epoch": 1.5719702624428984, "grad_norm": 5.171941757202148, "learning_rate": 0.00018495877119297825, "loss": 1.3201, "step": 52650},
+    {"epoch": 1.573463111694981, "grad_norm": 4.531052589416504, "learning_rate": 0.00018494448539705942, "loss": 1.364, "step": 52700},
+    {"epoch": 1.5749559609470636, "grad_norm": 3.860273838043213, "learning_rate": 0.0001849301996011406, "loss": 1.3729, "step": 52750},
+    {"epoch": 1.576448810199146, "grad_norm": 4.562480926513672, "learning_rate": 0.00018491591380522175, "loss": 1.2745, "step": 52800},
+    {"epoch": 1.5779416594512288, "grad_norm": 4.759164333343506, "learning_rate": 0.00018490162800930294, "loss": 1.3399, "step": 52850},
+    {"epoch": 1.579434508703311, "grad_norm": 6.885429859161377, "learning_rate": 0.00018488734221338408, "loss": 1.2915, "step": 52900},
+    {"epoch": 1.5809273579553937, "grad_norm": 4.555217742919922, "learning_rate": 0.00018487305641746527, "loss": 1.3897, "step": 52950},
+    {"epoch": 1.5824202072074762, "grad_norm": 5.686909198760986, "learning_rate": 0.0001848587706215464, "loss": 1.3234, "step": 53000},
+    {"epoch": 1.5839130564595587, "grad_norm": 3.994685411453247, "learning_rate": 0.00018484448482562757, "loss": 1.3265, "step": 53050},
+    {"epoch": 1.5854059057116412, "grad_norm": 4.623023986816406, "learning_rate": 0.00018483019902970876, "loss": 1.3479, "step": 53100},
+    {"epoch": 1.5868987549637237, "grad_norm": 6.000095844268799, "learning_rate": 0.0001848159132337899, "loss": 1.3333, "step": 53150},
+    {"epoch": 1.5883916042158064, "grad_norm": 7.388947010040283, "learning_rate": 0.0001848016274378711, "loss": 1.306, "step": 53200},
+    {"epoch": 1.5898844534678886, "grad_norm": 4.04311990737915, "learning_rate": 0.00018478734164195223, "loss": 1.3263, "step": 53250},
+    {"epoch": 1.5913773027199714, "grad_norm": 5.370228290557861, "learning_rate": 0.00018477305584603342, "loss": 1.3279, "step": 53300},
+    {"epoch": 1.5928701519720538, "grad_norm": 5.453476905822754, "learning_rate": 0.00018475877005011458, "loss": 1.3477, "step": 53350},
+    {"epoch": 1.5943630012241363, "grad_norm": 3.9221408367156982, "learning_rate": 0.00018474448425419575, "loss": 1.3391, "step": 53400},
+    {"epoch": 1.595855850476219, "grad_norm": 4.4245781898498535, "learning_rate": 0.0001847301984582769, "loss": 1.3263, "step": 53450},
+    {"epoch": 1.5973486997283013, "grad_norm": 4.352360725402832, "learning_rate": 0.00018471591266235808, "loss": 1.3059, "step": 53500},
+    {"epoch": 1.598841548980384, "grad_norm": 5.2914509773254395, "learning_rate": 0.00018470162686643924, "loss": 1.3353, "step": 53550},
+    {"epoch": 1.6003343982324665, "grad_norm": 5.199918746948242, "learning_rate": 0.0001846873410705204, "loss": 1.3667, "step": 53600},
+    {"epoch": 1.601827247484549, "grad_norm": 4.404350280761719, "learning_rate": 0.00018467305527460157, "loss": 1.3245, "step": 53650},
+    {"epoch": 1.6033200967366317, "grad_norm": 4.452194690704346, "learning_rate": 0.00018465876947868274, "loss": 1.3194, "step": 53700},
+    {"epoch": 1.604812945988714, "grad_norm": 4.387017726898193, "learning_rate": 0.0001846444836827639, "loss": 1.2448, "step": 53750},
+    {"epoch": 1.6063057952407966, "grad_norm": 5.375933647155762, "learning_rate": 0.0001846301978868451, "loss": 1.3515, "step": 53800},
+    {"epoch": 1.6077986444928791, "grad_norm": 4.564199447631836, "learning_rate": 0.00018461591209092623, "loss": 1.3182, "step": 53850},
+    {"epoch": 1.6092914937449616, "grad_norm": 5.435900688171387, "learning_rate": 0.00018460162629500742, "loss": 1.3071, "step": 53900},
+    {"epoch": 1.6107843429970443, "grad_norm": 3.600592613220215, "learning_rate": 0.00018458734049908856, "loss": 1.4009, "step": 53950},
+    {"epoch": 1.6122771922491266, "grad_norm": 5.12654447555542, "learning_rate": 0.00018457305470316975, "loss": 1.3285, "step": 54000},
+    {"epoch": 1.6137700415012093, "grad_norm": 3.6472108364105225, "learning_rate": 0.0001845587689072509, "loss": 1.32, "step": 54050},
+    {"epoch": 1.6152628907532918, "grad_norm": 5.0967559814453125, "learning_rate": 0.00018454448311133208, "loss": 1.3018, "step": 54100},
+    {"epoch": 1.6167557400053743, "grad_norm": 5.078010082244873, "learning_rate": 0.00018453019731541325, "loss": 1.3539, "step": 54150},
+    {"epoch": 1.6182485892574567, "grad_norm": 4.601762294769287, "learning_rate": 0.0001845159115194944, "loss": 1.3153, "step": 54200},
+    {"epoch": 1.6197414385095392, "grad_norm": 3.8307271003723145, "learning_rate": 0.00018450162572357557, "loss": 1.34, "step": 54250},
+    {"epoch": 1.621234287761622, "grad_norm": 6.145284175872803, "learning_rate": 0.00018448733992765674, "loss": 1.3315, "step": 54300},
+    {"epoch": 1.6227271370137042, "grad_norm": 9.96362590789795, "learning_rate": 0.0001844730541317379, "loss": 1.3269, "step": 54350},
+    {"epoch": 1.624219986265787, "grad_norm": 5.126537322998047, "learning_rate": 0.00018445876833581907, "loss": 1.3604, "step": 54400},
+    {"epoch": 1.6257128355178694, "grad_norm": 3.604433059692383, "learning_rate": 0.00018444448253990023, "loss": 1.3651, "step": 54450},
+    {"epoch": 1.6272056847699519, "grad_norm": 4.835113048553467, "learning_rate": 0.0001844301967439814, "loss": 1.2787, "step": 54500},
+    {"epoch": 1.6286985340220346, "grad_norm": 4.534168243408203, "learning_rate": 0.00018441591094806256, "loss": 1.3354, "step": 54550},
+    {"epoch": 1.6301913832741168, "grad_norm": 7.929208755493164, "learning_rate": 0.00018440162515214375, "loss": 1.3361, "step": 54600},
+    {"epoch": 1.6316842325261995, "grad_norm": 4.582381248474121, "learning_rate": 0.0001843873393562249, "loss": 1.354, "step": 54650},
+    {"epoch": 1.633177081778282, "grad_norm": 4.781309604644775, "learning_rate": 0.00018437305356030608, "loss": 1.3035, "step": 54700},
+    {"epoch": 1.6346699310303645, "grad_norm": 6.0308308601379395, "learning_rate": 0.00018435876776438722, "loss": 1.3392, "step": 54750},
+    {"epoch": 1.6361627802824472, "grad_norm": 5.485566139221191, "learning_rate": 0.0001843444819684684, "loss": 1.3408, "step": 54800},
+    {"epoch": 1.6376556295345295, "grad_norm": 6.658644676208496, "learning_rate": 0.00018433019617254955, "loss": 1.3772, "step": 54850},
+    {"epoch": 1.6391484787866122, "grad_norm": 4.54780912399292, "learning_rate": 0.00018431591037663074, "loss": 1.3511, "step": 54900},
+    {"epoch": 1.6406413280386947, "grad_norm": 5.600758075714111, "learning_rate": 0.0001843016245807119, "loss": 1.3301, "step": 54950},
+    {"epoch": 1.6421341772907772, "grad_norm": 4.194091796875, "learning_rate": 0.00018428733878479307, "loss": 1.3159, "step": 55000},
+    {"epoch": 1.6436270265428599, "grad_norm": 6.306137561798096, "learning_rate": 0.00018427305298887424, "loss": 1.3401, "step": 55050},
+    {"epoch": 1.6451198757949421, "grad_norm": 4.377103328704834, "learning_rate": 0.0001842587671929554, "loss": 1.34, "step": 55100},
+    {"epoch": 1.6466127250470248, "grad_norm": 3.450533390045166, "learning_rate": 0.00018424448139703657, "loss": 1.3367, "step": 55150},
+    {"epoch": 1.6481055742991073, "grad_norm": 3.5845348834991455, "learning_rate": 0.00018423019560111773, "loss": 1.3755, "step": 55200},
+    {"epoch": 1.6495984235511898, "grad_norm": 4.741848468780518, "learning_rate": 0.0001842159098051989, "loss": 1.3116, "step": 55250},
+    {"epoch": 1.6510912728032723, "grad_norm": 4.1155619621276855, "learning_rate": 0.00018420162400928006, "loss": 1.3682, "step": 55300},
+    {"epoch": 1.6525841220553548, "grad_norm": 4.3722028732299805, "learning_rate": 0.00018418733821336122, "loss": 1.2916, "step": 55350},
+    {"epoch": 1.6540769713074375, "grad_norm": 4.6453118324279785, "learning_rate": 0.00018417305241744242, "loss": 1.3743, "step": 55400},
+    {"epoch": 1.6555698205595197, "grad_norm": 4.749643802642822, "learning_rate": 0.00018415876662152355, "loss": 1.3649, "step": 55450},
+    {"epoch": 1.6570626698116024, "grad_norm": 3.9023261070251465, "learning_rate": 0.00018414448082560475, "loss": 1.4002, "step": 55500},
+    {"epoch": 1.658555519063685, "grad_norm": 4.34132719039917, "learning_rate": 0.00018413019502968588, "loss": 1.3344, "step": 55550},
+    {"epoch": 1.6600483683157674, "grad_norm": 5.479841709136963, "learning_rate": 0.00018411590923376707, "loss": 1.3511, "step": 55600},
+    {"epoch": 1.6615412175678501, "grad_norm": 3.340822696685791, "learning_rate": 0.0001841016234378482, "loss": 1.3789, "step": 55650},
+    {"epoch": 1.6630340668199324, "grad_norm": 5.259148597717285, "learning_rate": 0.00018408733764192938, "loss": 1.3092, "step": 55700},
+    {"epoch": 1.664526916072015, "grad_norm": 3.590052843093872, "learning_rate": 0.00018407305184601057, "loss": 1.3596, "step": 55750},
+    {"epoch": 1.6660197653240976, "grad_norm": 5.688297748565674, "learning_rate": 0.0001840587660500917, "loss": 1.3629, "step": 55800},
+    {"epoch": 1.66751261457618, "grad_norm": 5.826660633087158, "learning_rate": 0.0001840444802541729, "loss": 1.3757, "step": 55850},
+    {"epoch": 1.6690054638282628, "grad_norm": 3.0926566123962402, "learning_rate": 0.00018403019445825404, "loss": 1.3507, "step": 55900},
+    {"epoch": 1.670498313080345, "grad_norm": 7.726663589477539, "learning_rate": 0.00018401590866233523, "loss": 1.3162, "step": 55950},
+    {"epoch": 1.6719911623324277, "grad_norm": 4.687432765960693, "learning_rate": 0.0001840016228664164, "loss": 1.289, "step": 56000},
+    {"epoch": 1.6734840115845102, "grad_norm": 4.279799461364746, "learning_rate": 0.00018398733707049756, "loss": 1.3438, "step": 56050},
+    {"epoch": 1.6749768608365927, "grad_norm": 6.4994797706604, "learning_rate": 0.00018397305127457872, "loss": 1.3373, "step": 56100},
+    {"epoch": 1.6764697100886754, "grad_norm": 4.511581897735596, "learning_rate": 0.00018395876547865989, "loss": 1.3549, "step": 56150},
+    {"epoch": 1.6779625593407577, "grad_norm": 5.1127190589904785, "learning_rate": 0.00018394447968274105, "loss": 1.2734, "step": 56200},
+    {"epoch": 1.6794554085928404, "grad_norm": 4.7369561195373535, "learning_rate": 0.00018393019388682222, "loss": 1.3193, "step": 56250},
+    {"epoch": 1.6809482578449229, "grad_norm": 5.101438522338867, "learning_rate": 0.00018391590809090338, "loss": 1.3311, "step": 56300},
+    {"epoch": 1.6824411070970053, "grad_norm": 5.68798303604126, "learning_rate": 0.00018390162229498454, "loss": 1.3723, "step": 56350},
+    {"epoch": 1.6839339563490878, "grad_norm": 5.620144367218018, "learning_rate": 0.0001838873364990657, "loss": 1.3051, "step": 56400},
+    {"epoch": 1.6854268056011703, "grad_norm": 4.335910797119141, "learning_rate": 0.00018387305070314687, "loss": 1.2625, "step": 56450},
+    {"epoch": 1.686919654853253, "grad_norm": 5.112996578216553, "learning_rate": 0.00018385876490722804, "loss": 1.3288, "step": 56500},
+    {"epoch": 1.6884125041053353, "grad_norm": 5.485559463500977, "learning_rate": 0.00018384447911130923, "loss": 1.3171, "step": 56550},
+    {"epoch": 1.689905353357418, "grad_norm": 5.58554744720459, "learning_rate": 0.00018383019331539037, "loss": 1.3002, "step": 56600},
+    {"epoch": 1.6913982026095005, "grad_norm": 4.202605247497559, "learning_rate": 0.00018381590751947156, "loss": 1.3018, "step": 56650},
+    {"epoch": 1.692891051861583, "grad_norm": 5.206381797790527, "learning_rate": 0.0001838016217235527, "loss": 1.3696, "step": 56700},
+    {"epoch": 1.6943839011136657, "grad_norm": 5.87765645980835, "learning_rate": 0.0001837873359276339, "loss": 1.3616, "step": 56750},
+    {"epoch": 1.695876750365748, "grad_norm": 3.4311394691467285, "learning_rate": 0.00018377305013171505, "loss": 1.3249, "step": 56800},
+    {"epoch": 1.6973695996178306, "grad_norm": 2.7731411457061768, "learning_rate": 0.00018375876433579622, "loss": 1.3715, "step": 56850},
+    {"epoch": 1.698862448869913, "grad_norm": 6.399103164672852, "learning_rate": 0.00018374447853987738, "loss": 1.3654, "step": 56900},
+    {"epoch": 1.7003552981219956, "grad_norm": 3.8202717304229736, "learning_rate": 0.00018373019274395855, "loss": 1.3565, "step": 56950},
+    {"epoch": 1.7018481473740783, "grad_norm": 4.040468215942383, "learning_rate": 0.0001837159069480397, "loss": 1.3373, "step": 57000},
+    {"epoch": 1.7033409966261606, "grad_norm": 4.522890090942383, "learning_rate": 0.00018370162115212088, "loss": 1.3393, "step": 57050},
+    {"epoch": 1.7048338458782433, "grad_norm": 4.771418571472168, "learning_rate": 0.00018368733535620204, "loss": 1.2437, "step": 57100},
+    {"epoch": 1.7063266951303258, "grad_norm": 4.333567142486572, "learning_rate": 0.0001836730495602832, "loss": 1.2876, "step": 57150},
+    {"epoch": 1.7078195443824082, "grad_norm": 5.1498703956604, "learning_rate": 0.00018365876376436437, "loss": 1.4321, "step": 57200},
+    {"epoch": 1.709312393634491, "grad_norm": 3.8978734016418457, "learning_rate": 0.00018364447796844556, "loss": 1.2815, "step": 57250},
+    {"epoch": 1.7108052428865732, "grad_norm": 5.365299224853516, "learning_rate": 0.0001836301921725267, "loss": 1.2443, "step": 57300},
+    {"epoch": 1.712298092138656, "grad_norm": 4.474836826324463, "learning_rate": 0.0001836159063766079, "loss": 1.3634, "step": 57350},
+    {"epoch": 1.7137909413907384, "grad_norm": 3.8736836910247803, "learning_rate": 0.00018360162058068903, "loss": 1.3889, "step": 57400},
+    {"epoch": 1.7152837906428209, "grad_norm": 4.919806957244873, "learning_rate": 0.00018358733478477022, "loss": 1.3216, "step": 57450},
+    {"epoch": 1.7167766398949034, "grad_norm": 6.2346954345703125, "learning_rate": 0.00018357304898885136, "loss": 1.3294, "step": 57500},
+    {"epoch": 1.7182694891469859, "grad_norm": 4.782268524169922, "learning_rate": 0.00018355876319293255, "loss": 1.363, "step": 57550},
+    {"epoch": 1.7197623383990686, "grad_norm": 5.770125389099121, "learning_rate": 0.00018354447739701372, "loss": 1.3441, "step": 57600},
+    {"epoch": 1.721255187651151, "grad_norm": 5.742440223693848, "learning_rate": 0.00018353019160109488, "loss": 1.302, "step": 57650},
+    {"epoch": 1.7227480369032335, "grad_norm": 5.430489540100098, "learning_rate": 0.00018351590580517604, "loss": 1.3183, "step": 57700},
+    {"epoch": 1.724240886155316, "grad_norm": 6.17573356628418, "learning_rate": 0.0001835016200092572, "loss": 1.3421, "step": 57750},
+    {"epoch": 1.7257337354073985, "grad_norm": 5.362539291381836, "learning_rate": 0.00018348733421333837, "loss": 1.3333, "step": 57800},
+    {"epoch": 1.7272265846594812, "grad_norm": 4.4574294090271, "learning_rate": 0.00018347304841741954, "loss": 1.2792, "step": 57850},
+    {"epoch": 1.7287194339115635, "grad_norm": 4.277164936065674, "learning_rate": 0.0001834587626215007, "loss": 1.342, "step": 57900},
+    {"epoch": 1.7302122831636462, "grad_norm": 4.775753498077393, "learning_rate": 0.00018344447682558187, "loss": 1.3618, "step": 57950},
+    {"epoch": 1.7317051324157287, "grad_norm": 4.7877702713012695, "learning_rate": 0.00018343019102966303, "loss": 1.3631, "step": 58000},
+    {"epoch": 1.7331979816678111, "grad_norm": 5.616472244262695, "learning_rate": 0.00018341590523374422, "loss": 1.3558, "step": 58050},
+    {"epoch": 1.7346908309198938, "grad_norm": 5.436590671539307, "learning_rate": 0.00018340161943782536, "loss": 1.3999, "step": 58100},
+    {"epoch": 1.736183680171976, "grad_norm": 4.245835304260254, "learning_rate": 0.00018338733364190655, "loss": 1.3592, "step": 58150},
+    {"epoch": 1.7376765294240588, "grad_norm": 4.440322399139404, "learning_rate": 0.0001833730478459877, "loss": 1.312, "step": 58200},
+    {"epoch": 1.7391693786761413, "grad_norm": 4.439347743988037, "learning_rate": 0.00018335876205006888, "loss": 1.3769, "step": 58250},
+    {"epoch": 1.7406622279282238, "grad_norm": 4.374370098114014, "learning_rate": 0.00018334447625415002, "loss": 1.3328, "step": 58300},
+    {"epoch": 1.7421550771803065, "grad_norm": 3.9568932056427, "learning_rate": 0.00018333019045823119, "loss": 1.2992, "step": 58350},
+    {"epoch": 1.7436479264323888, "grad_norm": 4.133091926574707, "learning_rate": 0.00018331590466231238, "loss": 1.3685, "step": 58400},
+    {"epoch": 1.7451407756844715, "grad_norm": 4.492006778717041, "learning_rate": 0.00018330161886639351, "loss": 1.3543, "step": 58450},
+    {"epoch": 1.746633624936554, "grad_norm": 2.6073009967803955, "learning_rate": 0.0001832873330704747, "loss": 1.36, "step": 58500},
+    {"epoch": 1.7481264741886364, "grad_norm": 4.137593746185303, "learning_rate": 0.00018327304727455584, "loss": 1.3599, "step": 58550},
+    {"epoch": 1.749619323440719, "grad_norm": 4.932790756225586, "learning_rate": 0.00018325876147863704, "loss": 1.3265, "step": 58600},
+    {"epoch": 1.7511121726928014, "grad_norm": 4.623594760894775, "learning_rate": 0.00018324447568271817, "loss": 1.3186, "step": 58650},
+    {"epoch": 1.752605021944884, "grad_norm": 4.066229343414307, "learning_rate": 0.00018323018988679936, "loss": 1.3196, "step": 58700},
+    {"epoch": 1.7540978711969666, "grad_norm": 4.105048656463623, "learning_rate": 0.00018321590409088053, "loss": 1.3663, "step": 58750},
+    {"epoch": 1.755590720449049, "grad_norm": 4.454251766204834, "learning_rate": 0.0001832016182949617, "loss": 1.2823, "step": 58800},
+    {"epoch": 1.7570835697011316, "grad_norm": 4.3030242919921875, "learning_rate": 0.00018318733249904286, "loss": 1.3267, "step": 58850},
+    {"epoch": 1.758576418953214, "grad_norm": 5.350499629974365, "learning_rate": 0.00018317304670312402, "loss": 1.2992, "step": 58900},
+    {"epoch": 1.7600692682052967, "grad_norm": 4.497105598449707, "learning_rate": 0.0001831587609072052, "loss": 1.2759, "step": 58950},
+    {"epoch": 1.761562117457379, "grad_norm": 4.683063983917236, "learning_rate": 0.00018314447511128635, "loss": 1.3342, "step": 59000},
+    {"epoch": 1.7630549667094617, "grad_norm": 3.4937946796417236, "learning_rate": 0.00018313018931536752, "loss": 1.2969, "step": 59050},
+    {"epoch": 1.7645478159615442, "grad_norm": 5.422043800354004, "learning_rate": 0.00018311590351944868, "loss": 1.3116, "step": 59100},
+    {"epoch": 1.7660406652136267, "grad_norm": 3.8882923126220703, "learning_rate": 0.00018310161772352985, "loss": 1.3073, "step": 59150},
+    {"epoch": 1.7675335144657094, "grad_norm": 5.60578727722168, "learning_rate": 0.00018308733192761104, "loss": 1.31, "step": 59200},
+    {"epoch": 1.7690263637177917, "grad_norm": 5.257917881011963, "learning_rate": 0.00018307304613169218, "loss": 1.3617, "step": 59250},
+    {"epoch": 1.7705192129698744, "grad_norm": 4.838101863861084, "learning_rate": 0.00018305876033577337, "loss": 1.367, "step": 59300},
+    {"epoch": 1.7720120622219568, "grad_norm": 4.63686990737915, "learning_rate": 0.0001830444745398545, "loss": 1.3653, "step": 59350},
+    {"epoch": 1.7735049114740393, "grad_norm": 3.8766531944274902, "learning_rate": 0.0001830301887439357, "loss": 1.298, "step": 59400},
+    {"epoch": 1.774997760726122, "grad_norm": 5.356032848358154, "learning_rate": 0.00018301590294801686, "loss": 1.3132, "step": 59450},
+    {"epoch": 1.7764906099782043, "grad_norm": 4.647774696350098, "learning_rate": 0.00018300161715209803, "loss": 1.3594, "step": 59500},
+    {"epoch": 1.777983459230287, "grad_norm": 3.6505117416381836, "learning_rate": 0.0001829873313561792, "loss": 1.3178, "step": 59550},
+    {"epoch": 1.7794763084823695, "grad_norm": 4.109731674194336, "learning_rate": 0.00018297304556026036, "loss": 1.2996, "step": 59600},
+    {"epoch": 1.780969157734452, "grad_norm": 3.942424774169922, "learning_rate": 0.00018295875976434152, "loss": 1.3066, "step": 59650},
+    {"epoch": 1.7824620069865345, "grad_norm": 5.511881351470947, "learning_rate": 0.00018294447396842268, "loss": 1.3009, "step": 59700},
+    {"epoch": 1.783954856238617, "grad_norm": 4.0874552726745605, "learning_rate": 0.00018293018817250385, "loss": 1.3426, "step": 59750},
+    {"epoch": 1.7854477054906996, "grad_norm": 4.787434101104736, "learning_rate": 0.00018291590237658501, "loss": 1.3765, "step": 59800},
+    {"epoch": 1.7869405547427821, "grad_norm": 4.477612018585205, "learning_rate": 0.00018290161658066618, "loss": 1.3215, "step": 59850},
+    {"epoch": 1.7884334039948646, "grad_norm": 6.102437973022461, "learning_rate": 0.00018288733078474737, "loss": 1.2529, "step": 59900},
+    {"epoch": 1.789926253246947, "grad_norm": 4.351520538330078, "learning_rate": 0.0001828730449888285, "loss": 1.2889, "step": 59950},
+    {"epoch": 1.7914191024990296, "grad_norm": 5.043869972229004, "learning_rate": 0.0001828587591929097, "loss": 1.3149, "step": 60000}
   ],
   "logging_steps": 50,
@@ -7033,7 +8433,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.5166094478640415e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
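For reference, the updated trainer_state.json keeps its running log under a `log_history` list, as the entries above show. A minimal sketch of pulling the loss curve back out of it, assuming a local checkout:

```python
import json

# Minimal sketch: read the Trainer state shown in the diff above and
# extract (step, loss) pairs from its "log_history" list.
with open("last-checkpoint/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 60000 1.7914191024990296

# Eval records may lack a "loss" key, hence the guard.
losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
print(losses[-1])  # (60000, 1.3149)
```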