Training in progress, step 160000, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step160000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step160000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bbecd548ad17fd83bbf06fa406bf1397ce14f85bd39672e9dbd6d312bad304d5
 size 42002584
last-checkpoint/global_step160000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c80e3dd983d9d468a8b98d30aa4c828540ec0e76a7c2f9b24c362ddad78e950
+size 251710672
last-checkpoint/global_step160000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6269580d9bd188f4f9ca20b059ec39633299be47294e70df34444548d46dd775
+size 153747385
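The two .pt files added under global_step160000/ follow DeepSpeed's ZeRO checkpoint layout: bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt carries the rank-0 partition of the optimizer state and mp_rank_00_model_states.pt the module states. A minimal sketch of consolidating such a checkpoint into a single fp32 state dict with the zero_to_fp32 utility that DeepSpeed ships; the local path is an assumption based on this commit's layout:

    # Sketch: merge DeepSpeed ZeRO shards into one fp32 state dict.
    # Assumes a local clone of this repo with LFS objects fetched.
    from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

    # last-checkpoint/latest names the tag ("global_step160000"),
    # so the explicit tag argument below is optional.
    state_dict = get_fp32_state_dict_from_zero_checkpoint(
        "last-checkpoint", tag="global_step160000"
    )
    print(f"consolidated {len(state_dict)} parameter tensors")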
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
-
+global_step160000
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:25f76b1fb940b8ac586f9c57b5abe2fccf9aa348d52fdae6ee57b97edaa5c988
 size 14244
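Every binary in this commit is stored as a Git LFS pointer: a three-line text stub (version, oid sha256:<hash>, size in bytes) that stands in for the real file until "git lfs pull" fetches it, which is why each diff above touches only the oid line. A minimal sketch of parsing one of these stubs, assuming a local clone where the LFS objects have not yet been fetched:

    # Sketch: read the fields of a Git LFS pointer stub.
    def parse_lfs_pointer(path):
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = parse_lfs_pointer("last-checkpoint/rng_state.pth")
    print(ptr["oid"], int(ptr["size"]))  # sha256:25f76b1f... 14244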
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.
+  "epoch": 4.7771176066640795,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 160000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -21014,6 +21014,1406 @@
       "learning_rate": 0.00015714432653900882,
       "loss": 1.2089,
       "step": 150000
+    },
+    {
+      "epoch": 4.480040605499656,
+      "grad_norm": 4.181282043457031,
+      "learning_rate": 0.00015713004074308998,
+      "loss": 1.2139,
+      "step": 150050
+    },
+    {
+      "epoch": 4.481533454751739,
+      "grad_norm": 4.993724822998047,
+      "learning_rate": 0.00015711575494717112,
+      "loss": 1.2247,
+      "step": 150100
+    },
+    {
+      "epoch": 4.483026304003822,
+      "grad_norm": 4.870810031890869,
+      "learning_rate": 0.0001571014691512523,
+      "loss": 1.281,
+      "step": 150150
+    },
+    {
+      "epoch": 4.4845191532559046,
+      "grad_norm": 5.145561218261719,
+      "learning_rate": 0.00015708718335533345,
+      "loss": 1.2263,
+      "step": 150200
+    },
+    {
+      "epoch": 4.486012002507986,
+      "grad_norm": 4.650093078613281,
+      "learning_rate": 0.00015707289755941464,
+      "loss": 1.2627,
+      "step": 150250
+    },
+    {
+      "epoch": 4.487504851760069,
+      "grad_norm": 4.286979675292969,
+      "learning_rate": 0.00015705861176349578,
+      "loss": 1.1671,
+      "step": 150300
+    },
+    {
+      "epoch": 4.488997701012152,
+      "grad_norm": 4.116031646728516,
+      "learning_rate": 0.00015704432596757697,
+      "loss": 1.1746,
+      "step": 150350
+    },
+    {
+      "epoch": 4.4904905502642345,
+      "grad_norm": 4.999283313751221,
+      "learning_rate": 0.00015703004017165813,
+      "loss": 1.1993,
+      "step": 150400
+    },
+    {
+      "epoch": 4.491983399516317,
+      "grad_norm": 5.618175983428955,
+      "learning_rate": 0.0001570157543757393,
+      "loss": 1.2608,
+      "step": 150450
+    },
+    {
+      "epoch": 4.493476248768399,
+      "grad_norm": 4.471452236175537,
+      "learning_rate": 0.00015700146857982046,
+      "loss": 1.1811,
+      "step": 150500
+    },
+    {
+      "epoch": 4.494969098020482,
+      "grad_norm": 4.4648518562316895,
+      "learning_rate": 0.00015698718278390163,
+      "loss": 1.2314,
+      "step": 150550
+    },
+    {
+      "epoch": 4.496461947272564,
+      "grad_norm": 5.73147439956665,
+      "learning_rate": 0.0001569728969879828,
+      "loss": 1.243,
+      "step": 150600
+    },
+    {
+      "epoch": 4.497954796524647,
+      "grad_norm": 6.473092555999756,
+      "learning_rate": 0.00015695861119206396,
+      "loss": 1.2208,
+      "step": 150650
+    },
+    {
+      "epoch": 4.49944764577673,
+      "grad_norm": 6.348034381866455,
+      "learning_rate": 0.00015694432539614512,
+      "loss": 1.214,
+      "step": 150700
+    },
+    {
+      "epoch": 4.500940495028812,
+      "grad_norm": 4.37158727645874,
+      "learning_rate": 0.00015693003960022629,
+      "loss": 1.2777,
+      "step": 150750
+    },
+    {
+      "epoch": 4.502433344280894,
+      "grad_norm": 4.451486110687256,
+      "learning_rate": 0.00015691575380430745,
+      "loss": 1.1995,
+      "step": 150800
+    },
+    {
+      "epoch": 4.503926193532977,
+      "grad_norm": 4.328061580657959,
+      "learning_rate": 0.00015690146800838864,
+      "loss": 1.225,
+      "step": 150850
+    },
+    {
+      "epoch": 4.50541904278506,
+      "grad_norm": 5.565524578094482,
+      "learning_rate": 0.00015688718221246978,
+      "loss": 1.2471,
+      "step": 150900
+    },
+    {
+      "epoch": 4.506911892037142,
+      "grad_norm": 4.532073497772217,
+      "learning_rate": 0.00015687289641655097,
+      "loss": 1.2396,
+      "step": 150950
+    },
+    {
+      "epoch": 4.508404741289224,
+      "grad_norm": 5.4925618171691895,
+      "learning_rate": 0.0001568586106206321,
+      "loss": 1.2135,
+      "step": 151000
+    },
+    {
+      "epoch": 4.509897590541307,
+      "grad_norm": 5.885026931762695,
+      "learning_rate": 0.0001568443248247133,
+      "loss": 1.2067,
+      "step": 151050
+    },
+    {
+      "epoch": 4.51139043979339,
+      "grad_norm": 4.559401988983154,
+      "learning_rate": 0.00015683003902879444,
+      "loss": 1.1973,
+      "step": 151100
+    },
+    {
+      "epoch": 4.512883289045472,
+      "grad_norm": 2.85530686378479,
+      "learning_rate": 0.00015681575323287563,
+      "loss": 1.2574,
+      "step": 151150
+    },
+    {
+      "epoch": 4.514376138297555,
+      "grad_norm": 3.6753365993499756,
+      "learning_rate": 0.0001568014674369568,
+      "loss": 1.2358,
+      "step": 151200
+    },
+    {
+      "epoch": 4.515868987549637,
+      "grad_norm": 4.216865539550781,
+      "learning_rate": 0.00015678718164103796,
+      "loss": 1.2624,
+      "step": 151250
+    },
+    {
+      "epoch": 4.51736183680172,
+      "grad_norm": 4.118535995483398,
+      "learning_rate": 0.00015677289584511912,
+      "loss": 1.2322,
+      "step": 151300
+    },
+    {
+      "epoch": 4.518854686053802,
+      "grad_norm": 3.65169095993042,
+      "learning_rate": 0.0001567586100492003,
+      "loss": 1.1924,
+      "step": 151350
+    },
+    {
+      "epoch": 4.520347535305885,
+      "grad_norm": 4.938554763793945,
+      "learning_rate": 0.00015674432425328145,
+      "loss": 1.2083,
+      "step": 151400
+    },
+    {
+      "epoch": 4.521840384557967,
+      "grad_norm": 5.959898471832275,
+      "learning_rate": 0.00015673003845736262,
+      "loss": 1.3091,
+      "step": 151450
+    },
+    {
+      "epoch": 4.52333323381005,
+      "grad_norm": 4.388880252838135,
+      "learning_rate": 0.00015671575266144378,
+      "loss": 1.2481,
+      "step": 151500
+    },
+    {
+      "epoch": 4.524826083062132,
+      "grad_norm": 4.406062602996826,
+      "learning_rate": 0.00015670146686552495,
+      "loss": 1.2337,
+      "step": 151550
+    },
+    {
+      "epoch": 4.526318932314215,
+      "grad_norm": 3.515559196472168,
+      "learning_rate": 0.0001566871810696061,
+      "loss": 1.1663,
+      "step": 151600
+    },
+    {
+      "epoch": 4.527811781566298,
+      "grad_norm": 5.7760701179504395,
+      "learning_rate": 0.0001566728952736873,
+      "loss": 1.1952,
+      "step": 151650
+    },
+    {
+      "epoch": 4.52930463081838,
+      "grad_norm": 3.783639907836914,
+      "learning_rate": 0.00015665860947776844,
+      "loss": 1.2086,
+      "step": 151700
+    },
+    {
+      "epoch": 4.530797480070462,
+      "grad_norm": 4.640422344207764,
+      "learning_rate": 0.00015664432368184963,
+      "loss": 1.2152,
+      "step": 151750
+    },
+    {
+      "epoch": 4.532290329322545,
+      "grad_norm": 5.015472412109375,
+      "learning_rate": 0.00015663003788593077,
+      "loss": 1.2133,
+      "step": 151800
+    },
+    {
+      "epoch": 4.533783178574628,
+      "grad_norm": 5.7063117027282715,
+      "learning_rate": 0.00015661575209001196,
+      "loss": 1.2345,
+      "step": 151850
+    },
+    {
+      "epoch": 4.53527602782671,
+      "grad_norm": 4.530743598937988,
+      "learning_rate": 0.0001566014662940931,
+      "loss": 1.2408,
+      "step": 151900
+    },
+    {
+      "epoch": 4.536768877078792,
+      "grad_norm": 3.597856044769287,
+      "learning_rate": 0.0001565871804981743,
+      "loss": 1.2222,
+      "step": 151950
+    },
+    {
+      "epoch": 4.538261726330875,
+      "grad_norm": 5.361748218536377,
+      "learning_rate": 0.00015657289470225546,
+      "loss": 1.1898,
+      "step": 152000
+    },
+    {
+      "epoch": 4.539754575582958,
+      "grad_norm": 6.137198448181152,
+      "learning_rate": 0.00015655860890633662,
+      "loss": 1.1988,
+      "step": 152050
+    },
+    {
+      "epoch": 4.54124742483504,
+      "grad_norm": 4.32033634185791,
+      "learning_rate": 0.00015654432311041779,
+      "loss": 1.2221,
+      "step": 152100
+    },
+    {
+      "epoch": 4.542740274087123,
+      "grad_norm": 4.112492084503174,
+      "learning_rate": 0.00015653003731449895,
+      "loss": 1.229,
+      "step": 152150
+    },
+    {
+      "epoch": 4.544233123339206,
+      "grad_norm": 4.592637062072754,
+      "learning_rate": 0.00015651575151858012,
+      "loss": 1.2376,
+      "step": 152200
+    },
+    {
+      "epoch": 4.5457259725912875,
+      "grad_norm": 4.471153736114502,
+      "learning_rate": 0.00015650146572266128,
+      "loss": 1.1976,
+      "step": 152250
+    },
+    {
+      "epoch": 4.54721882184337,
+      "grad_norm": 6.161034107208252,
+      "learning_rate": 0.00015648717992674244,
+      "loss": 1.1973,
+      "step": 152300
+    },
+    {
+      "epoch": 4.548711671095453,
+      "grad_norm": 4.57336950302124,
+      "learning_rate": 0.0001564728941308236,
+      "loss": 1.1773,
+      "step": 152350
+    },
+    {
+      "epoch": 4.550204520347536,
+      "grad_norm": 3.936795949935913,
+      "learning_rate": 0.00015645860833490477,
+      "loss": 1.2448,
+      "step": 152400
+    },
+    {
+      "epoch": 4.5516973695996175,
+      "grad_norm": 3.8005549907684326,
+      "learning_rate": 0.00015644432253898597,
+      "loss": 1.2391,
+      "step": 152450
+    },
+    {
+      "epoch": 4.5531902188517,
+      "grad_norm": 4.863176345825195,
+      "learning_rate": 0.0001564300367430671,
+      "loss": 1.2185,
+      "step": 152500
+    },
+    {
+      "epoch": 4.554683068103783,
+      "grad_norm": 4.168069362640381,
+      "learning_rate": 0.0001564157509471483,
+      "loss": 1.1793,
+      "step": 152550
+    },
+    {
+      "epoch": 4.556175917355866,
+      "grad_norm": 4.241150856018066,
+      "learning_rate": 0.00015640146515122943,
+      "loss": 1.2087,
+      "step": 152600
+    },
+    {
+      "epoch": 4.557668766607948,
+      "grad_norm": 5.077085971832275,
+      "learning_rate": 0.00015638717935531062,
+      "loss": 1.1999,
+      "step": 152650
+    },
+    {
+      "epoch": 4.55916161586003,
+      "grad_norm": 4.880026817321777,
+      "learning_rate": 0.00015637289355939176,
+      "loss": 1.1691,
+      "step": 152700
+    },
+    {
+      "epoch": 4.560654465112113,
+      "grad_norm": 4.0811662673950195,
+      "learning_rate": 0.00015635860776347293,
+      "loss": 1.2294,
+      "step": 152750
+    },
+    {
+      "epoch": 4.5621473143641955,
+      "grad_norm": 3.9246151447296143,
+      "learning_rate": 0.00015634432196755412,
+      "loss": 1.2704,
+      "step": 152800
+    },
+    {
+      "epoch": 4.563640163616278,
+      "grad_norm": 4.361538887023926,
+      "learning_rate": 0.00015633003617163526,
+      "loss": 1.2227,
+      "step": 152850
+    },
+    {
+      "epoch": 4.565133012868361,
+      "grad_norm": 3.964625358581543,
+      "learning_rate": 0.00015631575037571645,
+      "loss": 1.2269,
+      "step": 152900
+    },
+    {
+      "epoch": 4.566625862120443,
+      "grad_norm": 4.382391452789307,
+      "learning_rate": 0.00015630146457979759,
+      "loss": 1.2066,
+      "step": 152950
+    },
+    {
+      "epoch": 4.5681187113725255,
+      "grad_norm": 4.823386192321777,
+      "learning_rate": 0.00015628717878387878,
+      "loss": 1.2191,
+      "step": 153000
+    },
+    {
+      "epoch": 4.569611560624608,
+      "grad_norm": 4.2271223068237305,
+      "learning_rate": 0.00015627289298795994,
+      "loss": 1.2611,
+      "step": 153050
+    },
+    {
+      "epoch": 4.571104409876691,
+      "grad_norm": 5.857795238494873,
+      "learning_rate": 0.0001562586071920411,
+      "loss": 1.2222,
+      "step": 153100
+    },
+    {
+      "epoch": 4.572597259128774,
+      "grad_norm": 3.7787981033325195,
+      "learning_rate": 0.00015624432139612227,
+      "loss": 1.2648,
+      "step": 153150
+    },
+    {
+      "epoch": 4.574090108380855,
+      "grad_norm": 4.284102916717529,
+      "learning_rate": 0.00015623003560020344,
+      "loss": 1.2814,
+      "step": 153200
+    },
+    {
+      "epoch": 4.575582957632938,
+      "grad_norm": 6.498587131500244,
+      "learning_rate": 0.0001562157498042846,
+      "loss": 1.209,
+      "step": 153250
+    },
+    {
+      "epoch": 4.577075806885021,
+      "grad_norm": 3.2543442249298096,
+      "learning_rate": 0.00015620146400836577,
+      "loss": 1.1867,
+      "step": 153300
+    },
+    {
+      "epoch": 4.5785686561371035,
+      "grad_norm": 3.8081936836242676,
+      "learning_rate": 0.00015618717821244693,
+      "loss": 1.2242,
+      "step": 153350
+    },
+    {
+      "epoch": 4.580061505389185,
+      "grad_norm": 3.8605005741119385,
+      "learning_rate": 0.0001561728924165281,
+      "loss": 1.1957,
+      "step": 153400
+    },
+    {
+      "epoch": 4.581554354641268,
+      "grad_norm": 6.710892677307129,
+      "learning_rate": 0.00015615860662060926,
+      "loss": 1.1945,
+      "step": 153450
+    },
+    {
+      "epoch": 4.583047203893351,
+      "grad_norm": 4.212560176849365,
+      "learning_rate": 0.00015614432082469045,
+      "loss": 1.2124,
+      "step": 153500
+    },
+    {
+      "epoch": 4.5845400531454334,
+      "grad_norm": 4.561640739440918,
+      "learning_rate": 0.0001561300350287716,
+      "loss": 1.2355,
+      "step": 153550
+    },
+    {
+      "epoch": 4.586032902397516,
+      "grad_norm": 3.4632205963134766,
+      "learning_rate": 0.00015611574923285278,
+      "loss": 1.2606,
+      "step": 153600
+    },
+    {
+      "epoch": 4.587525751649599,
+      "grad_norm": 3.846022844314575,
+      "learning_rate": 0.00015610146343693392,
+      "loss": 1.2386,
+      "step": 153650
+    },
+    {
+      "epoch": 4.589018600901681,
+      "grad_norm": 4.297937870025635,
+      "learning_rate": 0.0001560871776410151,
+      "loss": 1.2084,
+      "step": 153700
+    },
+    {
+      "epoch": 4.590511450153763,
+      "grad_norm": 3.9638116359710693,
+      "learning_rate": 0.00015607289184509625,
+      "loss": 1.2334,
+      "step": 153750
+    },
+    {
+      "epoch": 4.592004299405846,
+      "grad_norm": 5.708649158477783,
+      "learning_rate": 0.00015605860604917744,
+      "loss": 1.1872,
+      "step": 153800
+    },
+    {
+      "epoch": 4.593497148657929,
+      "grad_norm": 4.930756092071533,
+      "learning_rate": 0.0001560443202532586,
+      "loss": 1.2585,
+      "step": 153850
+    },
+    {
+      "epoch": 4.594989997910011,
+      "grad_norm": 4.887782573699951,
+      "learning_rate": 0.00015603003445733977,
+      "loss": 1.2325,
+      "step": 153900
+    },
+    {
+      "epoch": 4.596482847162093,
+      "grad_norm": 5.5042290687561035,
+      "learning_rate": 0.00015601574866142093,
+      "loss": 1.261,
+      "step": 153950
+    },
+    {
+      "epoch": 4.597975696414176,
+      "grad_norm": 5.776236534118652,
+      "learning_rate": 0.0001560014628655021,
+      "loss": 1.205,
+      "step": 154000
+    },
+    {
+      "epoch": 4.599468545666259,
+      "grad_norm": 4.465933799743652,
+      "learning_rate": 0.00015598717706958326,
+      "loss": 1.2886,
+      "step": 154050
+    },
+    {
+      "epoch": 4.600961394918341,
+      "grad_norm": 4.536434173583984,
+      "learning_rate": 0.00015597289127366443,
+      "loss": 1.2107,
+      "step": 154100
+    },
+    {
+      "epoch": 4.602454244170424,
+      "grad_norm": 6.132566452026367,
+      "learning_rate": 0.0001559586054777456,
+      "loss": 1.2365,
+      "step": 154150
+    },
+    {
+      "epoch": 4.603947093422506,
+      "grad_norm": 5.581535816192627,
+      "learning_rate": 0.00015594431968182676,
+      "loss": 1.2163,
+      "step": 154200
+    },
+    {
+      "epoch": 4.605439942674589,
+      "grad_norm": 4.754735946655273,
+      "learning_rate": 0.00015593003388590792,
+      "loss": 1.1939,
+      "step": 154250
+    },
+    {
+      "epoch": 4.606932791926671,
+      "grad_norm": 4.529079914093018,
+      "learning_rate": 0.0001559157480899891,
+      "loss": 1.2434,
+      "step": 154300
+    },
+    {
+      "epoch": 4.608425641178754,
+      "grad_norm": 3.8123152256011963,
+      "learning_rate": 0.00015590146229407025,
+      "loss": 1.1727,
+      "step": 154350
+    },
+    {
+      "epoch": 4.609918490430836,
+      "grad_norm": 7.249644756317139,
+      "learning_rate": 0.00015588717649815144,
+      "loss": 1.2392,
+      "step": 154400
+    },
+    {
+      "epoch": 4.611411339682919,
+      "grad_norm": 5.355637073516846,
+      "learning_rate": 0.00015587289070223258,
+      "loss": 1.1899,
+      "step": 154450
+    },
+    {
+      "epoch": 4.612904188935001,
+      "grad_norm": 3.952632427215576,
+      "learning_rate": 0.00015585860490631377,
+      "loss": 1.248,
+      "step": 154500
+    },
+    {
+      "epoch": 4.614397038187084,
+      "grad_norm": 5.123134136199951,
+      "learning_rate": 0.0001558443191103949,
+      "loss": 1.1783,
+      "step": 154550
+    },
+    {
+      "epoch": 4.615889887439167,
+      "grad_norm": 5.032320022583008,
+      "learning_rate": 0.0001558300333144761,
+      "loss": 1.2189,
+      "step": 154600
+    },
+    {
+      "epoch": 4.6173827366912485,
+      "grad_norm": 4.798953533172607,
+      "learning_rate": 0.00015581574751855726,
+      "loss": 1.2064,
+      "step": 154650
+    },
+    {
+      "epoch": 4.618875585943331,
+      "grad_norm": 4.720959663391113,
+      "learning_rate": 0.00015580146172263843,
+      "loss": 1.2685,
+      "step": 154700
+    },
+    {
+      "epoch": 4.620368435195414,
+      "grad_norm": 5.178957939147949,
+      "learning_rate": 0.0001557871759267196,
+      "loss": 1.2969,
+      "step": 154750
+    },
+    {
+      "epoch": 4.621861284447497,
+      "grad_norm": 2.8498313426971436,
+      "learning_rate": 0.00015577289013080076,
+      "loss": 1.2293,
+      "step": 154800
+    },
+    {
+      "epoch": 4.623354133699579,
+      "grad_norm": 4.07916784286499,
+      "learning_rate": 0.00015575860433488192,
+      "loss": 1.2489,
+      "step": 154850
+    },
+    {
+      "epoch": 4.624846982951661,
+      "grad_norm": 4.014176845550537,
+      "learning_rate": 0.0001557443185389631,
+      "loss": 1.2244,
+      "step": 154900
+    },
+    {
+      "epoch": 4.626339832203744,
+      "grad_norm": 6.280478477478027,
+      "learning_rate": 0.00015573003274304425,
+      "loss": 1.2029,
+      "step": 154950
+    },
+    {
+      "epoch": 4.627832681455827,
+      "grad_norm": 4.045797348022461,
+      "learning_rate": 0.00015571574694712542,
+      "loss": 1.1694,
+      "step": 155000
+    },
+    {
+      "epoch": 4.629325530707909,
+      "grad_norm": 4.864063262939453,
+      "learning_rate": 0.00015570146115120658,
+      "loss": 1.1799,
+      "step": 155050
+    },
+    {
+      "epoch": 4.630818379959992,
+      "grad_norm": 3.5234014987945557,
+      "learning_rate": 0.00015568717535528777,
+      "loss": 1.2153,
+      "step": 155100
+    },
+    {
+      "epoch": 4.632311229212074,
+      "grad_norm": 4.5075273513793945,
+      "learning_rate": 0.0001556728895593689,
+      "loss": 1.1483,
+      "step": 155150
+    },
+    {
+      "epoch": 4.6338040784641565,
+      "grad_norm": 5.258499622344971,
+      "learning_rate": 0.0001556586037634501,
+      "loss": 1.2606,
+      "step": 155200
+    },
+    {
+      "epoch": 4.635296927716239,
+      "grad_norm": 5.108059883117676,
+      "learning_rate": 0.00015564431796753124,
+      "loss": 1.2517,
+      "step": 155250
+    },
+    {
+      "epoch": 4.636789776968322,
+      "grad_norm": 4.033377647399902,
+      "learning_rate": 0.00015563003217161243,
+      "loss": 1.1887,
+      "step": 155300
+    },
+    {
+      "epoch": 4.638282626220404,
+      "grad_norm": 5.546184062957764,
+      "learning_rate": 0.00015561574637569357,
+      "loss": 1.1573,
+      "step": 155350
+    },
+    {
+      "epoch": 4.6397754754724865,
+      "grad_norm": 5.48836088180542,
+      "learning_rate": 0.00015560146057977473,
+      "loss": 1.2637,
+      "step": 155400
+    },
+    {
+      "epoch": 4.641268324724569,
+      "grad_norm": 5.246400833129883,
+      "learning_rate": 0.00015558717478385593,
+      "loss": 1.2008,
+      "step": 155450
+    },
+    {
+      "epoch": 4.642761173976652,
+      "grad_norm": 4.412383556365967,
+      "learning_rate": 0.00015557288898793706,
+      "loss": 1.168,
+      "step": 155500
+    },
+    {
+      "epoch": 4.644254023228735,
+      "grad_norm": 3.9869327545166016,
+      "learning_rate": 0.00015555860319201826,
+      "loss": 1.1371,
+      "step": 155550
+    },
+    {
+      "epoch": 4.645746872480817,
+      "grad_norm": 3.883467197418213,
+      "learning_rate": 0.0001555443173960994,
+      "loss": 1.216,
+      "step": 155600
+    },
+    {
+      "epoch": 4.647239721732899,
+      "grad_norm": 4.335739612579346,
+      "learning_rate": 0.00015553003160018059,
+      "loss": 1.1792,
+      "step": 155650
+    },
+    {
+      "epoch": 4.648732570984982,
+      "grad_norm": 4.266781806945801,
+      "learning_rate": 0.00015551574580426175,
+      "loss": 1.2357,
+      "step": 155700
+    },
+    {
+      "epoch": 4.6502254202370645,
+      "grad_norm": 4.26521635055542,
+      "learning_rate": 0.00015550146000834291,
+      "loss": 1.2613,
+      "step": 155750
+    },
+    {
+      "epoch": 4.651718269489147,
+      "grad_norm": 4.667625427246094,
+      "learning_rate": 0.00015548717421242408,
+      "loss": 1.213,
+      "step": 155800
+    },
+    {
+      "epoch": 4.653211118741229,
+      "grad_norm": 4.781721591949463,
+      "learning_rate": 0.00015547288841650524,
+      "loss": 1.2303,
+      "step": 155850
+    },
+    {
+      "epoch": 4.654703967993312,
+      "grad_norm": 4.377721309661865,
+      "learning_rate": 0.0001554586026205864,
+      "loss": 1.2062,
+      "step": 155900
+    },
+    {
+      "epoch": 4.6561968172453945,
+      "grad_norm": 3.987110137939453,
+      "learning_rate": 0.00015544431682466757,
+      "loss": 1.1393,
+      "step": 155950
+    },
+    {
+      "epoch": 4.657689666497477,
+      "grad_norm": 5.804355144500732,
+      "learning_rate": 0.00015543003102874874,
+      "loss": 1.2368,
+      "step": 156000
+    },
+    {
+      "epoch": 4.65918251574956,
+      "grad_norm": 4.505835056304932,
+      "learning_rate": 0.0001554157452328299,
+      "loss": 1.2318,
+      "step": 156050
+    },
+    {
+      "epoch": 4.660675365001643,
+      "grad_norm": 5.454282760620117,
+      "learning_rate": 0.00015540145943691107,
+      "loss": 1.2058,
+      "step": 156100
+    },
+    {
+      "epoch": 4.662168214253724,
+      "grad_norm": 4.576279163360596,
+      "learning_rate": 0.00015538717364099223,
+      "loss": 1.1932,
+      "step": 156150
+    },
+    {
+      "epoch": 4.663661063505807,
+      "grad_norm": 4.093862533569336,
+      "learning_rate": 0.0001553728878450734,
+      "loss": 1.2505,
+      "step": 156200
+    },
+    {
+      "epoch": 4.66515391275789,
+      "grad_norm": 4.815280437469482,
+      "learning_rate": 0.0001553586020491546,
+      "loss": 1.3025,
+      "step": 156250
+    },
+    {
+      "epoch": 4.6666467620099725,
+      "grad_norm": 5.615932464599609,
+      "learning_rate": 0.00015534431625323573,
+      "loss": 1.2428,
+      "step": 156300
+    },
+    {
+      "epoch": 4.668139611262054,
+      "grad_norm": 4.396317005157471,
+      "learning_rate": 0.00015533003045731692,
+      "loss": 1.2452,
+      "step": 156350
+    },
+    {
+      "epoch": 4.669632460514137,
+      "grad_norm": 5.061831951141357,
+      "learning_rate": 0.00015531574466139805,
+      "loss": 1.2262,
+      "step": 156400
+    },
+    {
+      "epoch": 4.67112530976622,
+      "grad_norm": 4.902180194854736,
+      "learning_rate": 0.00015530145886547925,
+      "loss": 1.2485,
+      "step": 156450
+    },
+    {
+      "epoch": 4.6726181590183025,
+      "grad_norm": 5.020074367523193,
+      "learning_rate": 0.0001552871730695604,
+      "loss": 1.2692,
+      "step": 156500
+    },
+    {
+      "epoch": 4.674111008270385,
+      "grad_norm": 4.696333885192871,
+      "learning_rate": 0.00015527288727364158,
+      "loss": 1.2462,
+      "step": 156550
+    },
+    {
+      "epoch": 4.675603857522468,
+      "grad_norm": 4.284012794494629,
+      "learning_rate": 0.00015525860147772274,
+      "loss": 1.1821,
+      "step": 156600
+    },
+    {
+      "epoch": 4.67709670677455,
+      "grad_norm": 5.056790828704834,
+      "learning_rate": 0.0001552443156818039,
+      "loss": 1.2571,
+      "step": 156650
+    },
+    {
+      "epoch": 4.678589556026632,
+      "grad_norm": 5.424617767333984,
+      "learning_rate": 0.00015523002988588507,
+      "loss": 1.2369,
+      "step": 156700
+    },
+    {
+      "epoch": 4.680082405278715,
+      "grad_norm": 4.2647624015808105,
+      "learning_rate": 0.00015521574408996623,
+      "loss": 1.1531,
+      "step": 156750
+    },
+    {
+      "epoch": 4.681575254530798,
+      "grad_norm": 5.538541316986084,
+      "learning_rate": 0.0001552014582940474,
+      "loss": 1.1512,
+      "step": 156800
+    },
+    {
+      "epoch": 4.68306810378288,
+      "grad_norm": 5.385500907897949,
+      "learning_rate": 0.00015518717249812856,
+      "loss": 1.1912,
+      "step": 156850
+    },
+    {
+      "epoch": 4.684560953034962,
+      "grad_norm": 4.456097602844238,
+      "learning_rate": 0.00015517288670220973,
+      "loss": 1.266,
+      "step": 156900
+    },
+    {
+      "epoch": 4.686053802287045,
+      "grad_norm": 4.176798343658447,
+      "learning_rate": 0.00015515860090629092,
+      "loss": 1.1689,
+      "step": 156950
+    },
+    {
+      "epoch": 4.687546651539128,
+      "grad_norm": 4.8275980949401855,
+      "learning_rate": 0.00015514431511037206,
+      "loss": 1.2168,
+      "step": 157000
+    },
+    {
+      "epoch": 4.6890395007912105,
+      "grad_norm": 4.710529327392578,
+      "learning_rate": 0.00015513002931445325,
+      "loss": 1.2356,
+      "step": 157050
+    },
+    {
+      "epoch": 4.690532350043292,
+      "grad_norm": 3.7281603813171387,
+      "learning_rate": 0.0001551157435185344,
+      "loss": 1.1486,
+      "step": 157100
+    },
+    {
+      "epoch": 4.692025199295375,
+      "grad_norm": 5.062285900115967,
+      "learning_rate": 0.00015510145772261558,
+      "loss": 1.215,
+      "step": 157150
+    },
+    {
+      "epoch": 4.693518048547458,
+      "grad_norm": 3.41444993019104,
+      "learning_rate": 0.00015508717192669672,
+      "loss": 1.2052,
+      "step": 157200
+    },
+    {
+      "epoch": 4.69501089779954,
+      "grad_norm": 3.7359328269958496,
+      "learning_rate": 0.0001550728861307779,
+      "loss": 1.2664,
+      "step": 157250
+    },
+    {
+      "epoch": 4.696503747051623,
+      "grad_norm": 4.192359924316406,
+      "learning_rate": 0.00015505860033485907,
+      "loss": 1.2171,
+      "step": 157300
+    },
+    {
+      "epoch": 4.697996596303705,
+      "grad_norm": 5.198591232299805,
+      "learning_rate": 0.00015504431453894024,
+      "loss": 1.2094,
+      "step": 157350
+    },
+    {
+      "epoch": 4.699489445555788,
+      "grad_norm": 4.99566650390625,
+      "learning_rate": 0.0001550300287430214,
+      "loss": 1.1983,
+      "step": 157400
+    },
+    {
+      "epoch": 4.70098229480787,
+      "grad_norm": 4.944712162017822,
+      "learning_rate": 0.00015501574294710257,
+      "loss": 1.2355,
+      "step": 157450
+    },
+    {
+      "epoch": 4.702475144059953,
+      "grad_norm": 4.957249164581299,
+      "learning_rate": 0.00015500145715118373,
+      "loss": 1.1986,
+      "step": 157500
+    },
+    {
+      "epoch": 4.703967993312036,
+      "grad_norm": 3.442450523376465,
+      "learning_rate": 0.0001549871713552649,
+      "loss": 1.2072,
+      "step": 157550
+    },
+    {
+      "epoch": 4.705460842564118,
+      "grad_norm": 4.920107364654541,
+      "learning_rate": 0.00015497288555934606,
+      "loss": 1.2014,
+      "step": 157600
+    },
+    {
+      "epoch": 4.7069536918162,
+      "grad_norm": 3.697744131088257,
+      "learning_rate": 0.00015495859976342723,
+      "loss": 1.1869,
+      "step": 157650
+    },
+    {
+      "epoch": 4.708446541068283,
+      "grad_norm": 6.023653984069824,
+      "learning_rate": 0.0001549443139675084,
+      "loss": 1.2255,
+      "step": 157700
+    },
+    {
+      "epoch": 4.709939390320366,
+      "grad_norm": 4.989515781402588,
+      "learning_rate": 0.00015493002817158958,
+      "loss": 1.1685,
+      "step": 157750
+    },
+    {
+      "epoch": 4.7114322395724475,
+      "grad_norm": 3.7004759311676025,
+      "learning_rate": 0.00015491574237567072,
+      "loss": 1.1584,
+      "step": 157800
+    },
+    {
+      "epoch": 4.71292508882453,
+      "grad_norm": 4.318152904510498,
+      "learning_rate": 0.0001549014565797519,
+      "loss": 1.2163,
+      "step": 157850
+    },
+    {
+      "epoch": 4.714417938076613,
+      "grad_norm": 4.550259590148926,
+      "learning_rate": 0.00015488717078383305,
+      "loss": 1.2507,
+      "step": 157900
+    },
+    {
+      "epoch": 4.715910787328696,
+      "grad_norm": 4.413512706756592,
+      "learning_rate": 0.00015487288498791424,
+      "loss": 1.2037,
+      "step": 157950
+    },
+    {
+      "epoch": 4.717403636580778,
+      "grad_norm": 3.8001725673675537,
+      "learning_rate": 0.00015485859919199538,
+      "loss": 1.2543,
+      "step": 158000
+    },
+    {
+      "epoch": 4.718896485832861,
+      "grad_norm": 3.4299263954162598,
+      "learning_rate": 0.00015484431339607654,
+      "loss": 1.1916,
+      "step": 158050
+    },
+    {
+      "epoch": 4.720389335084943,
+      "grad_norm": 4.620875835418701,
+      "learning_rate": 0.00015483002760015773,
+      "loss": 1.2697,
+      "step": 158100
+    },
+    {
+      "epoch": 4.721882184337026,
+      "grad_norm": 4.62977409362793,
+      "learning_rate": 0.00015481574180423887,
+      "loss": 1.1801,
+      "step": 158150
+    },
+    {
+      "epoch": 4.723375033589108,
+      "grad_norm": 3.916433572769165,
+      "learning_rate": 0.00015480145600832006,
+      "loss": 1.1801,
+      "step": 158200
+    },
+    {
+      "epoch": 4.724867882841191,
+      "grad_norm": 4.217404842376709,
+      "learning_rate": 0.0001547871702124012,
+      "loss": 1.2328,
+      "step": 158250
+    },
+    {
+      "epoch": 4.726360732093273,
+      "grad_norm": 6.347287654876709,
+      "learning_rate": 0.0001547728844164824,
+      "loss": 1.207,
+      "step": 158300
+    },
+    {
+      "epoch": 4.7278535813453555,
+      "grad_norm": 4.39561653137207,
+      "learning_rate": 0.00015475859862056353,
+      "loss": 1.1908,
+      "step": 158350
+    },
+    {
+      "epoch": 4.729346430597438,
+      "grad_norm": 4.472975730895996,
+      "learning_rate": 0.00015474431282464472,
+      "loss": 1.2007,
+      "step": 158400
+    },
+    {
+      "epoch": 4.730839279849521,
+      "grad_norm": 4.133266448974609,
+      "learning_rate": 0.0001547300270287259,
+      "loss": 1.2425,
+      "step": 158450
+    },
+    {
+      "epoch": 4.732332129101604,
+      "grad_norm": 3.9597208499908447,
+      "learning_rate": 0.00015471574123280705,
+      "loss": 1.2095,
+      "step": 158500
+    },
+    {
+      "epoch": 4.733824978353686,
+      "grad_norm": 4.040492057800293,
+      "learning_rate": 0.00015470145543688822,
+      "loss": 1.1701,
+      "step": 158550
+    },
+    {
+      "epoch": 4.735317827605768,
+      "grad_norm": 4.925788402557373,
+      "learning_rate": 0.00015468716964096938,
+      "loss": 1.2441,
+      "step": 158600
+    },
+    {
+      "epoch": 4.736810676857851,
+      "grad_norm": 3.6005380153656006,
+      "learning_rate": 0.00015467288384505055,
+      "loss": 1.2654,
+      "step": 158650
+    },
+    {
+      "epoch": 4.7383035261099336,
+      "grad_norm": 4.446198463439941,
+      "learning_rate": 0.0001546585980491317,
+      "loss": 1.2385,
+      "step": 158700
+    },
+    {
+      "epoch": 4.739796375362016,
+      "grad_norm": 4.482244491577148,
+      "learning_rate": 0.00015464431225321288,
+      "loss": 1.2554,
+      "step": 158750
+    },
+    {
+      "epoch": 4.741289224614098,
+      "grad_norm": 3.8426311016082764,
+      "learning_rate": 0.00015463002645729404,
+      "loss": 1.1721,
+      "step": 158800
+    },
+    {
+      "epoch": 4.742782073866181,
+      "grad_norm": 5.686476707458496,
+      "learning_rate": 0.0001546157406613752,
+      "loss": 1.2192,
+      "step": 158850
+    },
+    {
+      "epoch": 4.7442749231182635,
+      "grad_norm": 5.65896463394165,
+      "learning_rate": 0.0001546014548654564,
+      "loss": 1.1794,
+      "step": 158900
+    },
+    {
+      "epoch": 4.745767772370346,
+      "grad_norm": 7.737217426300049,
+      "learning_rate": 0.00015458716906953753,
+      "loss": 1.259,
+      "step": 158950
+    },
+    {
+      "epoch": 4.747260621622429,
+      "grad_norm": 5.426517963409424,
+      "learning_rate": 0.00015457288327361873,
+      "loss": 1.2547,
+      "step": 159000
+    },
+    {
+      "epoch": 4.748753470874511,
+      "grad_norm": 4.5140509605407715,
+      "learning_rate": 0.00015455859747769986,
+      "loss": 1.2253,
+      "step": 159050
+    },
+    {
+      "epoch": 4.750246320126593,
+      "grad_norm": 4.854976177215576,
+      "learning_rate": 0.00015454431168178105,
+      "loss": 1.2273,
+      "step": 159100
+    },
+    {
+      "epoch": 4.751739169378676,
+      "grad_norm": 5.603672504425049,
+      "learning_rate": 0.00015453002588586222,
+      "loss": 1.2483,
+      "step": 159150
+    },
+    {
+      "epoch": 4.753232018630759,
+      "grad_norm": 5.602535247802734,
+      "learning_rate": 0.00015451574008994338,
+      "loss": 1.2304,
+      "step": 159200
+    },
+    {
+      "epoch": 4.7547248678828415,
+      "grad_norm": 5.0422773361206055,
+      "learning_rate": 0.00015450145429402455,
+      "loss": 1.2187,
+      "step": 159250
+    },
+    {
+      "epoch": 4.756217717134923,
+      "grad_norm": 4.779214859008789,
+      "learning_rate": 0.0001544871684981057,
+      "loss": 1.1922,
+      "step": 159300
+    },
+    {
+      "epoch": 4.757710566387006,
+      "grad_norm": 3.4088544845581055,
+      "learning_rate": 0.00015447288270218688,
+      "loss": 1.2072,
+      "step": 159350
+    },
+    {
+      "epoch": 4.759203415639089,
+      "grad_norm": 5.209439277648926,
+      "learning_rate": 0.00015445859690626804,
+      "loss": 1.2119,
+      "step": 159400
+    },
+    {
+      "epoch": 4.7606962648911715,
+      "grad_norm": 4.001313209533691,
+      "learning_rate": 0.0001544443111103492,
+      "loss": 1.2626,
+      "step": 159450
+    },
+    {
+      "epoch": 4.762189114143254,
+      "grad_norm": 3.787076234817505,
+      "learning_rate": 0.00015443002531443037,
+      "loss": 1.2197,
+      "step": 159500
+    },
+    {
+      "epoch": 4.763681963395336,
+      "grad_norm": 4.906338214874268,
+      "learning_rate": 0.00015441573951851154,
+      "loss": 1.2964,
+      "step": 159550
+    },
+    {
+      "epoch": 4.765174812647419,
+      "grad_norm": 5.104478359222412,
+      "learning_rate": 0.00015440145372259273,
+      "loss": 1.23,
+      "step": 159600
+    },
+    {
+      "epoch": 4.766667661899501,
+      "grad_norm": 4.3545918464660645,
+      "learning_rate": 0.00015438716792667387,
+      "loss": 1.2015,
+      "step": 159650
+    },
+    {
+      "epoch": 4.768160511151584,
+      "grad_norm": 4.524704456329346,
+      "learning_rate": 0.00015437288213075506,
+      "loss": 1.2358,
+      "step": 159700
+    },
+    {
+      "epoch": 4.769653360403666,
+      "grad_norm": 4.5480637550354,
+      "learning_rate": 0.0001543585963348362,
+      "loss": 1.1828,
+      "step": 159750
+    },
+    {
+      "epoch": 4.771146209655749,
+      "grad_norm": 4.825510501861572,
+      "learning_rate": 0.0001543443105389174,
+      "loss": 1.1657,
+      "step": 159800
+    },
+    {
+      "epoch": 4.772639058907831,
+      "grad_norm": 3.6792662143707275,
+      "learning_rate": 0.00015433002474299852,
+      "loss": 1.2789,
+      "step": 159850
+    },
+    {
+      "epoch": 4.774131908159914,
+      "grad_norm": 4.740036964416504,
+      "learning_rate": 0.00015431573894707972,
+      "loss": 1.2292,
+      "step": 159900
+    },
+    {
+      "epoch": 4.775624757411997,
+      "grad_norm": 5.538552761077881,
+      "learning_rate": 0.00015430145315116088,
+      "loss": 1.1691,
+      "step": 159950
+    },
+    {
+      "epoch": 4.7771176066640795,
+      "grad_norm": 4.624987602233887,
+      "learning_rate": 0.00015428716735524205,
+      "loss": 1.2157,
+      "step": 160000
     }
   ],
   "logging_steps": 50,
@@ -21033,7 +22433,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 4.0396838483022316e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
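The ~1,400 added lines are 200 new log entries, one every 50 optimizer steps from 150,050 through 160,000. Over that window the loss stays near 1.2 while the learning rate decays linearly, dropping about 1.43e-8 per 50-step interval (from 0.00015714432653900882 at step 150,000 to 0.00015428716735524205 at step 160,000); the epoch/step ratio implies roughly 160000 / 4.7771 = ~33,493 optimizer steps per epoch. A minimal sketch of summarizing the loss curve from this checkpoint's trainer_state.json; "log_history" is the standard key written by the transformers TrainerState, and the local path is an assumption:

    # Sketch: summarize the training log stored in this checkpoint.
    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    # Keep the training entries; eval entries, if any, lack a "loss" key.
    logs = [e for e in state["log_history"] if "loss" in e]
    print(f'steps {logs[0]["step"]}..{logs[-1]["step"]}')
    recent = [e["loss"] for e in logs[-200:]]
    print(f"mean loss over the last 200 logged points: {sum(recent) / len(recent):.4f}")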