Training in progress, step 140000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step140000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step140000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42002584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7be691eaf282bf25064ee3473aaeb025e9deec8018cf1bbb96477efef4e9f02
|
3 |
size 42002584
|
last-checkpoint/global_step140000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c197028331b969712006da6a095d71f916f44905e7b83b2ecb270a4fe4656482
|
3 |
+
size 251710672
|
last-checkpoint/global_step140000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79dc08cbd56da6ba5c99743c1aad9b3100799c6bd1e0fcbaf797d35378ac156f
|
3 |
+
size 153747385
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step140000
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86c469f6100d588a6046fb5482072daba9d88dc1761bd5af2080993ae538413c
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18214,6 +18214,1406 @@
|
|
18214 |
"learning_rate": 0.00016285864490654233,
|
18215 |
"loss": 1.2988,
|
18216 |
"step": 130000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18217 |
}
|
18218 |
],
|
18219 |
"logging_steps": 50,
|
@@ -18233,7 +19633,7 @@
|
|
18233 |
"attributes": {}
|
18234 |
}
|
18235 |
},
|
18236 |
-
"total_flos": 3.
|
18237 |
"train_batch_size": 2,
|
18238 |
"trial_name": null,
|
18239 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.179977905831069,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 140000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18214 |
"learning_rate": 0.00016285864490654233,
|
18215 |
"loss": 1.2988,
|
18216 |
"step": 130000
|
18217 |
+
},
|
18218 |
+
{
|
18219 |
+
"epoch": 3.8829009046666467,
|
18220 |
+
"grad_norm": 3.8731918334960938,
|
18221 |
+
"learning_rate": 0.0001628443591106235,
|
18222 |
+
"loss": 1.2158,
|
18223 |
+
"step": 130050
|
18224 |
+
},
|
18225 |
+
{
|
18226 |
+
"epoch": 3.8843937539187294,
|
18227 |
+
"grad_norm": 4.860089302062988,
|
18228 |
+
"learning_rate": 0.00016283007331470466,
|
18229 |
+
"loss": 1.3152,
|
18230 |
+
"step": 130100
|
18231 |
+
},
|
18232 |
+
{
|
18233 |
+
"epoch": 3.8858866031708117,
|
18234 |
+
"grad_norm": 3.8552567958831787,
|
18235 |
+
"learning_rate": 0.00016281578751878583,
|
18236 |
+
"loss": 1.2038,
|
18237 |
+
"step": 130150
|
18238 |
+
},
|
18239 |
+
{
|
18240 |
+
"epoch": 3.8873794524228944,
|
18241 |
+
"grad_norm": 3.8789255619049072,
|
18242 |
+
"learning_rate": 0.000162801501722867,
|
18243 |
+
"loss": 1.1898,
|
18244 |
+
"step": 130200
|
18245 |
+
},
|
18246 |
+
{
|
18247 |
+
"epoch": 3.8888723016749767,
|
18248 |
+
"grad_norm": 5.833735466003418,
|
18249 |
+
"learning_rate": 0.00016278721592694815,
|
18250 |
+
"loss": 1.327,
|
18251 |
+
"step": 130250
|
18252 |
+
},
|
18253 |
+
{
|
18254 |
+
"epoch": 3.8903651509270594,
|
18255 |
+
"grad_norm": 5.1781463623046875,
|
18256 |
+
"learning_rate": 0.00016277293013102932,
|
18257 |
+
"loss": 1.3002,
|
18258 |
+
"step": 130300
|
18259 |
+
},
|
18260 |
+
{
|
18261 |
+
"epoch": 3.891858000179142,
|
18262 |
+
"grad_norm": 5.102818012237549,
|
18263 |
+
"learning_rate": 0.0001627586443351105,
|
18264 |
+
"loss": 1.257,
|
18265 |
+
"step": 130350
|
18266 |
+
},
|
18267 |
+
{
|
18268 |
+
"epoch": 3.8933508494312243,
|
18269 |
+
"grad_norm": 4.642506122589111,
|
18270 |
+
"learning_rate": 0.00016274435853919165,
|
18271 |
+
"loss": 1.2387,
|
18272 |
+
"step": 130400
|
18273 |
+
},
|
18274 |
+
{
|
18275 |
+
"epoch": 3.894843698683307,
|
18276 |
+
"grad_norm": 4.327921390533447,
|
18277 |
+
"learning_rate": 0.00016273007274327284,
|
18278 |
+
"loss": 1.2563,
|
18279 |
+
"step": 130450
|
18280 |
+
},
|
18281 |
+
{
|
18282 |
+
"epoch": 3.8963365479353893,
|
18283 |
+
"grad_norm": 4.99731969833374,
|
18284 |
+
"learning_rate": 0.00016271578694735398,
|
18285 |
+
"loss": 1.2621,
|
18286 |
+
"step": 130500
|
18287 |
+
},
|
18288 |
+
{
|
18289 |
+
"epoch": 3.897829397187472,
|
18290 |
+
"grad_norm": 3.6538639068603516,
|
18291 |
+
"learning_rate": 0.00016270150115143517,
|
18292 |
+
"loss": 1.2284,
|
18293 |
+
"step": 130550
|
18294 |
+
},
|
18295 |
+
{
|
18296 |
+
"epoch": 3.8993222464395547,
|
18297 |
+
"grad_norm": 4.081072807312012,
|
18298 |
+
"learning_rate": 0.0001626872153555163,
|
18299 |
+
"loss": 1.2901,
|
18300 |
+
"step": 130600
|
18301 |
+
},
|
18302 |
+
{
|
18303 |
+
"epoch": 3.900815095691637,
|
18304 |
+
"grad_norm": 5.384579658508301,
|
18305 |
+
"learning_rate": 0.0001626729295595975,
|
18306 |
+
"loss": 1.2898,
|
18307 |
+
"step": 130650
|
18308 |
+
},
|
18309 |
+
{
|
18310 |
+
"epoch": 3.9023079449437197,
|
18311 |
+
"grad_norm": 4.1314520835876465,
|
18312 |
+
"learning_rate": 0.00016265864376367866,
|
18313 |
+
"loss": 1.2692,
|
18314 |
+
"step": 130700
|
18315 |
+
},
|
18316 |
+
{
|
18317 |
+
"epoch": 3.903800794195802,
|
18318 |
+
"grad_norm": 5.35874080657959,
|
18319 |
+
"learning_rate": 0.00016264435796775983,
|
18320 |
+
"loss": 1.2377,
|
18321 |
+
"step": 130750
|
18322 |
+
},
|
18323 |
+
{
|
18324 |
+
"epoch": 3.9052936434478847,
|
18325 |
+
"grad_norm": 4.041543006896973,
|
18326 |
+
"learning_rate": 0.000162630072171841,
|
18327 |
+
"loss": 1.2757,
|
18328 |
+
"step": 130800
|
18329 |
+
},
|
18330 |
+
{
|
18331 |
+
"epoch": 3.9067864926999674,
|
18332 |
+
"grad_norm": 4.784633636474609,
|
18333 |
+
"learning_rate": 0.00016261578637592216,
|
18334 |
+
"loss": 1.234,
|
18335 |
+
"step": 130850
|
18336 |
+
},
|
18337 |
+
{
|
18338 |
+
"epoch": 3.9082793419520496,
|
18339 |
+
"grad_norm": 4.274779319763184,
|
18340 |
+
"learning_rate": 0.00016260150058000332,
|
18341 |
+
"loss": 1.2506,
|
18342 |
+
"step": 130900
|
18343 |
+
},
|
18344 |
+
{
|
18345 |
+
"epoch": 3.9097721912041323,
|
18346 |
+
"grad_norm": 4.858269214630127,
|
18347 |
+
"learning_rate": 0.0001625872147840845,
|
18348 |
+
"loss": 1.2345,
|
18349 |
+
"step": 130950
|
18350 |
+
},
|
18351 |
+
{
|
18352 |
+
"epoch": 3.9112650404562146,
|
18353 |
+
"grad_norm": 3.8560070991516113,
|
18354 |
+
"learning_rate": 0.00016257292898816565,
|
18355 |
+
"loss": 1.2689,
|
18356 |
+
"step": 131000
|
18357 |
+
},
|
18358 |
+
{
|
18359 |
+
"epoch": 3.9127578897082973,
|
18360 |
+
"grad_norm": 4.95020866394043,
|
18361 |
+
"learning_rate": 0.00016255864319224682,
|
18362 |
+
"loss": 1.2156,
|
18363 |
+
"step": 131050
|
18364 |
+
},
|
18365 |
+
{
|
18366 |
+
"epoch": 3.91425073896038,
|
18367 |
+
"grad_norm": 4.7120795249938965,
|
18368 |
+
"learning_rate": 0.00016254435739632798,
|
18369 |
+
"loss": 1.3119,
|
18370 |
+
"step": 131100
|
18371 |
+
},
|
18372 |
+
{
|
18373 |
+
"epoch": 3.9157435882124623,
|
18374 |
+
"grad_norm": 5.613494873046875,
|
18375 |
+
"learning_rate": 0.00016253007160040917,
|
18376 |
+
"loss": 1.2806,
|
18377 |
+
"step": 131150
|
18378 |
+
},
|
18379 |
+
{
|
18380 |
+
"epoch": 3.917236437464545,
|
18381 |
+
"grad_norm": 5.0516767501831055,
|
18382 |
+
"learning_rate": 0.0001625157858044903,
|
18383 |
+
"loss": 1.2254,
|
18384 |
+
"step": 131200
|
18385 |
+
},
|
18386 |
+
{
|
18387 |
+
"epoch": 3.9187292867166272,
|
18388 |
+
"grad_norm": 4.780202388763428,
|
18389 |
+
"learning_rate": 0.0001625015000085715,
|
18390 |
+
"loss": 1.2915,
|
18391 |
+
"step": 131250
|
18392 |
+
},
|
18393 |
+
{
|
18394 |
+
"epoch": 3.92022213596871,
|
18395 |
+
"grad_norm": 5.579587459564209,
|
18396 |
+
"learning_rate": 0.00016248721421265264,
|
18397 |
+
"loss": 1.2509,
|
18398 |
+
"step": 131300
|
18399 |
+
},
|
18400 |
+
{
|
18401 |
+
"epoch": 3.9217149852207926,
|
18402 |
+
"grad_norm": 4.543426990509033,
|
18403 |
+
"learning_rate": 0.00016247292841673383,
|
18404 |
+
"loss": 1.2974,
|
18405 |
+
"step": 131350
|
18406 |
+
},
|
18407 |
+
{
|
18408 |
+
"epoch": 3.923207834472875,
|
18409 |
+
"grad_norm": 5.310088634490967,
|
18410 |
+
"learning_rate": 0.000162458642620815,
|
18411 |
+
"loss": 1.2247,
|
18412 |
+
"step": 131400
|
18413 |
+
},
|
18414 |
+
{
|
18415 |
+
"epoch": 3.9247006837249576,
|
18416 |
+
"grad_norm": 4.900899887084961,
|
18417 |
+
"learning_rate": 0.00016244435682489616,
|
18418 |
+
"loss": 1.3038,
|
18419 |
+
"step": 131450
|
18420 |
+
},
|
18421 |
+
{
|
18422 |
+
"epoch": 3.92619353297704,
|
18423 |
+
"grad_norm": 4.816722869873047,
|
18424 |
+
"learning_rate": 0.00016243007102897732,
|
18425 |
+
"loss": 1.2484,
|
18426 |
+
"step": 131500
|
18427 |
+
},
|
18428 |
+
{
|
18429 |
+
"epoch": 3.9276863822291226,
|
18430 |
+
"grad_norm": 3.618678092956543,
|
18431 |
+
"learning_rate": 0.0001624157852330585,
|
18432 |
+
"loss": 1.3475,
|
18433 |
+
"step": 131550
|
18434 |
+
},
|
18435 |
+
{
|
18436 |
+
"epoch": 3.9291792314812053,
|
18437 |
+
"grad_norm": 5.137662410736084,
|
18438 |
+
"learning_rate": 0.00016240149943713965,
|
18439 |
+
"loss": 1.2443,
|
18440 |
+
"step": 131600
|
18441 |
+
},
|
18442 |
+
{
|
18443 |
+
"epoch": 3.9306720807332876,
|
18444 |
+
"grad_norm": 4.798673152923584,
|
18445 |
+
"learning_rate": 0.00016238721364122082,
|
18446 |
+
"loss": 1.2654,
|
18447 |
+
"step": 131650
|
18448 |
+
},
|
18449 |
+
{
|
18450 |
+
"epoch": 3.93216492998537,
|
18451 |
+
"grad_norm": 4.472255229949951,
|
18452 |
+
"learning_rate": 0.00016237292784530198,
|
18453 |
+
"loss": 1.2686,
|
18454 |
+
"step": 131700
|
18455 |
+
},
|
18456 |
+
{
|
18457 |
+
"epoch": 3.9336577792374525,
|
18458 |
+
"grad_norm": 4.61024284362793,
|
18459 |
+
"learning_rate": 0.00016235864204938315,
|
18460 |
+
"loss": 1.2491,
|
18461 |
+
"step": 131750
|
18462 |
+
},
|
18463 |
+
{
|
18464 |
+
"epoch": 3.9351506284895352,
|
18465 |
+
"grad_norm": 4.1179280281066895,
|
18466 |
+
"learning_rate": 0.0001623443562534643,
|
18467 |
+
"loss": 1.2044,
|
18468 |
+
"step": 131800
|
18469 |
+
},
|
18470 |
+
{
|
18471 |
+
"epoch": 3.9366434777416175,
|
18472 |
+
"grad_norm": 6.594708442687988,
|
18473 |
+
"learning_rate": 0.00016233007045754548,
|
18474 |
+
"loss": 1.2111,
|
18475 |
+
"step": 131850
|
18476 |
+
},
|
18477 |
+
{
|
18478 |
+
"epoch": 3.9381363269937,
|
18479 |
+
"grad_norm": 8.489596366882324,
|
18480 |
+
"learning_rate": 0.00016231578466162664,
|
18481 |
+
"loss": 1.2011,
|
18482 |
+
"step": 131900
|
18483 |
+
},
|
18484 |
+
{
|
18485 |
+
"epoch": 3.9396291762457825,
|
18486 |
+
"grad_norm": 5.426854133605957,
|
18487 |
+
"learning_rate": 0.0001623014988657078,
|
18488 |
+
"loss": 1.268,
|
18489 |
+
"step": 131950
|
18490 |
+
},
|
18491 |
+
{
|
18492 |
+
"epoch": 3.941122025497865,
|
18493 |
+
"grad_norm": 4.155928611755371,
|
18494 |
+
"learning_rate": 0.00016228721306978897,
|
18495 |
+
"loss": 1.2722,
|
18496 |
+
"step": 132000
|
18497 |
+
},
|
18498 |
+
{
|
18499 |
+
"epoch": 3.942614874749948,
|
18500 |
+
"grad_norm": 4.766868591308594,
|
18501 |
+
"learning_rate": 0.00016227292727387014,
|
18502 |
+
"loss": 1.288,
|
18503 |
+
"step": 132050
|
18504 |
+
},
|
18505 |
+
{
|
18506 |
+
"epoch": 3.94410772400203,
|
18507 |
+
"grad_norm": 4.16912317276001,
|
18508 |
+
"learning_rate": 0.0001622586414779513,
|
18509 |
+
"loss": 1.2261,
|
18510 |
+
"step": 132100
|
18511 |
+
},
|
18512 |
+
{
|
18513 |
+
"epoch": 3.945600573254113,
|
18514 |
+
"grad_norm": 6.41418981552124,
|
18515 |
+
"learning_rate": 0.00016224435568203247,
|
18516 |
+
"loss": 1.2414,
|
18517 |
+
"step": 132150
|
18518 |
+
},
|
18519 |
+
{
|
18520 |
+
"epoch": 3.947093422506195,
|
18521 |
+
"grad_norm": 4.736821174621582,
|
18522 |
+
"learning_rate": 0.00016223006988611366,
|
18523 |
+
"loss": 1.2481,
|
18524 |
+
"step": 132200
|
18525 |
+
},
|
18526 |
+
{
|
18527 |
+
"epoch": 3.948586271758278,
|
18528 |
+
"grad_norm": 4.794166564941406,
|
18529 |
+
"learning_rate": 0.0001622157840901948,
|
18530 |
+
"loss": 1.2831,
|
18531 |
+
"step": 132250
|
18532 |
+
},
|
18533 |
+
{
|
18534 |
+
"epoch": 3.9500791210103605,
|
18535 |
+
"grad_norm": 6.948697090148926,
|
18536 |
+
"learning_rate": 0.000162201498294276,
|
18537 |
+
"loss": 1.2545,
|
18538 |
+
"step": 132300
|
18539 |
+
},
|
18540 |
+
{
|
18541 |
+
"epoch": 3.951571970262443,
|
18542 |
+
"grad_norm": 5.114023208618164,
|
18543 |
+
"learning_rate": 0.00016218721249835712,
|
18544 |
+
"loss": 1.2809,
|
18545 |
+
"step": 132350
|
18546 |
+
},
|
18547 |
+
{
|
18548 |
+
"epoch": 3.9530648195145255,
|
18549 |
+
"grad_norm": 4.972701072692871,
|
18550 |
+
"learning_rate": 0.00016217292670243832,
|
18551 |
+
"loss": 1.2143,
|
18552 |
+
"step": 132400
|
18553 |
+
},
|
18554 |
+
{
|
18555 |
+
"epoch": 3.9545576687666077,
|
18556 |
+
"grad_norm": 3.860616445541382,
|
18557 |
+
"learning_rate": 0.00016215864090651945,
|
18558 |
+
"loss": 1.2209,
|
18559 |
+
"step": 132450
|
18560 |
+
},
|
18561 |
+
{
|
18562 |
+
"epoch": 3.9560505180186905,
|
18563 |
+
"grad_norm": 3.6322524547576904,
|
18564 |
+
"learning_rate": 0.00016214435511060065,
|
18565 |
+
"loss": 1.2252,
|
18566 |
+
"step": 132500
|
18567 |
+
},
|
18568 |
+
{
|
18569 |
+
"epoch": 3.957543367270773,
|
18570 |
+
"grad_norm": 5.153745174407959,
|
18571 |
+
"learning_rate": 0.0001621300693146818,
|
18572 |
+
"loss": 1.2748,
|
18573 |
+
"step": 132550
|
18574 |
+
},
|
18575 |
+
{
|
18576 |
+
"epoch": 3.9590362165228554,
|
18577 |
+
"grad_norm": 4.3787617683410645,
|
18578 |
+
"learning_rate": 0.00016211578351876297,
|
18579 |
+
"loss": 1.2634,
|
18580 |
+
"step": 132600
|
18581 |
+
},
|
18582 |
+
{
|
18583 |
+
"epoch": 3.960529065774938,
|
18584 |
+
"grad_norm": 4.335618019104004,
|
18585 |
+
"learning_rate": 0.00016210149772284414,
|
18586 |
+
"loss": 1.2661,
|
18587 |
+
"step": 132650
|
18588 |
+
},
|
18589 |
+
{
|
18590 |
+
"epoch": 3.9620219150270204,
|
18591 |
+
"grad_norm": 4.789446830749512,
|
18592 |
+
"learning_rate": 0.0001620872119269253,
|
18593 |
+
"loss": 1.2877,
|
18594 |
+
"step": 132700
|
18595 |
+
},
|
18596 |
+
{
|
18597 |
+
"epoch": 3.963514764279103,
|
18598 |
+
"grad_norm": 5.508213996887207,
|
18599 |
+
"learning_rate": 0.00016207292613100647,
|
18600 |
+
"loss": 1.2759,
|
18601 |
+
"step": 132750
|
18602 |
+
},
|
18603 |
+
{
|
18604 |
+
"epoch": 3.965007613531186,
|
18605 |
+
"grad_norm": 3.3553807735443115,
|
18606 |
+
"learning_rate": 0.00016205864033508763,
|
18607 |
+
"loss": 1.2907,
|
18608 |
+
"step": 132800
|
18609 |
+
},
|
18610 |
+
{
|
18611 |
+
"epoch": 3.966500462783268,
|
18612 |
+
"grad_norm": 4.773813247680664,
|
18613 |
+
"learning_rate": 0.0001620443545391688,
|
18614 |
+
"loss": 1.3115,
|
18615 |
+
"step": 132850
|
18616 |
+
},
|
18617 |
+
{
|
18618 |
+
"epoch": 3.9679933120353508,
|
18619 |
+
"grad_norm": 4.2718939781188965,
|
18620 |
+
"learning_rate": 0.00016203006874324996,
|
18621 |
+
"loss": 1.2467,
|
18622 |
+
"step": 132900
|
18623 |
+
},
|
18624 |
+
{
|
18625 |
+
"epoch": 3.969486161287433,
|
18626 |
+
"grad_norm": 4.36405086517334,
|
18627 |
+
"learning_rate": 0.00016201578294733113,
|
18628 |
+
"loss": 1.2536,
|
18629 |
+
"step": 132950
|
18630 |
+
},
|
18631 |
+
{
|
18632 |
+
"epoch": 3.9709790105395157,
|
18633 |
+
"grad_norm": 5.21968936920166,
|
18634 |
+
"learning_rate": 0.00016200149715141232,
|
18635 |
+
"loss": 1.3317,
|
18636 |
+
"step": 133000
|
18637 |
+
},
|
18638 |
+
{
|
18639 |
+
"epoch": 3.9724718597915984,
|
18640 |
+
"grad_norm": 3.792954683303833,
|
18641 |
+
"learning_rate": 0.00016198721135549346,
|
18642 |
+
"loss": 1.2041,
|
18643 |
+
"step": 133050
|
18644 |
+
},
|
18645 |
+
{
|
18646 |
+
"epoch": 3.9739647090436807,
|
18647 |
+
"grad_norm": 4.445356369018555,
|
18648 |
+
"learning_rate": 0.00016197292555957465,
|
18649 |
+
"loss": 1.2283,
|
18650 |
+
"step": 133100
|
18651 |
+
},
|
18652 |
+
{
|
18653 |
+
"epoch": 3.9754575582957634,
|
18654 |
+
"grad_norm": 4.6043548583984375,
|
18655 |
+
"learning_rate": 0.00016195863976365579,
|
18656 |
+
"loss": 1.328,
|
18657 |
+
"step": 133150
|
18658 |
+
},
|
18659 |
+
{
|
18660 |
+
"epoch": 3.9769504075478457,
|
18661 |
+
"grad_norm": 4.0124053955078125,
|
18662 |
+
"learning_rate": 0.00016194435396773698,
|
18663 |
+
"loss": 1.1772,
|
18664 |
+
"step": 133200
|
18665 |
+
},
|
18666 |
+
{
|
18667 |
+
"epoch": 3.9784432567999284,
|
18668 |
+
"grad_norm": 4.683640480041504,
|
18669 |
+
"learning_rate": 0.00016193006817181811,
|
18670 |
+
"loss": 1.2748,
|
18671 |
+
"step": 133250
|
18672 |
+
},
|
18673 |
+
{
|
18674 |
+
"epoch": 3.979936106052011,
|
18675 |
+
"grad_norm": 5.253026008605957,
|
18676 |
+
"learning_rate": 0.0001619157823758993,
|
18677 |
+
"loss": 1.2193,
|
18678 |
+
"step": 133300
|
18679 |
+
},
|
18680 |
+
{
|
18681 |
+
"epoch": 3.9814289553040934,
|
18682 |
+
"grad_norm": 4.60040283203125,
|
18683 |
+
"learning_rate": 0.00016190149657998047,
|
18684 |
+
"loss": 1.2518,
|
18685 |
+
"step": 133350
|
18686 |
+
},
|
18687 |
+
{
|
18688 |
+
"epoch": 3.982921804556176,
|
18689 |
+
"grad_norm": 5.973727226257324,
|
18690 |
+
"learning_rate": 0.00016188721078406164,
|
18691 |
+
"loss": 1.242,
|
18692 |
+
"step": 133400
|
18693 |
+
},
|
18694 |
+
{
|
18695 |
+
"epoch": 3.9844146538082583,
|
18696 |
+
"grad_norm": 5.09537410736084,
|
18697 |
+
"learning_rate": 0.0001618729249881428,
|
18698 |
+
"loss": 1.2484,
|
18699 |
+
"step": 133450
|
18700 |
+
},
|
18701 |
+
{
|
18702 |
+
"epoch": 3.985907503060341,
|
18703 |
+
"grad_norm": 4.428155899047852,
|
18704 |
+
"learning_rate": 0.00016185863919222397,
|
18705 |
+
"loss": 1.2112,
|
18706 |
+
"step": 133500
|
18707 |
+
},
|
18708 |
+
{
|
18709 |
+
"epoch": 3.9874003523124237,
|
18710 |
+
"grad_norm": 4.2854838371276855,
|
18711 |
+
"learning_rate": 0.00016184435339630513,
|
18712 |
+
"loss": 1.2402,
|
18713 |
+
"step": 133550
|
18714 |
+
},
|
18715 |
+
{
|
18716 |
+
"epoch": 3.988893201564506,
|
18717 |
+
"grad_norm": 4.689759254455566,
|
18718 |
+
"learning_rate": 0.0001618300676003863,
|
18719 |
+
"loss": 1.2705,
|
18720 |
+
"step": 133600
|
18721 |
+
},
|
18722 |
+
{
|
18723 |
+
"epoch": 3.9903860508165887,
|
18724 |
+
"grad_norm": 4.272946357727051,
|
18725 |
+
"learning_rate": 0.00016181578180446746,
|
18726 |
+
"loss": 1.2105,
|
18727 |
+
"step": 133650
|
18728 |
+
},
|
18729 |
+
{
|
18730 |
+
"epoch": 3.991878900068671,
|
18731 |
+
"grad_norm": 4.82036018371582,
|
18732 |
+
"learning_rate": 0.00016180149600854862,
|
18733 |
+
"loss": 1.2379,
|
18734 |
+
"step": 133700
|
18735 |
+
},
|
18736 |
+
{
|
18737 |
+
"epoch": 3.9933717493207537,
|
18738 |
+
"grad_norm": 5.448759078979492,
|
18739 |
+
"learning_rate": 0.0001617872102126298,
|
18740 |
+
"loss": 1.2077,
|
18741 |
+
"step": 133750
|
18742 |
+
},
|
18743 |
+
{
|
18744 |
+
"epoch": 3.9948645985728364,
|
18745 |
+
"grad_norm": 3.519653797149658,
|
18746 |
+
"learning_rate": 0.00016177292441671098,
|
18747 |
+
"loss": 1.2057,
|
18748 |
+
"step": 133800
|
18749 |
+
},
|
18750 |
+
{
|
18751 |
+
"epoch": 3.9963574478249186,
|
18752 |
+
"grad_norm": 4.229268550872803,
|
18753 |
+
"learning_rate": 0.00016175863862079212,
|
18754 |
+
"loss": 1.3109,
|
18755 |
+
"step": 133850
|
18756 |
+
},
|
18757 |
+
{
|
18758 |
+
"epoch": 3.997850297077001,
|
18759 |
+
"grad_norm": 3.772096633911133,
|
18760 |
+
"learning_rate": 0.0001617443528248733,
|
18761 |
+
"loss": 1.3218,
|
18762 |
+
"step": 133900
|
18763 |
+
},
|
18764 |
+
{
|
18765 |
+
"epoch": 3.9993431463290836,
|
18766 |
+
"grad_norm": 3.7670326232910156,
|
18767 |
+
"learning_rate": 0.00016173006702895445,
|
18768 |
+
"loss": 1.2882,
|
18769 |
+
"step": 133950
|
18770 |
+
},
|
18771 |
+
{
|
18772 |
+
"epoch": 4.000835995581166,
|
18773 |
+
"grad_norm": 4.392240524291992,
|
18774 |
+
"learning_rate": 0.00016171578123303564,
|
18775 |
+
"loss": 1.2225,
|
18776 |
+
"step": 134000
|
18777 |
+
},
|
18778 |
+
{
|
18779 |
+
"epoch": 4.002328844833249,
|
18780 |
+
"grad_norm": 4.192978382110596,
|
18781 |
+
"learning_rate": 0.00016170149543711678,
|
18782 |
+
"loss": 1.1469,
|
18783 |
+
"step": 134050
|
18784 |
+
},
|
18785 |
+
{
|
18786 |
+
"epoch": 4.003821694085331,
|
18787 |
+
"grad_norm": 4.702342987060547,
|
18788 |
+
"learning_rate": 0.00016168720964119797,
|
18789 |
+
"loss": 1.1452,
|
18790 |
+
"step": 134100
|
18791 |
+
},
|
18792 |
+
{
|
18793 |
+
"epoch": 4.0053145433374135,
|
18794 |
+
"grad_norm": 4.7361555099487305,
|
18795 |
+
"learning_rate": 0.00016167292384527913,
|
18796 |
+
"loss": 1.1871,
|
18797 |
+
"step": 134150
|
18798 |
+
},
|
18799 |
+
{
|
18800 |
+
"epoch": 4.006807392589496,
|
18801 |
+
"grad_norm": 3.4626450538635254,
|
18802 |
+
"learning_rate": 0.0001616586380493603,
|
18803 |
+
"loss": 1.159,
|
18804 |
+
"step": 134200
|
18805 |
+
},
|
18806 |
+
{
|
18807 |
+
"epoch": 4.008300241841579,
|
18808 |
+
"grad_norm": 4.5900654792785645,
|
18809 |
+
"learning_rate": 0.00016164435225344146,
|
18810 |
+
"loss": 1.2012,
|
18811 |
+
"step": 134250
|
18812 |
+
},
|
18813 |
+
{
|
18814 |
+
"epoch": 4.009793091093662,
|
18815 |
+
"grad_norm": 6.448458194732666,
|
18816 |
+
"learning_rate": 0.00016163006645752263,
|
18817 |
+
"loss": 1.1656,
|
18818 |
+
"step": 134300
|
18819 |
+
},
|
18820 |
+
{
|
18821 |
+
"epoch": 4.0112859403457435,
|
18822 |
+
"grad_norm": 4.073904991149902,
|
18823 |
+
"learning_rate": 0.0001616157806616038,
|
18824 |
+
"loss": 1.131,
|
18825 |
+
"step": 134350
|
18826 |
+
},
|
18827 |
+
{
|
18828 |
+
"epoch": 4.012778789597826,
|
18829 |
+
"grad_norm": 5.392414569854736,
|
18830 |
+
"learning_rate": 0.00016160149486568496,
|
18831 |
+
"loss": 1.1462,
|
18832 |
+
"step": 134400
|
18833 |
+
},
|
18834 |
+
{
|
18835 |
+
"epoch": 4.014271638849909,
|
18836 |
+
"grad_norm": 4.959211349487305,
|
18837 |
+
"learning_rate": 0.00016158720906976612,
|
18838 |
+
"loss": 1.1275,
|
18839 |
+
"step": 134450
|
18840 |
+
},
|
18841 |
+
{
|
18842 |
+
"epoch": 4.015764488101992,
|
18843 |
+
"grad_norm": 4.862436771392822,
|
18844 |
+
"learning_rate": 0.00016157292327384729,
|
18845 |
+
"loss": 1.1395,
|
18846 |
+
"step": 134500
|
18847 |
+
},
|
18848 |
+
{
|
18849 |
+
"epoch": 4.017257337354074,
|
18850 |
+
"grad_norm": 4.025382995605469,
|
18851 |
+
"learning_rate": 0.00016155863747792845,
|
18852 |
+
"loss": 1.1529,
|
18853 |
+
"step": 134550
|
18854 |
+
},
|
18855 |
+
{
|
18856 |
+
"epoch": 4.018750186606156,
|
18857 |
+
"grad_norm": 5.5243449211120605,
|
18858 |
+
"learning_rate": 0.00016154435168200961,
|
18859 |
+
"loss": 1.1552,
|
18860 |
+
"step": 134600
|
18861 |
+
},
|
18862 |
+
{
|
18863 |
+
"epoch": 4.020243035858239,
|
18864 |
+
"grad_norm": 5.48097038269043,
|
18865 |
+
"learning_rate": 0.00016153006588609078,
|
18866 |
+
"loss": 1.1542,
|
18867 |
+
"step": 134650
|
18868 |
+
},
|
18869 |
+
{
|
18870 |
+
"epoch": 4.0217358851103215,
|
18871 |
+
"grad_norm": 5.017364978790283,
|
18872 |
+
"learning_rate": 0.00016151578009017194,
|
18873 |
+
"loss": 1.1294,
|
18874 |
+
"step": 134700
|
18875 |
+
},
|
18876 |
+
{
|
18877 |
+
"epoch": 4.023228734362404,
|
18878 |
+
"grad_norm": 3.903317451477051,
|
18879 |
+
"learning_rate": 0.0001615014942942531,
|
18880 |
+
"loss": 1.1743,
|
18881 |
+
"step": 134750
|
18882 |
+
},
|
18883 |
+
{
|
18884 |
+
"epoch": 4.024721583614487,
|
18885 |
+
"grad_norm": 5.144746780395508,
|
18886 |
+
"learning_rate": 0.00016148720849833427,
|
18887 |
+
"loss": 1.184,
|
18888 |
+
"step": 134800
|
18889 |
+
},
|
18890 |
+
{
|
18891 |
+
"epoch": 4.026214432866569,
|
18892 |
+
"grad_norm": 3.201718330383301,
|
18893 |
+
"learning_rate": 0.00016147292270241547,
|
18894 |
+
"loss": 1.1294,
|
18895 |
+
"step": 134850
|
18896 |
+
},
|
18897 |
+
{
|
18898 |
+
"epoch": 4.0277072821186515,
|
18899 |
+
"grad_norm": 5.051429748535156,
|
18900 |
+
"learning_rate": 0.0001614586369064966,
|
18901 |
+
"loss": 1.1625,
|
18902 |
+
"step": 134900
|
18903 |
+
},
|
18904 |
+
{
|
18905 |
+
"epoch": 4.029200131370734,
|
18906 |
+
"grad_norm": 4.083528995513916,
|
18907 |
+
"learning_rate": 0.0001614443511105778,
|
18908 |
+
"loss": 1.1865,
|
18909 |
+
"step": 134950
|
18910 |
+
},
|
18911 |
+
{
|
18912 |
+
"epoch": 4.030692980622817,
|
18913 |
+
"grad_norm": 4.694931983947754,
|
18914 |
+
"learning_rate": 0.00016143006531465893,
|
18915 |
+
"loss": 1.1607,
|
18916 |
+
"step": 135000
|
18917 |
+
},
|
18918 |
+
{
|
18919 |
+
"epoch": 4.0321858298749,
|
18920 |
+
"grad_norm": 3.7577016353607178,
|
18921 |
+
"learning_rate": 0.00016141577951874012,
|
18922 |
+
"loss": 1.2009,
|
18923 |
+
"step": 135050
|
18924 |
+
},
|
18925 |
+
{
|
18926 |
+
"epoch": 4.033678679126981,
|
18927 |
+
"grad_norm": 3.913961887359619,
|
18928 |
+
"learning_rate": 0.00016140149372282126,
|
18929 |
+
"loss": 1.148,
|
18930 |
+
"step": 135100
|
18931 |
+
},
|
18932 |
+
{
|
18933 |
+
"epoch": 4.035171528379064,
|
18934 |
+
"grad_norm": 4.2388129234313965,
|
18935 |
+
"learning_rate": 0.00016138720792690245,
|
18936 |
+
"loss": 1.1673,
|
18937 |
+
"step": 135150
|
18938 |
+
},
|
18939 |
+
{
|
18940 |
+
"epoch": 4.036664377631147,
|
18941 |
+
"grad_norm": 4.703933238983154,
|
18942 |
+
"learning_rate": 0.00016137292213098362,
|
18943 |
+
"loss": 1.1249,
|
18944 |
+
"step": 135200
|
18945 |
+
},
|
18946 |
+
{
|
18947 |
+
"epoch": 4.0381572268832295,
|
18948 |
+
"grad_norm": 5.607854843139648,
|
18949 |
+
"learning_rate": 0.00016135863633506478,
|
18950 |
+
"loss": 1.1609,
|
18951 |
+
"step": 135250
|
18952 |
+
},
|
18953 |
+
{
|
18954 |
+
"epoch": 4.039650076135312,
|
18955 |
+
"grad_norm": 4.3828840255737305,
|
18956 |
+
"learning_rate": 0.00016134435053914595,
|
18957 |
+
"loss": 1.1788,
|
18958 |
+
"step": 135300
|
18959 |
+
},
|
18960 |
+
{
|
18961 |
+
"epoch": 4.041142925387394,
|
18962 |
+
"grad_norm": 5.778615474700928,
|
18963 |
+
"learning_rate": 0.0001613300647432271,
|
18964 |
+
"loss": 1.2101,
|
18965 |
+
"step": 135350
|
18966 |
+
},
|
18967 |
+
{
|
18968 |
+
"epoch": 4.042635774639477,
|
18969 |
+
"grad_norm": 4.011424541473389,
|
18970 |
+
"learning_rate": 0.00016131577894730828,
|
18971 |
+
"loss": 1.1991,
|
18972 |
+
"step": 135400
|
18973 |
+
},
|
18974 |
+
{
|
18975 |
+
"epoch": 4.0441286238915595,
|
18976 |
+
"grad_norm": 5.844552993774414,
|
18977 |
+
"learning_rate": 0.00016130149315138944,
|
18978 |
+
"loss": 1.2172,
|
18979 |
+
"step": 135450
|
18980 |
+
},
|
18981 |
+
{
|
18982 |
+
"epoch": 4.045621473143642,
|
18983 |
+
"grad_norm": 4.851437091827393,
|
18984 |
+
"learning_rate": 0.0001612872073554706,
|
18985 |
+
"loss": 1.2379,
|
18986 |
+
"step": 135500
|
18987 |
+
},
|
18988 |
+
{
|
18989 |
+
"epoch": 4.047114322395725,
|
18990 |
+
"grad_norm": 6.846874713897705,
|
18991 |
+
"learning_rate": 0.00016127292155955177,
|
18992 |
+
"loss": 1.1666,
|
18993 |
+
"step": 135550
|
18994 |
+
},
|
18995 |
+
{
|
18996 |
+
"epoch": 4.048607171647807,
|
18997 |
+
"grad_norm": 3.9584429264068604,
|
18998 |
+
"learning_rate": 0.00016125863576363294,
|
18999 |
+
"loss": 1.191,
|
19000 |
+
"step": 135600
|
19001 |
+
},
|
19002 |
+
{
|
19003 |
+
"epoch": 4.050100020899889,
|
19004 |
+
"grad_norm": 3.547799587249756,
|
19005 |
+
"learning_rate": 0.00016124434996771413,
|
19006 |
+
"loss": 1.234,
|
19007 |
+
"step": 135650
|
19008 |
+
},
|
19009 |
+
{
|
19010 |
+
"epoch": 4.051592870151972,
|
19011 |
+
"grad_norm": 4.4900641441345215,
|
19012 |
+
"learning_rate": 0.00016123006417179526,
|
19013 |
+
"loss": 1.1759,
|
19014 |
+
"step": 135700
|
19015 |
+
},
|
19016 |
+
{
|
19017 |
+
"epoch": 4.053085719404055,
|
19018 |
+
"grad_norm": 4.560009479522705,
|
19019 |
+
"learning_rate": 0.00016121577837587646,
|
19020 |
+
"loss": 1.1759,
|
19021 |
+
"step": 135750
|
19022 |
+
},
|
19023 |
+
{
|
19024 |
+
"epoch": 4.0545785686561375,
|
19025 |
+
"grad_norm": 4.02714204788208,
|
19026 |
+
"learning_rate": 0.0001612014925799576,
|
19027 |
+
"loss": 1.1788,
|
19028 |
+
"step": 135800
|
19029 |
+
},
|
19030 |
+
{
|
19031 |
+
"epoch": 4.056071417908219,
|
19032 |
+
"grad_norm": 4.854091644287109,
|
19033 |
+
"learning_rate": 0.00016118720678403879,
|
19034 |
+
"loss": 1.1621,
|
19035 |
+
"step": 135850
|
19036 |
+
},
|
19037 |
+
{
|
19038 |
+
"epoch": 4.057564267160302,
|
19039 |
+
"grad_norm": 4.28786563873291,
|
19040 |
+
"learning_rate": 0.00016117292098811992,
|
19041 |
+
"loss": 1.1694,
|
19042 |
+
"step": 135900
|
19043 |
+
},
|
19044 |
+
{
|
19045 |
+
"epoch": 4.059057116412385,
|
19046 |
+
"grad_norm": 5.054505825042725,
|
19047 |
+
"learning_rate": 0.00016115863519220111,
|
19048 |
+
"loss": 1.2124,
|
19049 |
+
"step": 135950
|
19050 |
+
},
|
19051 |
+
{
|
19052 |
+
"epoch": 4.0605499656644675,
|
19053 |
+
"grad_norm": 4.706223011016846,
|
19054 |
+
"learning_rate": 0.00016114434939628228,
|
19055 |
+
"loss": 1.1867,
|
19056 |
+
"step": 136000
|
19057 |
+
},
|
19058 |
+
{
|
19059 |
+
"epoch": 4.06204281491655,
|
19060 |
+
"grad_norm": 3.443118095397949,
|
19061 |
+
"learning_rate": 0.00016113006360036344,
|
19062 |
+
"loss": 1.1398,
|
19063 |
+
"step": 136050
|
19064 |
+
},
|
19065 |
+
{
|
19066 |
+
"epoch": 4.063535664168632,
|
19067 |
+
"grad_norm": 5.108642578125,
|
19068 |
+
"learning_rate": 0.0001611157778044446,
|
19069 |
+
"loss": 1.1902,
|
19070 |
+
"step": 136100
|
19071 |
+
},
|
19072 |
+
{
|
19073 |
+
"epoch": 4.065028513420715,
|
19074 |
+
"grad_norm": 5.258908271789551,
|
19075 |
+
"learning_rate": 0.00016110149200852577,
|
19076 |
+
"loss": 1.2093,
|
19077 |
+
"step": 136150
|
19078 |
+
},
|
19079 |
+
{
|
19080 |
+
"epoch": 4.066521362672797,
|
19081 |
+
"grad_norm": 3.8509016036987305,
|
19082 |
+
"learning_rate": 0.00016108720621260694,
|
19083 |
+
"loss": 1.1354,
|
19084 |
+
"step": 136200
|
19085 |
+
},
|
19086 |
+
{
|
19087 |
+
"epoch": 4.06801421192488,
|
19088 |
+
"grad_norm": 5.995665073394775,
|
19089 |
+
"learning_rate": 0.0001610729204166881,
|
19090 |
+
"loss": 1.1516,
|
19091 |
+
"step": 136250
|
19092 |
+
},
|
19093 |
+
{
|
19094 |
+
"epoch": 4.069507061176962,
|
19095 |
+
"grad_norm": 4.4564738273620605,
|
19096 |
+
"learning_rate": 0.00016105863462076927,
|
19097 |
+
"loss": 1.1405,
|
19098 |
+
"step": 136300
|
19099 |
+
},
|
19100 |
+
{
|
19101 |
+
"epoch": 4.070999910429045,
|
19102 |
+
"grad_norm": 7.378493309020996,
|
19103 |
+
"learning_rate": 0.00016104434882485043,
|
19104 |
+
"loss": 1.1817,
|
19105 |
+
"step": 136350
|
19106 |
+
},
|
19107 |
+
{
|
19108 |
+
"epoch": 4.072492759681127,
|
19109 |
+
"grad_norm": 4.1566009521484375,
|
19110 |
+
"learning_rate": 0.0001610300630289316,
|
19111 |
+
"loss": 1.1988,
|
19112 |
+
"step": 136400
|
19113 |
+
},
|
19114 |
+
{
|
19115 |
+
"epoch": 4.07398560893321,
|
19116 |
+
"grad_norm": 4.735561370849609,
|
19117 |
+
"learning_rate": 0.0001610157772330128,
|
19118 |
+
"loss": 1.1847,
|
19119 |
+
"step": 136450
|
19120 |
+
},
|
19121 |
+
{
|
19122 |
+
"epoch": 4.075478458185293,
|
19123 |
+
"grad_norm": 6.040157318115234,
|
19124 |
+
"learning_rate": 0.00016100149143709393,
|
19125 |
+
"loss": 1.1762,
|
19126 |
+
"step": 136500
|
19127 |
+
},
|
19128 |
+
{
|
19129 |
+
"epoch": 4.076971307437375,
|
19130 |
+
"grad_norm": 4.695454120635986,
|
19131 |
+
"learning_rate": 0.00016098720564117512,
|
19132 |
+
"loss": 1.2151,
|
19133 |
+
"step": 136550
|
19134 |
+
},
|
19135 |
+
{
|
19136 |
+
"epoch": 4.078464156689457,
|
19137 |
+
"grad_norm": 5.111282825469971,
|
19138 |
+
"learning_rate": 0.00016097291984525626,
|
19139 |
+
"loss": 1.2581,
|
19140 |
+
"step": 136600
|
19141 |
+
},
|
19142 |
+
{
|
19143 |
+
"epoch": 4.07995700594154,
|
19144 |
+
"grad_norm": 6.601406574249268,
|
19145 |
+
"learning_rate": 0.00016095863404933745,
|
19146 |
+
"loss": 1.1902,
|
19147 |
+
"step": 136650
|
19148 |
+
},
|
19149 |
+
{
|
19150 |
+
"epoch": 4.081449855193623,
|
19151 |
+
"grad_norm": 4.173478126525879,
|
19152 |
+
"learning_rate": 0.00016094434825341858,
|
19153 |
+
"loss": 1.1809,
|
19154 |
+
"step": 136700
|
19155 |
+
},
|
19156 |
+
{
|
19157 |
+
"epoch": 4.082942704445705,
|
19158 |
+
"grad_norm": 4.189050674438477,
|
19159 |
+
"learning_rate": 0.00016093006245749978,
|
19160 |
+
"loss": 1.1837,
|
19161 |
+
"step": 136750
|
19162 |
+
},
|
19163 |
+
{
|
19164 |
+
"epoch": 4.084435553697787,
|
19165 |
+
"grad_norm": 4.181232929229736,
|
19166 |
+
"learning_rate": 0.00016091577666158094,
|
19167 |
+
"loss": 1.124,
|
19168 |
+
"step": 136800
|
19169 |
+
},
|
19170 |
+
{
|
19171 |
+
"epoch": 4.08592840294987,
|
19172 |
+
"grad_norm": 6.015085697174072,
|
19173 |
+
"learning_rate": 0.0001609014908656621,
|
19174 |
+
"loss": 1.2253,
|
19175 |
+
"step": 136850
|
19176 |
+
},
|
19177 |
+
{
|
19178 |
+
"epoch": 4.087421252201953,
|
19179 |
+
"grad_norm": 4.773750305175781,
|
19180 |
+
"learning_rate": 0.00016088720506974327,
|
19181 |
+
"loss": 1.1498,
|
19182 |
+
"step": 136900
|
19183 |
+
},
|
19184 |
+
{
|
19185 |
+
"epoch": 4.088914101454035,
|
19186 |
+
"grad_norm": 4.635052680969238,
|
19187 |
+
"learning_rate": 0.00016087291927382443,
|
19188 |
+
"loss": 1.1946,
|
19189 |
+
"step": 136950
|
19190 |
+
},
|
19191 |
+
{
|
19192 |
+
"epoch": 4.090406950706118,
|
19193 |
+
"grad_norm": 3.5755093097686768,
|
19194 |
+
"learning_rate": 0.0001608586334779056,
|
19195 |
+
"loss": 1.178,
|
19196 |
+
"step": 137000
|
19197 |
+
},
|
19198 |
+
{
|
19199 |
+
"epoch": 4.0918997999582,
|
19200 |
+
"grad_norm": 5.496055603027344,
|
19201 |
+
"learning_rate": 0.00016084434768198676,
|
19202 |
+
"loss": 1.1409,
|
19203 |
+
"step": 137050
|
19204 |
+
},
|
19205 |
+
{
|
19206 |
+
"epoch": 4.093392649210283,
|
19207 |
+
"grad_norm": 4.117304801940918,
|
19208 |
+
"learning_rate": 0.00016083006188606793,
|
19209 |
+
"loss": 1.2,
|
19210 |
+
"step": 137100
|
19211 |
+
},
|
19212 |
+
{
|
19213 |
+
"epoch": 4.094885498462365,
|
19214 |
+
"grad_norm": 5.538839817047119,
|
19215 |
+
"learning_rate": 0.0001608157760901491,
|
19216 |
+
"loss": 1.1778,
|
19217 |
+
"step": 137150
|
19218 |
+
},
|
19219 |
+
{
|
19220 |
+
"epoch": 4.096378347714448,
|
19221 |
+
"grad_norm": 4.6166558265686035,
|
19222 |
+
"learning_rate": 0.00016080149029423026,
|
19223 |
+
"loss": 1.1797,
|
19224 |
+
"step": 137200
|
19225 |
+
},
|
19226 |
+
{
|
19227 |
+
"epoch": 4.097871196966531,
|
19228 |
+
"grad_norm": 4.904277324676514,
|
19229 |
+
"learning_rate": 0.00016078720449831142,
|
19230 |
+
"loss": 1.1951,
|
19231 |
+
"step": 137250
|
19232 |
+
},
|
19233 |
+
{
|
19234 |
+
"epoch": 4.0993640462186125,
|
19235 |
+
"grad_norm": 4.139499664306641,
|
19236 |
+
"learning_rate": 0.0001607729187023926,
|
19237 |
+
"loss": 1.1909,
|
19238 |
+
"step": 137300
|
19239 |
+
},
|
19240 |
+
{
|
19241 |
+
"epoch": 4.100856895470695,
|
19242 |
+
"grad_norm": 4.454082489013672,
|
19243 |
+
"learning_rate": 0.00016075863290647375,
|
19244 |
+
"loss": 1.1944,
|
19245 |
+
"step": 137350
|
19246 |
+
},
|
19247 |
+
{
|
19248 |
+
"epoch": 4.102349744722778,
|
19249 |
+
"grad_norm": 6.1854963302612305,
|
19250 |
+
"learning_rate": 0.00016074434711055492,
|
19251 |
+
"loss": 1.1934,
|
19252 |
+
"step": 137400
|
19253 |
+
},
|
19254 |
+
{
|
19255 |
+
"epoch": 4.103842593974861,
|
19256 |
+
"grad_norm": 3.4759559631347656,
|
19257 |
+
"learning_rate": 0.00016073006131463608,
|
19258 |
+
"loss": 1.1489,
|
19259 |
+
"step": 137450
|
19260 |
+
},
|
19261 |
+
{
|
19262 |
+
"epoch": 4.105335443226943,
|
19263 |
+
"grad_norm": 5.522613525390625,
|
19264 |
+
"learning_rate": 0.00016071577551871727,
|
19265 |
+
"loss": 1.1765,
|
19266 |
+
"step": 137500
|
19267 |
+
},
|
19268 |
+
{
|
19269 |
+
"epoch": 4.106828292479025,
|
19270 |
+
"grad_norm": 4.545987606048584,
|
19271 |
+
"learning_rate": 0.0001607014897227984,
|
19272 |
+
"loss": 1.2346,
|
19273 |
+
"step": 137550
|
19274 |
+
},
|
19275 |
+
{
|
19276 |
+
"epoch": 4.108321141731108,
|
19277 |
+
"grad_norm": 3.9791605472564697,
|
19278 |
+
"learning_rate": 0.0001606872039268796,
|
19279 |
+
"loss": 1.1124,
|
19280 |
+
"step": 137600
|
19281 |
+
},
|
19282 |
+
{
|
19283 |
+
"epoch": 4.109813990983191,
|
19284 |
+
"grad_norm": 5.259634971618652,
|
19285 |
+
"learning_rate": 0.00016067291813096074,
|
19286 |
+
"loss": 1.2323,
|
19287 |
+
"step": 137650
|
19288 |
+
},
|
19289 |
+
{
|
19290 |
+
"epoch": 4.111306840235273,
|
19291 |
+
"grad_norm": 3.549207925796509,
|
19292 |
+
"learning_rate": 0.00016065863233504193,
|
19293 |
+
"loss": 1.2269,
|
19294 |
+
"step": 137700
|
19295 |
+
},
|
19296 |
+
{
|
19297 |
+
"epoch": 4.112799689487356,
|
19298 |
+
"grad_norm": 4.890096664428711,
|
19299 |
+
"learning_rate": 0.00016064434653912307,
|
19300 |
+
"loss": 1.142,
|
19301 |
+
"step": 137750
|
19302 |
+
},
|
19303 |
+
{
|
19304 |
+
"epoch": 4.114292538739438,
|
19305 |
+
"grad_norm": 3.7870566844940186,
|
19306 |
+
"learning_rate": 0.00016063006074320426,
|
19307 |
+
"loss": 1.1446,
|
19308 |
+
"step": 137800
|
19309 |
+
},
|
19310 |
+
{
|
19311 |
+
"epoch": 4.1157853879915205,
|
19312 |
+
"grad_norm": 5.0174689292907715,
|
19313 |
+
"learning_rate": 0.00016061577494728543,
|
19314 |
+
"loss": 1.2022,
|
19315 |
+
"step": 137850
|
19316 |
+
},
|
19317 |
+
{
|
19318 |
+
"epoch": 4.117278237243603,
|
19319 |
+
"grad_norm": 4.542319297790527,
|
19320 |
+
"learning_rate": 0.0001606014891513666,
|
19321 |
+
"loss": 1.1809,
|
19322 |
+
"step": 137900
|
19323 |
+
},
|
19324 |
+
{
|
19325 |
+
"epoch": 4.118771086495686,
|
19326 |
+
"grad_norm": 5.590012073516846,
|
19327 |
+
"learning_rate": 0.00016058720335544776,
|
19328 |
+
"loss": 1.1811,
|
19329 |
+
"step": 137950
|
19330 |
+
},
|
19331 |
+
{
|
19332 |
+
"epoch": 4.120263935747769,
|
19333 |
+
"grad_norm": 4.569715976715088,
|
19334 |
+
"learning_rate": 0.00016057291755952892,
|
19335 |
+
"loss": 1.1715,
|
19336 |
+
"step": 138000
|
19337 |
+
},
|
19338 |
+
{
|
19339 |
+
"epoch": 4.12175678499985,
|
19340 |
+
"grad_norm": 3.725158214569092,
|
19341 |
+
"learning_rate": 0.00016055863176361008,
|
19342 |
+
"loss": 1.1825,
|
19343 |
+
"step": 138050
|
19344 |
+
},
|
19345 |
+
{
|
19346 |
+
"epoch": 4.123249634251933,
|
19347 |
+
"grad_norm": 3.941608428955078,
|
19348 |
+
"learning_rate": 0.00016054434596769125,
|
19349 |
+
"loss": 1.1482,
|
19350 |
+
"step": 138100
|
19351 |
+
},
|
19352 |
+
{
|
19353 |
+
"epoch": 4.124742483504016,
|
19354 |
+
"grad_norm": 4.545751094818115,
|
19355 |
+
"learning_rate": 0.00016053006017177241,
|
19356 |
+
"loss": 1.2089,
|
19357 |
+
"step": 138150
|
19358 |
+
},
|
19359 |
+
{
|
19360 |
+
"epoch": 4.1262353327560986,
|
19361 |
+
"grad_norm": 6.809751033782959,
|
19362 |
+
"learning_rate": 0.00016051577437585358,
|
19363 |
+
"loss": 1.1664,
|
19364 |
+
"step": 138200
|
19365 |
+
},
|
19366 |
+
{
|
19367 |
+
"epoch": 4.12772818200818,
|
19368 |
+
"grad_norm": 5.444116592407227,
|
19369 |
+
"learning_rate": 0.00016050148857993474,
|
19370 |
+
"loss": 1.2127,
|
19371 |
+
"step": 138250
|
19372 |
+
},
|
19373 |
+
{
|
19374 |
+
"epoch": 4.129221031260263,
|
19375 |
+
"grad_norm": 4.276831150054932,
|
19376 |
+
"learning_rate": 0.00016048720278401593,
|
19377 |
+
"loss": 1.2138,
|
19378 |
+
"step": 138300
|
19379 |
+
},
|
19380 |
+
{
|
19381 |
+
"epoch": 4.130713880512346,
|
19382 |
+
"grad_norm": 3.9716556072235107,
|
19383 |
+
"learning_rate": 0.00016047291698809707,
|
19384 |
+
"loss": 1.2237,
|
19385 |
+
"step": 138350
|
19386 |
+
},
|
19387 |
+
{
|
19388 |
+
"epoch": 4.1322067297644285,
|
19389 |
+
"grad_norm": 3.9793570041656494,
|
19390 |
+
"learning_rate": 0.00016045863119217826,
|
19391 |
+
"loss": 1.1997,
|
19392 |
+
"step": 138400
|
19393 |
+
},
|
19394 |
+
{
|
19395 |
+
"epoch": 4.133699579016511,
|
19396 |
+
"grad_norm": 4.590352535247803,
|
19397 |
+
"learning_rate": 0.0001604443453962594,
|
19398 |
+
"loss": 1.1631,
|
19399 |
+
"step": 138450
|
19400 |
+
},
|
19401 |
+
{
|
19402 |
+
"epoch": 4.135192428268593,
|
19403 |
+
"grad_norm": 3.9549121856689453,
|
19404 |
+
"learning_rate": 0.0001604300596003406,
|
19405 |
+
"loss": 1.1518,
|
19406 |
+
"step": 138500
|
19407 |
+
},
|
19408 |
+
{
|
19409 |
+
"epoch": 4.136685277520676,
|
19410 |
+
"grad_norm": 5.076075077056885,
|
19411 |
+
"learning_rate": 0.00016041577380442173,
|
19412 |
+
"loss": 1.1806,
|
19413 |
+
"step": 138550
|
19414 |
+
},
|
19415 |
+
{
|
19416 |
+
"epoch": 4.138178126772758,
|
19417 |
+
"grad_norm": 6.228057384490967,
|
19418 |
+
"learning_rate": 0.00016040148800850292,
|
19419 |
+
"loss": 1.1547,
|
19420 |
+
"step": 138600
|
19421 |
+
},
|
19422 |
+
{
|
19423 |
+
"epoch": 4.139670976024841,
|
19424 |
+
"grad_norm": 4.276813507080078,
|
19425 |
+
"learning_rate": 0.0001603872022125841,
|
19426 |
+
"loss": 1.1351,
|
19427 |
+
"step": 138650
|
19428 |
+
},
|
19429 |
+
{
|
19430 |
+
"epoch": 4.141163825276924,
|
19431 |
+
"grad_norm": 5.215205669403076,
|
19432 |
+
"learning_rate": 0.00016037291641666525,
|
19433 |
+
"loss": 1.2021,
|
19434 |
+
"step": 138700
|
19435 |
+
},
|
19436 |
+
{
|
19437 |
+
"epoch": 4.142656674529006,
|
19438 |
+
"grad_norm": 4.386962413787842,
|
19439 |
+
"learning_rate": 0.00016035863062074642,
|
19440 |
+
"loss": 1.2346,
|
19441 |
+
"step": 138750
|
19442 |
+
},
|
19443 |
+
{
|
19444 |
+
"epoch": 4.144149523781088,
|
19445 |
+
"grad_norm": 4.739329814910889,
|
19446 |
+
"learning_rate": 0.00016034434482482758,
|
19447 |
+
"loss": 1.1971,
|
19448 |
+
"step": 138800
|
19449 |
+
},
|
19450 |
+
{
|
19451 |
+
"epoch": 4.145642373033171,
|
19452 |
+
"grad_norm": 3.994267225265503,
|
19453 |
+
"learning_rate": 0.00016033005902890875,
|
19454 |
+
"loss": 1.2259,
|
19455 |
+
"step": 138850
|
19456 |
+
},
|
19457 |
+
{
|
19458 |
+
"epoch": 4.147135222285254,
|
19459 |
+
"grad_norm": 4.481820106506348,
|
19460 |
+
"learning_rate": 0.0001603157732329899,
|
19461 |
+
"loss": 1.2428,
|
19462 |
+
"step": 138900
|
19463 |
+
},
|
19464 |
+
{
|
19465 |
+
"epoch": 4.1486280715373365,
|
19466 |
+
"grad_norm": 5.454226016998291,
|
19467 |
+
"learning_rate": 0.00016030148743707108,
|
19468 |
+
"loss": 1.1596,
|
19469 |
+
"step": 138950
|
19470 |
+
},
|
19471 |
+
{
|
19472 |
+
"epoch": 4.150120920789418,
|
19473 |
+
"grad_norm": 5.072449684143066,
|
19474 |
+
"learning_rate": 0.00016028720164115224,
|
19475 |
+
"loss": 1.1755,
|
19476 |
+
"step": 139000
|
19477 |
+
},
|
19478 |
+
{
|
19479 |
+
"epoch": 4.151613770041501,
|
19480 |
+
"grad_norm": 4.940500259399414,
|
19481 |
+
"learning_rate": 0.0001602729158452334,
|
19482 |
+
"loss": 1.1765,
|
19483 |
+
"step": 139050
|
19484 |
+
},
|
19485 |
+
{
|
19486 |
+
"epoch": 4.153106619293584,
|
19487 |
+
"grad_norm": 4.294040203094482,
|
19488 |
+
"learning_rate": 0.0001602586300493146,
|
19489 |
+
"loss": 1.1638,
|
19490 |
+
"step": 139100
|
19491 |
+
},
|
19492 |
+
{
|
19493 |
+
"epoch": 4.154599468545666,
|
19494 |
+
"grad_norm": 3.98103928565979,
|
19495 |
+
"learning_rate": 0.00016024434425339573,
|
19496 |
+
"loss": 1.192,
|
19497 |
+
"step": 139150
|
19498 |
+
},
|
19499 |
+
{
|
19500 |
+
"epoch": 4.156092317797749,
|
19501 |
+
"grad_norm": 5.18140983581543,
|
19502 |
+
"learning_rate": 0.00016023005845747693,
|
19503 |
+
"loss": 1.2121,
|
19504 |
+
"step": 139200
|
19505 |
+
},
|
19506 |
+
{
|
19507 |
+
"epoch": 4.157585167049831,
|
19508 |
+
"grad_norm": 4.342568874359131,
|
19509 |
+
"learning_rate": 0.00016021577266155806,
|
19510 |
+
"loss": 1.2056,
|
19511 |
+
"step": 139250
|
19512 |
+
},
|
19513 |
+
{
|
19514 |
+
"epoch": 4.159078016301914,
|
19515 |
+
"grad_norm": 4.284645080566406,
|
19516 |
+
"learning_rate": 0.00016020148686563926,
|
19517 |
+
"loss": 1.1864,
|
19518 |
+
"step": 139300
|
19519 |
+
},
|
19520 |
+
{
|
19521 |
+
"epoch": 4.160570865553996,
|
19522 |
+
"grad_norm": 5.542572498321533,
|
19523 |
+
"learning_rate": 0.0001601872010697204,
|
19524 |
+
"loss": 1.1903,
|
19525 |
+
"step": 139350
|
19526 |
+
},
|
19527 |
+
{
|
19528 |
+
"epoch": 4.162063714806079,
|
19529 |
+
"grad_norm": 4.752786159515381,
|
19530 |
+
"learning_rate": 0.00016017291527380158,
|
19531 |
+
"loss": 1.2012,
|
19532 |
+
"step": 139400
|
19533 |
+
},
|
19534 |
+
{
|
19535 |
+
"epoch": 4.163556564058162,
|
19536 |
+
"grad_norm": 4.298015117645264,
|
19537 |
+
"learning_rate": 0.00016015862947788275,
|
19538 |
+
"loss": 1.2108,
|
19539 |
+
"step": 139450
|
19540 |
+
},
|
19541 |
+
{
|
19542 |
+
"epoch": 4.165049413310244,
|
19543 |
+
"grad_norm": 4.048568248748779,
|
19544 |
+
"learning_rate": 0.00016014434368196391,
|
19545 |
+
"loss": 1.2191,
|
19546 |
+
"step": 139500
|
19547 |
+
},
|
19548 |
+
{
|
19549 |
+
"epoch": 4.166542262562326,
|
19550 |
+
"grad_norm": 4.178681373596191,
|
19551 |
+
"learning_rate": 0.00016013005788604508,
|
19552 |
+
"loss": 1.1892,
|
19553 |
+
"step": 139550
|
19554 |
+
},
|
19555 |
+
{
|
19556 |
+
"epoch": 4.168035111814409,
|
19557 |
+
"grad_norm": 6.201132297515869,
|
19558 |
+
"learning_rate": 0.00016011577209012622,
|
19559 |
+
"loss": 1.1917,
|
19560 |
+
"step": 139600
|
19561 |
+
},
|
19562 |
+
{
|
19563 |
+
"epoch": 4.169527961066492,
|
19564 |
+
"grad_norm": 4.938151836395264,
|
19565 |
+
"learning_rate": 0.0001601014862942074,
|
19566 |
+
"loss": 1.1954,
|
19567 |
+
"step": 139650
|
19568 |
+
},
|
19569 |
+
{
|
19570 |
+
"epoch": 4.171020810318574,
|
19571 |
+
"grad_norm": 6.060943126678467,
|
19572 |
+
"learning_rate": 0.00016008720049828855,
|
19573 |
+
"loss": 1.1471,
|
19574 |
+
"step": 139700
|
19575 |
+
},
|
19576 |
+
{
|
19577 |
+
"epoch": 4.172513659570656,
|
19578 |
+
"grad_norm": 5.0269880294799805,
|
19579 |
+
"learning_rate": 0.00016007291470236974,
|
19580 |
+
"loss": 1.1868,
|
19581 |
+
"step": 139750
|
19582 |
+
},
|
19583 |
+
{
|
19584 |
+
"epoch": 4.174006508822739,
|
19585 |
+
"grad_norm": 4.509561538696289,
|
19586 |
+
"learning_rate": 0.0001600586289064509,
|
19587 |
+
"loss": 1.1906,
|
19588 |
+
"step": 139800
|
19589 |
+
},
|
19590 |
+
{
|
19591 |
+
"epoch": 4.175499358074822,
|
19592 |
+
"grad_norm": 4.10978364944458,
|
19593 |
+
"learning_rate": 0.00016004434311053207,
|
19594 |
+
"loss": 1.1515,
|
19595 |
+
"step": 139850
|
19596 |
+
},
|
19597 |
+
{
|
19598 |
+
"epoch": 4.176992207326904,
|
19599 |
+
"grad_norm": 3.7452332973480225,
|
19600 |
+
"learning_rate": 0.00016003005731461323,
|
19601 |
+
"loss": 1.1807,
|
19602 |
+
"step": 139900
|
19603 |
+
},
|
19604 |
+
{
|
19605 |
+
"epoch": 4.178485056578987,
|
19606 |
+
"grad_norm": 4.911968231201172,
|
19607 |
+
"learning_rate": 0.0001600157715186944,
|
19608 |
+
"loss": 1.1568,
|
19609 |
+
"step": 139950
|
19610 |
+
},
|
19611 |
+
{
|
19612 |
+
"epoch": 4.179977905831069,
|
19613 |
+
"grad_norm": 4.606587886810303,
|
19614 |
+
"learning_rate": 0.00016000148572277556,
|
19615 |
+
"loss": 1.2099,
|
19616 |
+
"step": 140000
|
19617 |
}
|
19618 |
],
|
19619 |
"logging_steps": 50,
|
|
|
19633 |
"attributes": {}
|
19634 |
}
|
19635 |
},
|
19636 |
+
"total_flos": 3.536269939317932e+18,
|
19637 |
"train_batch_size": 2,
|
19638 |
"trial_name": null,
|
19639 |
"trial_params": null
|