Training in progress, step 40000, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step40000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step40000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/trainer_state.json +1403 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 42002584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ff350cb611f06224861beaded1ce40d9fe8bfeadaf0b0a892a03fd5d5020a2
|
3 |
size 42002584
|
last-checkpoint/global_step40000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfa9e54916a8686bb306eadba9f61d544cf521787dae09ea59e28ab3ad50188e
|
3 |
+
size 251710672
|
last-checkpoint/global_step40000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1ba8191513f58c930b0d730cf052323c7a5d0728ee0724ebc8904044c2ce128
|
3 |
+
size 153747385
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step40000
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac4c1fbd7af888371177ba36b2926feb8e2e859d541c8a12caaa62f63cad240
|
3 |
size 14244
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4214,6 +4214,1406 @@
|
|
4214 |
"learning_rate": 0.00019143023674420996,
|
4215 |
"loss": 1.4109,
|
4216 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4217 |
}
|
4218 |
],
|
4219 |
"logging_steps": 50,
|
@@ -4233,7 +5633,7 @@
|
|
4233 |
"attributes": {}
|
4234 |
}
|
4235 |
},
|
4236 |
-
"total_flos":
|
4237 |
"train_batch_size": 2,
|
4238 |
"trial_name": null,
|
4239 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.1942794016660199,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 40000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4214 |
"learning_rate": 0.00019143023674420996,
|
4215 |
"loss": 1.4109,
|
4216 |
"step": 30000
|
4217 |
+
},
|
4218 |
+
{
|
4219 |
+
"epoch": 0.8972024005015974,
|
4220 |
+
"grad_norm": 4.6257758140563965,
|
4221 |
+
"learning_rate": 0.00019141595094829115,
|
4222 |
+
"loss": 1.3679,
|
4223 |
+
"step": 30050
|
4224 |
+
},
|
4225 |
+
{
|
4226 |
+
"epoch": 0.8986952497536799,
|
4227 |
+
"grad_norm": 4.422520637512207,
|
4228 |
+
"learning_rate": 0.0001914016651523723,
|
4229 |
+
"loss": 1.3893,
|
4230 |
+
"step": 30100
|
4231 |
+
},
|
4232 |
+
{
|
4233 |
+
"epoch": 0.9001880990057624,
|
4234 |
+
"grad_norm": 4.911538124084473,
|
4235 |
+
"learning_rate": 0.00019138737935645348,
|
4236 |
+
"loss": 1.3346,
|
4237 |
+
"step": 30150
|
4238 |
+
},
|
4239 |
+
{
|
4240 |
+
"epoch": 0.901680948257845,
|
4241 |
+
"grad_norm": 6.410045623779297,
|
4242 |
+
"learning_rate": 0.00019137309356053464,
|
4243 |
+
"loss": 1.3651,
|
4244 |
+
"step": 30200
|
4245 |
+
},
|
4246 |
+
{
|
4247 |
+
"epoch": 0.9031737975099274,
|
4248 |
+
"grad_norm": 3.8565444946289062,
|
4249 |
+
"learning_rate": 0.0001913588077646158,
|
4250 |
+
"loss": 1.3436,
|
4251 |
+
"step": 30250
|
4252 |
+
},
|
4253 |
+
{
|
4254 |
+
"epoch": 0.9046666467620099,
|
4255 |
+
"grad_norm": 4.632288455963135,
|
4256 |
+
"learning_rate": 0.00019134452196869697,
|
4257 |
+
"loss": 1.3858,
|
4258 |
+
"step": 30300
|
4259 |
+
},
|
4260 |
+
{
|
4261 |
+
"epoch": 0.9061594960140925,
|
4262 |
+
"grad_norm": 5.382070064544678,
|
4263 |
+
"learning_rate": 0.00019133023617277814,
|
4264 |
+
"loss": 1.3872,
|
4265 |
+
"step": 30350
|
4266 |
+
},
|
4267 |
+
{
|
4268 |
+
"epoch": 0.907652345266175,
|
4269 |
+
"grad_norm": 4.425014019012451,
|
4270 |
+
"learning_rate": 0.0001913159503768593,
|
4271 |
+
"loss": 1.4488,
|
4272 |
+
"step": 30400
|
4273 |
+
},
|
4274 |
+
{
|
4275 |
+
"epoch": 0.9091451945182576,
|
4276 |
+
"grad_norm": 3.7266223430633545,
|
4277 |
+
"learning_rate": 0.00019130166458094047,
|
4278 |
+
"loss": 1.376,
|
4279 |
+
"step": 30450
|
4280 |
+
},
|
4281 |
+
{
|
4282 |
+
"epoch": 0.9106380437703401,
|
4283 |
+
"grad_norm": 7.25547981262207,
|
4284 |
+
"learning_rate": 0.00019128737878502163,
|
4285 |
+
"loss": 1.3488,
|
4286 |
+
"step": 30500
|
4287 |
+
},
|
4288 |
+
{
|
4289 |
+
"epoch": 0.9121308930224226,
|
4290 |
+
"grad_norm": 5.824578285217285,
|
4291 |
+
"learning_rate": 0.0001912730929891028,
|
4292 |
+
"loss": 1.4001,
|
4293 |
+
"step": 30550
|
4294 |
+
},
|
4295 |
+
{
|
4296 |
+
"epoch": 0.9136237422745052,
|
4297 |
+
"grad_norm": 5.808544635772705,
|
4298 |
+
"learning_rate": 0.00019125880719318396,
|
4299 |
+
"loss": 1.3132,
|
4300 |
+
"step": 30600
|
4301 |
+
},
|
4302 |
+
{
|
4303 |
+
"epoch": 0.9151165915265876,
|
4304 |
+
"grad_norm": 4.343785285949707,
|
4305 |
+
"learning_rate": 0.00019124452139726515,
|
4306 |
+
"loss": 1.3592,
|
4307 |
+
"step": 30650
|
4308 |
+
},
|
4309 |
+
{
|
4310 |
+
"epoch": 0.9166094407786701,
|
4311 |
+
"grad_norm": 5.0626325607299805,
|
4312 |
+
"learning_rate": 0.0001912302356013463,
|
4313 |
+
"loss": 1.4418,
|
4314 |
+
"step": 30700
|
4315 |
+
},
|
4316 |
+
{
|
4317 |
+
"epoch": 0.9181022900307527,
|
4318 |
+
"grad_norm": 4.336055755615234,
|
4319 |
+
"learning_rate": 0.00019121594980542748,
|
4320 |
+
"loss": 1.3624,
|
4321 |
+
"step": 30750
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 0.9195951392828352,
|
4325 |
+
"grad_norm": 6.215260982513428,
|
4326 |
+
"learning_rate": 0.00019120166400950862,
|
4327 |
+
"loss": 1.4053,
|
4328 |
+
"step": 30800
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 0.9210879885349177,
|
4332 |
+
"grad_norm": 4.496364116668701,
|
4333 |
+
"learning_rate": 0.0001911873782135898,
|
4334 |
+
"loss": 1.4117,
|
4335 |
+
"step": 30850
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 0.9225808377870003,
|
4339 |
+
"grad_norm": 4.023138046264648,
|
4340 |
+
"learning_rate": 0.00019117309241767095,
|
4341 |
+
"loss": 1.3783,
|
4342 |
+
"step": 30900
|
4343 |
+
},
|
4344 |
+
{
|
4345 |
+
"epoch": 0.9240736870390828,
|
4346 |
+
"grad_norm": 3.8177783489227295,
|
4347 |
+
"learning_rate": 0.00019115880662175214,
|
4348 |
+
"loss": 1.3127,
|
4349 |
+
"step": 30950
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 0.9255665362911654,
|
4353 |
+
"grad_norm": 3.893087387084961,
|
4354 |
+
"learning_rate": 0.0001911445208258333,
|
4355 |
+
"loss": 1.3359,
|
4356 |
+
"step": 31000
|
4357 |
+
},
|
4358 |
+
{
|
4359 |
+
"epoch": 0.9270593855432478,
|
4360 |
+
"grad_norm": 3.9967384338378906,
|
4361 |
+
"learning_rate": 0.00019113023502991447,
|
4362 |
+
"loss": 1.3897,
|
4363 |
+
"step": 31050
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 0.9285522347953303,
|
4367 |
+
"grad_norm": 3.2351789474487305,
|
4368 |
+
"learning_rate": 0.00019111594923399563,
|
4369 |
+
"loss": 1.3406,
|
4370 |
+
"step": 31100
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 0.9300450840474129,
|
4374 |
+
"grad_norm": 4.571500778198242,
|
4375 |
+
"learning_rate": 0.0001911016634380768,
|
4376 |
+
"loss": 1.3783,
|
4377 |
+
"step": 31150
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 0.9315379332994954,
|
4381 |
+
"grad_norm": 6.241518020629883,
|
4382 |
+
"learning_rate": 0.00019108737764215796,
|
4383 |
+
"loss": 1.3659,
|
4384 |
+
"step": 31200
|
4385 |
+
},
|
4386 |
+
{
|
4387 |
+
"epoch": 0.9330307825515779,
|
4388 |
+
"grad_norm": 3.2547826766967773,
|
4389 |
+
"learning_rate": 0.00019107309184623913,
|
4390 |
+
"loss": 1.3137,
|
4391 |
+
"step": 31250
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 0.9345236318036605,
|
4395 |
+
"grad_norm": 5.386812686920166,
|
4396 |
+
"learning_rate": 0.0001910588060503203,
|
4397 |
+
"loss": 1.3974,
|
4398 |
+
"step": 31300
|
4399 |
+
},
|
4400 |
+
{
|
4401 |
+
"epoch": 0.936016481055743,
|
4402 |
+
"grad_norm": 4.239005088806152,
|
4403 |
+
"learning_rate": 0.00019104452025440146,
|
4404 |
+
"loss": 1.3789,
|
4405 |
+
"step": 31350
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 0.9375093303078255,
|
4409 |
+
"grad_norm": 3.7252326011657715,
|
4410 |
+
"learning_rate": 0.00019103023445848262,
|
4411 |
+
"loss": 1.3622,
|
4412 |
+
"step": 31400
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 0.9390021795599081,
|
4416 |
+
"grad_norm": 5.890969276428223,
|
4417 |
+
"learning_rate": 0.00019101594866256381,
|
4418 |
+
"loss": 1.3905,
|
4419 |
+
"step": 31450
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 0.9404950288119905,
|
4423 |
+
"grad_norm": 5.796853542327881,
|
4424 |
+
"learning_rate": 0.00019100166286664495,
|
4425 |
+
"loss": 1.3797,
|
4426 |
+
"step": 31500
|
4427 |
+
},
|
4428 |
+
{
|
4429 |
+
"epoch": 0.9419878780640731,
|
4430 |
+
"grad_norm": 4.130026817321777,
|
4431 |
+
"learning_rate": 0.00019098737707072614,
|
4432 |
+
"loss": 1.3365,
|
4433 |
+
"step": 31550
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 0.9434807273161556,
|
4437 |
+
"grad_norm": 3.65081524848938,
|
4438 |
+
"learning_rate": 0.00019097309127480728,
|
4439 |
+
"loss": 1.4347,
|
4440 |
+
"step": 31600
|
4441 |
+
},
|
4442 |
+
{
|
4443 |
+
"epoch": 0.9449735765682381,
|
4444 |
+
"grad_norm": 4.91404390335083,
|
4445 |
+
"learning_rate": 0.00019095880547888847,
|
4446 |
+
"loss": 1.4056,
|
4447 |
+
"step": 31650
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 0.9464664258203207,
|
4451 |
+
"grad_norm": 4.7272114753723145,
|
4452 |
+
"learning_rate": 0.00019094451968296964,
|
4453 |
+
"loss": 1.2828,
|
4454 |
+
"step": 31700
|
4455 |
+
},
|
4456 |
+
{
|
4457 |
+
"epoch": 0.9479592750724032,
|
4458 |
+
"grad_norm": 5.111660957336426,
|
4459 |
+
"learning_rate": 0.0001909302338870508,
|
4460 |
+
"loss": 1.3486,
|
4461 |
+
"step": 31750
|
4462 |
+
},
|
4463 |
+
{
|
4464 |
+
"epoch": 0.9494521243244857,
|
4465 |
+
"grad_norm": 3.1206741333007812,
|
4466 |
+
"learning_rate": 0.00019091594809113197,
|
4467 |
+
"loss": 1.3416,
|
4468 |
+
"step": 31800
|
4469 |
+
},
|
4470 |
+
{
|
4471 |
+
"epoch": 0.9509449735765683,
|
4472 |
+
"grad_norm": 4.359163761138916,
|
4473 |
+
"learning_rate": 0.00019090166229521313,
|
4474 |
+
"loss": 1.3471,
|
4475 |
+
"step": 31850
|
4476 |
+
},
|
4477 |
+
{
|
4478 |
+
"epoch": 0.9524378228286507,
|
4479 |
+
"grad_norm": 4.667807102203369,
|
4480 |
+
"learning_rate": 0.0001908873764992943,
|
4481 |
+
"loss": 1.4042,
|
4482 |
+
"step": 31900
|
4483 |
+
},
|
4484 |
+
{
|
4485 |
+
"epoch": 0.9539306720807332,
|
4486 |
+
"grad_norm": 4.410369873046875,
|
4487 |
+
"learning_rate": 0.00019087309070337546,
|
4488 |
+
"loss": 1.3909,
|
4489 |
+
"step": 31950
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 0.9554235213328158,
|
4493 |
+
"grad_norm": 4.987631320953369,
|
4494 |
+
"learning_rate": 0.00019085880490745663,
|
4495 |
+
"loss": 1.3548,
|
4496 |
+
"step": 32000
|
4497 |
+
},
|
4498 |
+
{
|
4499 |
+
"epoch": 0.9569163705848983,
|
4500 |
+
"grad_norm": 4.66121768951416,
|
4501 |
+
"learning_rate": 0.0001908445191115378,
|
4502 |
+
"loss": 1.3753,
|
4503 |
+
"step": 32050
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 0.9584092198369809,
|
4507 |
+
"grad_norm": 6.010061740875244,
|
4508 |
+
"learning_rate": 0.00019083023331561896,
|
4509 |
+
"loss": 1.4128,
|
4510 |
+
"step": 32100
|
4511 |
+
},
|
4512 |
+
{
|
4513 |
+
"epoch": 0.9599020690890634,
|
4514 |
+
"grad_norm": 3.118246078491211,
|
4515 |
+
"learning_rate": 0.00019081594751970012,
|
4516 |
+
"loss": 1.3625,
|
4517 |
+
"step": 32150
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 0.9613949183411459,
|
4521 |
+
"grad_norm": 3.821732759475708,
|
4522 |
+
"learning_rate": 0.00019080166172378128,
|
4523 |
+
"loss": 1.3691,
|
4524 |
+
"step": 32200
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 0.9628877675932285,
|
4528 |
+
"grad_norm": 5.321105480194092,
|
4529 |
+
"learning_rate": 0.00019078737592786245,
|
4530 |
+
"loss": 1.3499,
|
4531 |
+
"step": 32250
|
4532 |
+
},
|
4533 |
+
{
|
4534 |
+
"epoch": 0.964380616845311,
|
4535 |
+
"grad_norm": 5.963992595672607,
|
4536 |
+
"learning_rate": 0.00019077309013194361,
|
4537 |
+
"loss": 1.3145,
|
4538 |
+
"step": 32300
|
4539 |
+
},
|
4540 |
+
{
|
4541 |
+
"epoch": 0.9658734660973934,
|
4542 |
+
"grad_norm": 4.071386337280273,
|
4543 |
+
"learning_rate": 0.00019075880433602478,
|
4544 |
+
"loss": 1.3215,
|
4545 |
+
"step": 32350
|
4546 |
+
},
|
4547 |
+
{
|
4548 |
+
"epoch": 0.967366315349476,
|
4549 |
+
"grad_norm": 3.7641048431396484,
|
4550 |
+
"learning_rate": 0.00019074451854010594,
|
4551 |
+
"loss": 1.3846,
|
4552 |
+
"step": 32400
|
4553 |
+
},
|
4554 |
+
{
|
4555 |
+
"epoch": 0.9688591646015585,
|
4556 |
+
"grad_norm": 5.985085487365723,
|
4557 |
+
"learning_rate": 0.0001907302327441871,
|
4558 |
+
"loss": 1.3383,
|
4559 |
+
"step": 32450
|
4560 |
+
},
|
4561 |
+
{
|
4562 |
+
"epoch": 0.970352013853641,
|
4563 |
+
"grad_norm": 6.602528095245361,
|
4564 |
+
"learning_rate": 0.0001907159469482683,
|
4565 |
+
"loss": 1.4195,
|
4566 |
+
"step": 32500
|
4567 |
+
},
|
4568 |
+
{
|
4569 |
+
"epoch": 0.9718448631057236,
|
4570 |
+
"grad_norm": 5.370469570159912,
|
4571 |
+
"learning_rate": 0.00019070166115234944,
|
4572 |
+
"loss": 1.3485,
|
4573 |
+
"step": 32550
|
4574 |
+
},
|
4575 |
+
{
|
4576 |
+
"epoch": 0.9733377123578061,
|
4577 |
+
"grad_norm": 4.588457107543945,
|
4578 |
+
"learning_rate": 0.00019068737535643063,
|
4579 |
+
"loss": 1.3896,
|
4580 |
+
"step": 32600
|
4581 |
+
},
|
4582 |
+
{
|
4583 |
+
"epoch": 0.9748305616098887,
|
4584 |
+
"grad_norm": 6.389341354370117,
|
4585 |
+
"learning_rate": 0.00019067308956051177,
|
4586 |
+
"loss": 1.3118,
|
4587 |
+
"step": 32650
|
4588 |
+
},
|
4589 |
+
{
|
4590 |
+
"epoch": 0.9763234108619712,
|
4591 |
+
"grad_norm": 4.051975250244141,
|
4592 |
+
"learning_rate": 0.00019065880376459296,
|
4593 |
+
"loss": 1.4065,
|
4594 |
+
"step": 32700
|
4595 |
+
},
|
4596 |
+
{
|
4597 |
+
"epoch": 0.9778162601140536,
|
4598 |
+
"grad_norm": 5.474813938140869,
|
4599 |
+
"learning_rate": 0.0001906445179686741,
|
4600 |
+
"loss": 1.3967,
|
4601 |
+
"step": 32750
|
4602 |
+
},
|
4603 |
+
{
|
4604 |
+
"epoch": 0.9793091093661362,
|
4605 |
+
"grad_norm": 4.033454895019531,
|
4606 |
+
"learning_rate": 0.0001906302321727553,
|
4607 |
+
"loss": 1.3212,
|
4608 |
+
"step": 32800
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 0.9808019586182187,
|
4612 |
+
"grad_norm": 4.46946382522583,
|
4613 |
+
"learning_rate": 0.00019061594637683645,
|
4614 |
+
"loss": 1.3917,
|
4615 |
+
"step": 32850
|
4616 |
+
},
|
4617 |
+
{
|
4618 |
+
"epoch": 0.9822948078703012,
|
4619 |
+
"grad_norm": 4.98955774307251,
|
4620 |
+
"learning_rate": 0.00019060166058091762,
|
4621 |
+
"loss": 1.3814,
|
4622 |
+
"step": 32900
|
4623 |
+
},
|
4624 |
+
{
|
4625 |
+
"epoch": 0.9837876571223838,
|
4626 |
+
"grad_norm": 4.879435062408447,
|
4627 |
+
"learning_rate": 0.00019058737478499878,
|
4628 |
+
"loss": 1.3486,
|
4629 |
+
"step": 32950
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 0.9852805063744663,
|
4633 |
+
"grad_norm": 4.0941925048828125,
|
4634 |
+
"learning_rate": 0.00019057308898907995,
|
4635 |
+
"loss": 1.3289,
|
4636 |
+
"step": 33000
|
4637 |
+
},
|
4638 |
+
{
|
4639 |
+
"epoch": 0.9867733556265488,
|
4640 |
+
"grad_norm": 5.2992706298828125,
|
4641 |
+
"learning_rate": 0.0001905588031931611,
|
4642 |
+
"loss": 1.3458,
|
4643 |
+
"step": 33050
|
4644 |
+
},
|
4645 |
+
{
|
4646 |
+
"epoch": 0.9882662048786314,
|
4647 |
+
"grad_norm": 4.284411430358887,
|
4648 |
+
"learning_rate": 0.00019054451739724228,
|
4649 |
+
"loss": 1.3932,
|
4650 |
+
"step": 33100
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 0.9897590541307139,
|
4654 |
+
"grad_norm": 4.969634532928467,
|
4655 |
+
"learning_rate": 0.00019053023160132344,
|
4656 |
+
"loss": 1.4044,
|
4657 |
+
"step": 33150
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 0.9912519033827965,
|
4661 |
+
"grad_norm": 5.532953262329102,
|
4662 |
+
"learning_rate": 0.0001905159458054046,
|
4663 |
+
"loss": 1.3658,
|
4664 |
+
"step": 33200
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 0.9927447526348789,
|
4668 |
+
"grad_norm": 4.402670383453369,
|
4669 |
+
"learning_rate": 0.00019050166000948577,
|
4670 |
+
"loss": 1.343,
|
4671 |
+
"step": 33250
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 0.9942376018869614,
|
4675 |
+
"grad_norm": 4.458900451660156,
|
4676 |
+
"learning_rate": 0.00019048737421356696,
|
4677 |
+
"loss": 1.4002,
|
4678 |
+
"step": 33300
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 0.995730451139044,
|
4682 |
+
"grad_norm": 3.934340238571167,
|
4683 |
+
"learning_rate": 0.0001904730884176481,
|
4684 |
+
"loss": 1.413,
|
4685 |
+
"step": 33350
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 0.9972233003911265,
|
4689 |
+
"grad_norm": 4.274466514587402,
|
4690 |
+
"learning_rate": 0.0001904588026217293,
|
4691 |
+
"loss": 1.3552,
|
4692 |
+
"step": 33400
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 0.998716149643209,
|
4696 |
+
"grad_norm": 4.774006366729736,
|
4697 |
+
"learning_rate": 0.00019044451682581043,
|
4698 |
+
"loss": 1.3825,
|
4699 |
+
"step": 33450
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 1.0002089988952916,
|
4703 |
+
"grad_norm": 3.870762825012207,
|
4704 |
+
"learning_rate": 0.00019043023102989162,
|
4705 |
+
"loss": 1.3715,
|
4706 |
+
"step": 33500
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 1.001701848147374,
|
4710 |
+
"grad_norm": 3.595686912536621,
|
4711 |
+
"learning_rate": 0.00019041594523397276,
|
4712 |
+
"loss": 1.3352,
|
4713 |
+
"step": 33550
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 1.0031946973994565,
|
4717 |
+
"grad_norm": 5.492071151733398,
|
4718 |
+
"learning_rate": 0.00019040165943805395,
|
4719 |
+
"loss": 1.2927,
|
4720 |
+
"step": 33600
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 1.004687546651539,
|
4724 |
+
"grad_norm": 5.381584167480469,
|
4725 |
+
"learning_rate": 0.00019038737364213511,
|
4726 |
+
"loss": 1.2545,
|
4727 |
+
"step": 33650
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 1.0061803959036217,
|
4731 |
+
"grad_norm": 5.983656883239746,
|
4732 |
+
"learning_rate": 0.00019037308784621628,
|
4733 |
+
"loss": 1.2393,
|
4734 |
+
"step": 33700
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 1.0076732451557042,
|
4738 |
+
"grad_norm": 4.956115245819092,
|
4739 |
+
"learning_rate": 0.00019035880205029744,
|
4740 |
+
"loss": 1.2931,
|
4741 |
+
"step": 33750
|
4742 |
+
},
|
4743 |
+
{
|
4744 |
+
"epoch": 1.0091660944077867,
|
4745 |
+
"grad_norm": 4.604572296142578,
|
4746 |
+
"learning_rate": 0.0001903445162543786,
|
4747 |
+
"loss": 1.2786,
|
4748 |
+
"step": 33800
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 1.0106589436598692,
|
4752 |
+
"grad_norm": 5.043067932128906,
|
4753 |
+
"learning_rate": 0.00019033023045845977,
|
4754 |
+
"loss": 1.2933,
|
4755 |
+
"step": 33850
|
4756 |
+
},
|
4757 |
+
{
|
4758 |
+
"epoch": 1.0121517929119517,
|
4759 |
+
"grad_norm": 4.53336238861084,
|
4760 |
+
"learning_rate": 0.00019031594466254094,
|
4761 |
+
"loss": 1.3114,
|
4762 |
+
"step": 33900
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 1.0136446421640344,
|
4766 |
+
"grad_norm": 5.106605052947998,
|
4767 |
+
"learning_rate": 0.0001903016588666221,
|
4768 |
+
"loss": 1.2994,
|
4769 |
+
"step": 33950
|
4770 |
+
},
|
4771 |
+
{
|
4772 |
+
"epoch": 1.0151374914161169,
|
4773 |
+
"grad_norm": 5.796351909637451,
|
4774 |
+
"learning_rate": 0.00019028737307070327,
|
4775 |
+
"loss": 1.2555,
|
4776 |
+
"step": 34000
|
4777 |
+
},
|
4778 |
+
{
|
4779 |
+
"epoch": 1.0166303406681994,
|
4780 |
+
"grad_norm": 4.970977306365967,
|
4781 |
+
"learning_rate": 0.00019027308727478443,
|
4782 |
+
"loss": 1.3343,
|
4783 |
+
"step": 34050
|
4784 |
+
},
|
4785 |
+
{
|
4786 |
+
"epoch": 1.0181231899202818,
|
4787 |
+
"grad_norm": 4.233397960662842,
|
4788 |
+
"learning_rate": 0.00019025880147886562,
|
4789 |
+
"loss": 1.2485,
|
4790 |
+
"step": 34100
|
4791 |
+
},
|
4792 |
+
{
|
4793 |
+
"epoch": 1.0196160391723643,
|
4794 |
+
"grad_norm": 4.012045383453369,
|
4795 |
+
"learning_rate": 0.00019024451568294676,
|
4796 |
+
"loss": 1.2816,
|
4797 |
+
"step": 34150
|
4798 |
+
},
|
4799 |
+
{
|
4800 |
+
"epoch": 1.0211088884244468,
|
4801 |
+
"grad_norm": 4.715073585510254,
|
4802 |
+
"learning_rate": 0.00019023022988702795,
|
4803 |
+
"loss": 1.2541,
|
4804 |
+
"step": 34200
|
4805 |
+
},
|
4806 |
+
{
|
4807 |
+
"epoch": 1.0226017376765295,
|
4808 |
+
"grad_norm": 3.7285947799682617,
|
4809 |
+
"learning_rate": 0.0001902159440911091,
|
4810 |
+
"loss": 1.3094,
|
4811 |
+
"step": 34250
|
4812 |
+
},
|
4813 |
+
{
|
4814 |
+
"epoch": 1.024094586928612,
|
4815 |
+
"grad_norm": 4.012038707733154,
|
4816 |
+
"learning_rate": 0.00019020165829519028,
|
4817 |
+
"loss": 1.2791,
|
4818 |
+
"step": 34300
|
4819 |
+
},
|
4820 |
+
{
|
4821 |
+
"epoch": 1.0255874361806945,
|
4822 |
+
"grad_norm": 4.04899263381958,
|
4823 |
+
"learning_rate": 0.00019018737249927142,
|
4824 |
+
"loss": 1.3085,
|
4825 |
+
"step": 34350
|
4826 |
+
},
|
4827 |
+
{
|
4828 |
+
"epoch": 1.027080285432777,
|
4829 |
+
"grad_norm": 4.380834102630615,
|
4830 |
+
"learning_rate": 0.0001901730867033526,
|
4831 |
+
"loss": 1.3212,
|
4832 |
+
"step": 34400
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 1.0285731346848594,
|
4836 |
+
"grad_norm": 5.275148868560791,
|
4837 |
+
"learning_rate": 0.00019015880090743378,
|
4838 |
+
"loss": 1.2749,
|
4839 |
+
"step": 34450
|
4840 |
+
},
|
4841 |
+
{
|
4842 |
+
"epoch": 1.0300659839369422,
|
4843 |
+
"grad_norm": 4.162272930145264,
|
4844 |
+
"learning_rate": 0.00019014451511151494,
|
4845 |
+
"loss": 1.296,
|
4846 |
+
"step": 34500
|
4847 |
+
},
|
4848 |
+
{
|
4849 |
+
"epoch": 1.0315588331890246,
|
4850 |
+
"grad_norm": 4.128006935119629,
|
4851 |
+
"learning_rate": 0.0001901302293155961,
|
4852 |
+
"loss": 1.2547,
|
4853 |
+
"step": 34550
|
4854 |
+
},
|
4855 |
+
{
|
4856 |
+
"epoch": 1.0330516824411071,
|
4857 |
+
"grad_norm": 3.930121421813965,
|
4858 |
+
"learning_rate": 0.00019011594351967727,
|
4859 |
+
"loss": 1.2831,
|
4860 |
+
"step": 34600
|
4861 |
+
},
|
4862 |
+
{
|
4863 |
+
"epoch": 1.0345445316931896,
|
4864 |
+
"grad_norm": 4.515873908996582,
|
4865 |
+
"learning_rate": 0.00019010165772375843,
|
4866 |
+
"loss": 1.2747,
|
4867 |
+
"step": 34650
|
4868 |
+
},
|
4869 |
+
{
|
4870 |
+
"epoch": 1.036037380945272,
|
4871 |
+
"grad_norm": 4.484467506408691,
|
4872 |
+
"learning_rate": 0.0001900873719278396,
|
4873 |
+
"loss": 1.289,
|
4874 |
+
"step": 34700
|
4875 |
+
},
|
4876 |
+
{
|
4877 |
+
"epoch": 1.0375302301973546,
|
4878 |
+
"grad_norm": 3.8937742710113525,
|
4879 |
+
"learning_rate": 0.00019007308613192076,
|
4880 |
+
"loss": 1.2939,
|
4881 |
+
"step": 34750
|
4882 |
+
},
|
4883 |
+
{
|
4884 |
+
"epoch": 1.0390230794494373,
|
4885 |
+
"grad_norm": 4.367059230804443,
|
4886 |
+
"learning_rate": 0.00019005880033600193,
|
4887 |
+
"loss": 1.2522,
|
4888 |
+
"step": 34800
|
4889 |
+
},
|
4890 |
+
{
|
4891 |
+
"epoch": 1.0405159287015198,
|
4892 |
+
"grad_norm": 5.143396854400635,
|
4893 |
+
"learning_rate": 0.0001900445145400831,
|
4894 |
+
"loss": 1.2996,
|
4895 |
+
"step": 34850
|
4896 |
+
},
|
4897 |
+
{
|
4898 |
+
"epoch": 1.0420087779536022,
|
4899 |
+
"grad_norm": 5.060800552368164,
|
4900 |
+
"learning_rate": 0.00019003022874416426,
|
4901 |
+
"loss": 1.3122,
|
4902 |
+
"step": 34900
|
4903 |
+
},
|
4904 |
+
{
|
4905 |
+
"epoch": 1.0435016272056847,
|
4906 |
+
"grad_norm": 4.775914192199707,
|
4907 |
+
"learning_rate": 0.00019001594294824542,
|
4908 |
+
"loss": 1.2595,
|
4909 |
+
"step": 34950
|
4910 |
+
},
|
4911 |
+
{
|
4912 |
+
"epoch": 1.0449944764577672,
|
4913 |
+
"grad_norm": 5.256932258605957,
|
4914 |
+
"learning_rate": 0.0001900016571523266,
|
4915 |
+
"loss": 1.3148,
|
4916 |
+
"step": 35000
|
4917 |
+
},
|
4918 |
+
{
|
4919 |
+
"epoch": 1.04648732570985,
|
4920 |
+
"grad_norm": 4.253088474273682,
|
4921 |
+
"learning_rate": 0.00018998737135640775,
|
4922 |
+
"loss": 1.2575,
|
4923 |
+
"step": 35050
|
4924 |
+
},
|
4925 |
+
{
|
4926 |
+
"epoch": 1.0479801749619324,
|
4927 |
+
"grad_norm": 4.784812927246094,
|
4928 |
+
"learning_rate": 0.00018997308556048892,
|
4929 |
+
"loss": 1.2911,
|
4930 |
+
"step": 35100
|
4931 |
+
},
|
4932 |
+
{
|
4933 |
+
"epoch": 1.049473024214015,
|
4934 |
+
"grad_norm": 3.9015183448791504,
|
4935 |
+
"learning_rate": 0.0001899587997645701,
|
4936 |
+
"loss": 1.2647,
|
4937 |
+
"step": 35150
|
4938 |
+
},
|
4939 |
+
{
|
4940 |
+
"epoch": 1.0509658734660974,
|
4941 |
+
"grad_norm": 4.729675769805908,
|
4942 |
+
"learning_rate": 0.00018994451396865125,
|
4943 |
+
"loss": 1.2876,
|
4944 |
+
"step": 35200
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 1.0524587227181799,
|
4948 |
+
"grad_norm": 4.921074867248535,
|
4949 |
+
"learning_rate": 0.00018993022817273244,
|
4950 |
+
"loss": 1.2956,
|
4951 |
+
"step": 35250
|
4952 |
+
},
|
4953 |
+
{
|
4954 |
+
"epoch": 1.0539515719702623,
|
4955 |
+
"grad_norm": 4.466576099395752,
|
4956 |
+
"learning_rate": 0.00018991594237681357,
|
4957 |
+
"loss": 1.2938,
|
4958 |
+
"step": 35300
|
4959 |
+
},
|
4960 |
+
{
|
4961 |
+
"epoch": 1.055444421222345,
|
4962 |
+
"grad_norm": 4.142183780670166,
|
4963 |
+
"learning_rate": 0.00018990165658089477,
|
4964 |
+
"loss": 1.282,
|
4965 |
+
"step": 35350
|
4966 |
+
},
|
4967 |
+
{
|
4968 |
+
"epoch": 1.0569372704744275,
|
4969 |
+
"grad_norm": 4.372234344482422,
|
4970 |
+
"learning_rate": 0.0001898873707849759,
|
4971 |
+
"loss": 1.2793,
|
4972 |
+
"step": 35400
|
4973 |
+
},
|
4974 |
+
{
|
4975 |
+
"epoch": 1.05843011972651,
|
4976 |
+
"grad_norm": 5.349823474884033,
|
4977 |
+
"learning_rate": 0.0001898730849890571,
|
4978 |
+
"loss": 1.3678,
|
4979 |
+
"step": 35450
|
4980 |
+
},
|
4981 |
+
{
|
4982 |
+
"epoch": 1.0599229689785925,
|
4983 |
+
"grad_norm": 4.57612419128418,
|
4984 |
+
"learning_rate": 0.00018985879919313826,
|
4985 |
+
"loss": 1.3256,
|
4986 |
+
"step": 35500
|
4987 |
+
},
|
4988 |
+
{
|
4989 |
+
"epoch": 1.061415818230675,
|
4990 |
+
"grad_norm": 5.3342156410217285,
|
4991 |
+
"learning_rate": 0.00018984451339721942,
|
4992 |
+
"loss": 1.2897,
|
4993 |
+
"step": 35550
|
4994 |
+
},
|
4995 |
+
{
|
4996 |
+
"epoch": 1.0629086674827577,
|
4997 |
+
"grad_norm": 5.361871242523193,
|
4998 |
+
"learning_rate": 0.0001898302276013006,
|
4999 |
+
"loss": 1.2933,
|
5000 |
+
"step": 35600
|
5001 |
+
},
|
5002 |
+
{
|
5003 |
+
"epoch": 1.0644015167348402,
|
5004 |
+
"grad_norm": 3.6657068729400635,
|
5005 |
+
"learning_rate": 0.00018981594180538175,
|
5006 |
+
"loss": 1.3359,
|
5007 |
+
"step": 35650
|
5008 |
+
},
|
5009 |
+
{
|
5010 |
+
"epoch": 1.0658943659869227,
|
5011 |
+
"grad_norm": 4.428374290466309,
|
5012 |
+
"learning_rate": 0.00018980165600946292,
|
5013 |
+
"loss": 1.3278,
|
5014 |
+
"step": 35700
|
5015 |
+
},
|
5016 |
+
{
|
5017 |
+
"epoch": 1.0673872152390051,
|
5018 |
+
"grad_norm": 5.079537391662598,
|
5019 |
+
"learning_rate": 0.00018978737021354408,
|
5020 |
+
"loss": 1.2699,
|
5021 |
+
"step": 35750
|
5022 |
+
},
|
5023 |
+
{
|
5024 |
+
"epoch": 1.0688800644910876,
|
5025 |
+
"grad_norm": 5.303153038024902,
|
5026 |
+
"learning_rate": 0.00018977308441762525,
|
5027 |
+
"loss": 1.2985,
|
5028 |
+
"step": 35800
|
5029 |
+
},
|
5030 |
+
{
|
5031 |
+
"epoch": 1.0703729137431701,
|
5032 |
+
"grad_norm": 4.319901943206787,
|
5033 |
+
"learning_rate": 0.0001897587986217064,
|
5034 |
+
"loss": 1.2661,
|
5035 |
+
"step": 35850
|
5036 |
+
},
|
5037 |
+
{
|
5038 |
+
"epoch": 1.0718657629952528,
|
5039 |
+
"grad_norm": 6.0582451820373535,
|
5040 |
+
"learning_rate": 0.00018974451282578758,
|
5041 |
+
"loss": 1.2837,
|
5042 |
+
"step": 35900
|
5043 |
+
},
|
5044 |
+
{
|
5045 |
+
"epoch": 1.0733586122473353,
|
5046 |
+
"grad_norm": 5.738245964050293,
|
5047 |
+
"learning_rate": 0.00018973022702986877,
|
5048 |
+
"loss": 1.3075,
|
5049 |
+
"step": 35950
|
5050 |
+
},
|
5051 |
+
{
|
5052 |
+
"epoch": 1.0748514614994178,
|
5053 |
+
"grad_norm": 4.602933883666992,
|
5054 |
+
"learning_rate": 0.0001897159412339499,
|
5055 |
+
"loss": 1.2585,
|
5056 |
+
"step": 36000
|
5057 |
+
},
|
5058 |
+
{
|
5059 |
+
"epoch": 1.0763443107515003,
|
5060 |
+
"grad_norm": 4.273153305053711,
|
5061 |
+
"learning_rate": 0.0001897016554380311,
|
5062 |
+
"loss": 1.3382,
|
5063 |
+
"step": 36050
|
5064 |
+
},
|
5065 |
+
{
|
5066 |
+
"epoch": 1.0778371600035828,
|
5067 |
+
"grad_norm": 4.225873947143555,
|
5068 |
+
"learning_rate": 0.00018968736964211224,
|
5069 |
+
"loss": 1.2964,
|
5070 |
+
"step": 36100
|
5071 |
+
},
|
5072 |
+
{
|
5073 |
+
"epoch": 1.0793300092556652,
|
5074 |
+
"grad_norm": 4.828727722167969,
|
5075 |
+
"learning_rate": 0.00018967308384619343,
|
5076 |
+
"loss": 1.3734,
|
5077 |
+
"step": 36150
|
5078 |
+
},
|
5079 |
+
{
|
5080 |
+
"epoch": 1.080822858507748,
|
5081 |
+
"grad_norm": 3.7251532077789307,
|
5082 |
+
"learning_rate": 0.00018965879805027457,
|
5083 |
+
"loss": 1.2706,
|
5084 |
+
"step": 36200
|
5085 |
+
},
|
5086 |
+
{
|
5087 |
+
"epoch": 1.0823157077598304,
|
5088 |
+
"grad_norm": 4.92055606842041,
|
5089 |
+
"learning_rate": 0.00018964451225435576,
|
5090 |
+
"loss": 1.3187,
|
5091 |
+
"step": 36250
|
5092 |
+
},
|
5093 |
+
{
|
5094 |
+
"epoch": 1.083808557011913,
|
5095 |
+
"grad_norm": 4.827970027923584,
|
5096 |
+
"learning_rate": 0.00018963022645843692,
|
5097 |
+
"loss": 1.2574,
|
5098 |
+
"step": 36300
|
5099 |
+
},
|
5100 |
+
{
|
5101 |
+
"epoch": 1.0853014062639954,
|
5102 |
+
"grad_norm": 4.742151737213135,
|
5103 |
+
"learning_rate": 0.00018961594066251809,
|
5104 |
+
"loss": 1.3076,
|
5105 |
+
"step": 36350
|
5106 |
+
},
|
5107 |
+
{
|
5108 |
+
"epoch": 1.086794255516078,
|
5109 |
+
"grad_norm": 3.70794939994812,
|
5110 |
+
"learning_rate": 0.00018960165486659925,
|
5111 |
+
"loss": 1.3075,
|
5112 |
+
"step": 36400
|
5113 |
+
},
|
5114 |
+
{
|
5115 |
+
"epoch": 1.0882871047681606,
|
5116 |
+
"grad_norm": 4.062809944152832,
|
5117 |
+
"learning_rate": 0.00018958736907068042,
|
5118 |
+
"loss": 1.2628,
|
5119 |
+
"step": 36450
|
5120 |
+
},
|
5121 |
+
{
|
5122 |
+
"epoch": 1.089779954020243,
|
5123 |
+
"grad_norm": 5.072466850280762,
|
5124 |
+
"learning_rate": 0.00018957308327476158,
|
5125 |
+
"loss": 1.3278,
|
5126 |
+
"step": 36500
|
5127 |
+
},
|
5128 |
+
{
|
5129 |
+
"epoch": 1.0912728032723256,
|
5130 |
+
"grad_norm": 4.530171871185303,
|
5131 |
+
"learning_rate": 0.00018955879747884274,
|
5132 |
+
"loss": 1.3233,
|
5133 |
+
"step": 36550
|
5134 |
+
},
|
5135 |
+
{
|
5136 |
+
"epoch": 1.092765652524408,
|
5137 |
+
"grad_norm": 3.9391825199127197,
|
5138 |
+
"learning_rate": 0.0001895445116829239,
|
5139 |
+
"loss": 1.2973,
|
5140 |
+
"step": 36600
|
5141 |
+
},
|
5142 |
+
{
|
5143 |
+
"epoch": 1.0942585017764905,
|
5144 |
+
"grad_norm": 6.774008274078369,
|
5145 |
+
"learning_rate": 0.00018953022588700507,
|
5146 |
+
"loss": 1.2801,
|
5147 |
+
"step": 36650
|
5148 |
+
},
|
5149 |
+
{
|
5150 |
+
"epoch": 1.0957513510285732,
|
5151 |
+
"grad_norm": 4.565464496612549,
|
5152 |
+
"learning_rate": 0.00018951594009108624,
|
5153 |
+
"loss": 1.3718,
|
5154 |
+
"step": 36700
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 1.0972442002806557,
|
5158 |
+
"grad_norm": 5.68241548538208,
|
5159 |
+
"learning_rate": 0.00018950165429516743,
|
5160 |
+
"loss": 1.2928,
|
5161 |
+
"step": 36750
|
5162 |
+
},
|
5163 |
+
{
|
5164 |
+
"epoch": 1.0987370495327382,
|
5165 |
+
"grad_norm": 4.235448360443115,
|
5166 |
+
"learning_rate": 0.00018948736849924857,
|
5167 |
+
"loss": 1.3097,
|
5168 |
+
"step": 36800
|
5169 |
+
},
|
5170 |
+
{
|
5171 |
+
"epoch": 1.1002298987848207,
|
5172 |
+
"grad_norm": 4.672369003295898,
|
5173 |
+
"learning_rate": 0.00018947308270332976,
|
5174 |
+
"loss": 1.2875,
|
5175 |
+
"step": 36850
|
5176 |
+
},
|
5177 |
+
{
|
5178 |
+
"epoch": 1.1017227480369032,
|
5179 |
+
"grad_norm": 6.449750900268555,
|
5180 |
+
"learning_rate": 0.0001894587969074109,
|
5181 |
+
"loss": 1.2823,
|
5182 |
+
"step": 36900
|
5183 |
+
},
|
5184 |
+
{
|
5185 |
+
"epoch": 1.1032155972889857,
|
5186 |
+
"grad_norm": 5.093149662017822,
|
5187 |
+
"learning_rate": 0.0001894445111114921,
|
5188 |
+
"loss": 1.315,
|
5189 |
+
"step": 36950
|
5190 |
+
},
|
5191 |
+
{
|
5192 |
+
"epoch": 1.1047084465410684,
|
5193 |
+
"grad_norm": 5.433828353881836,
|
5194 |
+
"learning_rate": 0.00018943022531557323,
|
5195 |
+
"loss": 1.2781,
|
5196 |
+
"step": 37000
|
5197 |
+
},
|
5198 |
+
{
|
5199 |
+
"epoch": 1.1062012957931509,
|
5200 |
+
"grad_norm": 5.176681995391846,
|
5201 |
+
"learning_rate": 0.00018941593951965442,
|
5202 |
+
"loss": 1.2652,
|
5203 |
+
"step": 37050
|
5204 |
+
},
|
5205 |
+
{
|
5206 |
+
"epoch": 1.1076941450452333,
|
5207 |
+
"grad_norm": 4.09026575088501,
|
5208 |
+
"learning_rate": 0.00018940165372373558,
|
5209 |
+
"loss": 1.3045,
|
5210 |
+
"step": 37100
|
5211 |
+
},
|
5212 |
+
{
|
5213 |
+
"epoch": 1.1091869942973158,
|
5214 |
+
"grad_norm": 5.223085880279541,
|
5215 |
+
"learning_rate": 0.00018938736792781675,
|
5216 |
+
"loss": 1.3009,
|
5217 |
+
"step": 37150
|
5218 |
+
},
|
5219 |
+
{
|
5220 |
+
"epoch": 1.1106798435493983,
|
5221 |
+
"grad_norm": 5.105752944946289,
|
5222 |
+
"learning_rate": 0.0001893730821318979,
|
5223 |
+
"loss": 1.2915,
|
5224 |
+
"step": 37200
|
5225 |
+
},
|
5226 |
+
{
|
5227 |
+
"epoch": 1.1121726928014808,
|
5228 |
+
"grad_norm": 3.945962905883789,
|
5229 |
+
"learning_rate": 0.00018935879633597908,
|
5230 |
+
"loss": 1.341,
|
5231 |
+
"step": 37250
|
5232 |
+
},
|
5233 |
+
{
|
5234 |
+
"epoch": 1.1136655420535635,
|
5235 |
+
"grad_norm": 4.856802463531494,
|
5236 |
+
"learning_rate": 0.00018934451054006024,
|
5237 |
+
"loss": 1.3318,
|
5238 |
+
"step": 37300
|
5239 |
+
},
|
5240 |
+
{
|
5241 |
+
"epoch": 1.115158391305646,
|
5242 |
+
"grad_norm": 4.858597755432129,
|
5243 |
+
"learning_rate": 0.0001893302247441414,
|
5244 |
+
"loss": 1.2756,
|
5245 |
+
"step": 37350
|
5246 |
+
},
|
5247 |
+
{
|
5248 |
+
"epoch": 1.1166512405577285,
|
5249 |
+
"grad_norm": 4.594838619232178,
|
5250 |
+
"learning_rate": 0.00018931593894822257,
|
5251 |
+
"loss": 1.2712,
|
5252 |
+
"step": 37400
|
5253 |
+
},
|
5254 |
+
{
|
5255 |
+
"epoch": 1.118144089809811,
|
5256 |
+
"grad_norm": 4.745405673980713,
|
5257 |
+
"learning_rate": 0.00018930165315230374,
|
5258 |
+
"loss": 1.2878,
|
5259 |
+
"step": 37450
|
5260 |
+
},
|
5261 |
+
{
|
5262 |
+
"epoch": 1.1196369390618934,
|
5263 |
+
"grad_norm": 4.112718105316162,
|
5264 |
+
"learning_rate": 0.0001892873673563849,
|
5265 |
+
"loss": 1.3337,
|
5266 |
+
"step": 37500
|
5267 |
+
},
|
5268 |
+
{
|
5269 |
+
"epoch": 1.1211297883139761,
|
5270 |
+
"grad_norm": 4.297769069671631,
|
5271 |
+
"learning_rate": 0.00018927308156046607,
|
5272 |
+
"loss": 1.3324,
|
5273 |
+
"step": 37550
|
5274 |
+
},
|
5275 |
+
{
|
5276 |
+
"epoch": 1.1226226375660586,
|
5277 |
+
"grad_norm": 5.46763801574707,
|
5278 |
+
"learning_rate": 0.00018925879576454723,
|
5279 |
+
"loss": 1.276,
|
5280 |
+
"step": 37600
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 1.124115486818141,
|
5284 |
+
"grad_norm": 5.648976802825928,
|
5285 |
+
"learning_rate": 0.0001892445099686284,
|
5286 |
+
"loss": 1.3238,
|
5287 |
+
"step": 37650
|
5288 |
+
},
|
5289 |
+
{
|
5290 |
+
"epoch": 1.1256083360702236,
|
5291 |
+
"grad_norm": 4.185100078582764,
|
5292 |
+
"learning_rate": 0.00018923022417270956,
|
5293 |
+
"loss": 1.2848,
|
5294 |
+
"step": 37700
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 1.127101185322306,
|
5298 |
+
"grad_norm": 4.676313877105713,
|
5299 |
+
"learning_rate": 0.00018921593837679072,
|
5300 |
+
"loss": 1.3038,
|
5301 |
+
"step": 37750
|
5302 |
+
},
|
5303 |
+
{
|
5304 |
+
"epoch": 1.1285940345743888,
|
5305 |
+
"grad_norm": 4.426568508148193,
|
5306 |
+
"learning_rate": 0.00018920165258087192,
|
5307 |
+
"loss": 1.3343,
|
5308 |
+
"step": 37800
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 1.1300868838264713,
|
5312 |
+
"grad_norm": 4.887205600738525,
|
5313 |
+
"learning_rate": 0.00018918736678495305,
|
5314 |
+
"loss": 1.3456,
|
5315 |
+
"step": 37850
|
5316 |
+
},
|
5317 |
+
{
|
5318 |
+
"epoch": 1.1315797330785538,
|
5319 |
+
"grad_norm": 5.455615043640137,
|
5320 |
+
"learning_rate": 0.00018917308098903424,
|
5321 |
+
"loss": 1.3258,
|
5322 |
+
"step": 37900
|
5323 |
+
},
|
5324 |
+
{
|
5325 |
+
"epoch": 1.1330725823306362,
|
5326 |
+
"grad_norm": 5.676678657531738,
|
5327 |
+
"learning_rate": 0.00018915879519311538,
|
5328 |
+
"loss": 1.3702,
|
5329 |
+
"step": 37950
|
5330 |
+
},
|
5331 |
+
{
|
5332 |
+
"epoch": 1.1345654315827187,
|
5333 |
+
"grad_norm": 3.9528415203094482,
|
5334 |
+
"learning_rate": 0.00018914450939719657,
|
5335 |
+
"loss": 1.3097,
|
5336 |
+
"step": 38000
|
5337 |
+
},
|
5338 |
+
{
|
5339 |
+
"epoch": 1.1360582808348014,
|
5340 |
+
"grad_norm": 4.538026332855225,
|
5341 |
+
"learning_rate": 0.0001891302236012777,
|
5342 |
+
"loss": 1.316,
|
5343 |
+
"step": 38050
|
5344 |
+
},
|
5345 |
+
{
|
5346 |
+
"epoch": 1.137551130086884,
|
5347 |
+
"grad_norm": 4.46422815322876,
|
5348 |
+
"learning_rate": 0.0001891159378053589,
|
5349 |
+
"loss": 1.2479,
|
5350 |
+
"step": 38100
|
5351 |
+
},
|
5352 |
+
{
|
5353 |
+
"epoch": 1.1390439793389664,
|
5354 |
+
"grad_norm": 4.823958396911621,
|
5355 |
+
"learning_rate": 0.00018910165200944007,
|
5356 |
+
"loss": 1.2616,
|
5357 |
+
"step": 38150
|
5358 |
+
},
|
5359 |
+
{
|
5360 |
+
"epoch": 1.1405368285910489,
|
5361 |
+
"grad_norm": 5.7128167152404785,
|
5362 |
+
"learning_rate": 0.00018908736621352123,
|
5363 |
+
"loss": 1.2853,
|
5364 |
+
"step": 38200
|
5365 |
+
},
|
5366 |
+
{
|
5367 |
+
"epoch": 1.1420296778431314,
|
5368 |
+
"grad_norm": 3.5420877933502197,
|
5369 |
+
"learning_rate": 0.0001890730804176024,
|
5370 |
+
"loss": 1.3008,
|
5371 |
+
"step": 38250
|
5372 |
+
},
|
5373 |
+
{
|
5374 |
+
"epoch": 1.1435225270952138,
|
5375 |
+
"grad_norm": 4.617981910705566,
|
5376 |
+
"learning_rate": 0.00018905879462168356,
|
5377 |
+
"loss": 1.2888,
|
5378 |
+
"step": 38300
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 1.1450153763472963,
|
5382 |
+
"grad_norm": 4.542544364929199,
|
5383 |
+
"learning_rate": 0.00018904450882576473,
|
5384 |
+
"loss": 1.3009,
|
5385 |
+
"step": 38350
|
5386 |
+
},
|
5387 |
+
{
|
5388 |
+
"epoch": 1.146508225599379,
|
5389 |
+
"grad_norm": 4.301428318023682,
|
5390 |
+
"learning_rate": 0.0001890302230298459,
|
5391 |
+
"loss": 1.2834,
|
5392 |
+
"step": 38400
|
5393 |
+
},
|
5394 |
+
{
|
5395 |
+
"epoch": 1.1480010748514615,
|
5396 |
+
"grad_norm": 4.136764049530029,
|
5397 |
+
"learning_rate": 0.00018901593723392706,
|
5398 |
+
"loss": 1.3895,
|
5399 |
+
"step": 38450
|
5400 |
+
},
|
5401 |
+
{
|
5402 |
+
"epoch": 1.149493924103544,
|
5403 |
+
"grad_norm": 5.59256649017334,
|
5404 |
+
"learning_rate": 0.00018900165143800822,
|
5405 |
+
"loss": 1.295,
|
5406 |
+
"step": 38500
|
5407 |
+
},
|
5408 |
+
{
|
5409 |
+
"epoch": 1.1509867733556265,
|
5410 |
+
"grad_norm": 3.4925365447998047,
|
5411 |
+
"learning_rate": 0.00018898736564208939,
|
5412 |
+
"loss": 1.3385,
|
5413 |
+
"step": 38550
|
5414 |
+
},
|
5415 |
+
{
|
5416 |
+
"epoch": 1.152479622607709,
|
5417 |
+
"grad_norm": 4.884555816650391,
|
5418 |
+
"learning_rate": 0.00018897307984617058,
|
5419 |
+
"loss": 1.2776,
|
5420 |
+
"step": 38600
|
5421 |
+
},
|
5422 |
+
{
|
5423 |
+
"epoch": 1.1539724718597917,
|
5424 |
+
"grad_norm": 3.866908550262451,
|
5425 |
+
"learning_rate": 0.00018895879405025171,
|
5426 |
+
"loss": 1.3173,
|
5427 |
+
"step": 38650
|
5428 |
+
},
|
5429 |
+
{
|
5430 |
+
"epoch": 1.1554653211118742,
|
5431 |
+
"grad_norm": 4.8104939460754395,
|
5432 |
+
"learning_rate": 0.0001889445082543329,
|
5433 |
+
"loss": 1.2988,
|
5434 |
+
"step": 38700
|
5435 |
+
},
|
5436 |
+
{
|
5437 |
+
"epoch": 1.1569581703639567,
|
5438 |
+
"grad_norm": 3.814675807952881,
|
5439 |
+
"learning_rate": 0.00018893022245841404,
|
5440 |
+
"loss": 1.348,
|
5441 |
+
"step": 38750
|
5442 |
+
},
|
5443 |
+
{
|
5444 |
+
"epoch": 1.1584510196160391,
|
5445 |
+
"grad_norm": 5.720306873321533,
|
5446 |
+
"learning_rate": 0.00018891593666249524,
|
5447 |
+
"loss": 1.2844,
|
5448 |
+
"step": 38800
|
5449 |
+
},
|
5450 |
+
{
|
5451 |
+
"epoch": 1.1599438688681216,
|
5452 |
+
"grad_norm": 4.06850528717041,
|
5453 |
+
"learning_rate": 0.00018890165086657637,
|
5454 |
+
"loss": 1.3514,
|
5455 |
+
"step": 38850
|
5456 |
+
},
|
5457 |
+
{
|
5458 |
+
"epoch": 1.1614367181202043,
|
5459 |
+
"grad_norm": 6.193358898162842,
|
5460 |
+
"learning_rate": 0.00018888736507065757,
|
5461 |
+
"loss": 1.3587,
|
5462 |
+
"step": 38900
|
5463 |
+
},
|
5464 |
+
{
|
5465 |
+
"epoch": 1.1629295673722868,
|
5466 |
+
"grad_norm": 4.8998212814331055,
|
5467 |
+
"learning_rate": 0.00018887307927473873,
|
5468 |
+
"loss": 1.317,
|
5469 |
+
"step": 38950
|
5470 |
+
},
|
5471 |
+
{
|
5472 |
+
"epoch": 1.1644224166243693,
|
5473 |
+
"grad_norm": 4.4293107986450195,
|
5474 |
+
"learning_rate": 0.0001888587934788199,
|
5475 |
+
"loss": 1.3473,
|
5476 |
+
"step": 39000
|
5477 |
+
},
|
5478 |
+
{
|
5479 |
+
"epoch": 1.1659152658764518,
|
5480 |
+
"grad_norm": 6.795536994934082,
|
5481 |
+
"learning_rate": 0.00018884450768290106,
|
5482 |
+
"loss": 1.2793,
|
5483 |
+
"step": 39050
|
5484 |
+
},
|
5485 |
+
{
|
5486 |
+
"epoch": 1.1674081151285343,
|
5487 |
+
"grad_norm": 4.158294200897217,
|
5488 |
+
"learning_rate": 0.00018883022188698222,
|
5489 |
+
"loss": 1.3175,
|
5490 |
+
"step": 39100
|
5491 |
+
},
|
5492 |
+
{
|
5493 |
+
"epoch": 1.168900964380617,
|
5494 |
+
"grad_norm": 5.839204788208008,
|
5495 |
+
"learning_rate": 0.0001888159360910634,
|
5496 |
+
"loss": 1.2931,
|
5497 |
+
"step": 39150
|
5498 |
+
},
|
5499 |
+
{
|
5500 |
+
"epoch": 1.1703938136326995,
|
5501 |
+
"grad_norm": 6.633917331695557,
|
5502 |
+
"learning_rate": 0.00018880165029514455,
|
5503 |
+
"loss": 1.3053,
|
5504 |
+
"step": 39200
|
5505 |
+
},
|
5506 |
+
{
|
5507 |
+
"epoch": 1.171886662884782,
|
5508 |
+
"grad_norm": 4.409125328063965,
|
5509 |
+
"learning_rate": 0.00018878736449922572,
|
5510 |
+
"loss": 1.3074,
|
5511 |
+
"step": 39250
|
5512 |
+
},
|
5513 |
+
{
|
5514 |
+
"epoch": 1.1733795121368644,
|
5515 |
+
"grad_norm": 4.820318698883057,
|
5516 |
+
"learning_rate": 0.00018877307870330688,
|
5517 |
+
"loss": 1.329,
|
5518 |
+
"step": 39300
|
5519 |
+
},
|
5520 |
+
{
|
5521 |
+
"epoch": 1.174872361388947,
|
5522 |
+
"grad_norm": 5.104337215423584,
|
5523 |
+
"learning_rate": 0.00018875879290738805,
|
5524 |
+
"loss": 1.2628,
|
5525 |
+
"step": 39350
|
5526 |
+
},
|
5527 |
+
{
|
5528 |
+
"epoch": 1.1763652106410294,
|
5529 |
+
"grad_norm": 5.449405670166016,
|
5530 |
+
"learning_rate": 0.00018874450711146924,
|
5531 |
+
"loss": 1.2672,
|
5532 |
+
"step": 39400
|
5533 |
+
},
|
5534 |
+
{
|
5535 |
+
"epoch": 1.1778580598931119,
|
5536 |
+
"grad_norm": 5.3521504402160645,
|
5537 |
+
"learning_rate": 0.00018873022131555038,
|
5538 |
+
"loss": 1.3236,
|
5539 |
+
"step": 39450
|
5540 |
+
},
|
5541 |
+
{
|
5542 |
+
"epoch": 1.1793509091451946,
|
5543 |
+
"grad_norm": 5.748175144195557,
|
5544 |
+
"learning_rate": 0.00018871593551963157,
|
5545 |
+
"loss": 1.284,
|
5546 |
+
"step": 39500
|
5547 |
+
},
|
5548 |
+
{
|
5549 |
+
"epoch": 1.180843758397277,
|
5550 |
+
"grad_norm": 4.418118953704834,
|
5551 |
+
"learning_rate": 0.0001887016497237127,
|
5552 |
+
"loss": 1.299,
|
5553 |
+
"step": 39550
|
5554 |
+
},
|
5555 |
+
{
|
5556 |
+
"epoch": 1.1823366076493596,
|
5557 |
+
"grad_norm": 4.267107009887695,
|
5558 |
+
"learning_rate": 0.0001886873639277939,
|
5559 |
+
"loss": 1.3287,
|
5560 |
+
"step": 39600
|
5561 |
+
},
|
5562 |
+
{
|
5563 |
+
"epoch": 1.183829456901442,
|
5564 |
+
"grad_norm": 4.607259273529053,
|
5565 |
+
"learning_rate": 0.00018867307813187503,
|
5566 |
+
"loss": 1.3087,
|
5567 |
+
"step": 39650
|
5568 |
+
},
|
5569 |
+
{
|
5570 |
+
"epoch": 1.1853223061535245,
|
5571 |
+
"grad_norm": 4.254667282104492,
|
5572 |
+
"learning_rate": 0.00018865879233595623,
|
5573 |
+
"loss": 1.3015,
|
5574 |
+
"step": 39700
|
5575 |
+
},
|
5576 |
+
{
|
5577 |
+
"epoch": 1.1868151554056072,
|
5578 |
+
"grad_norm": 5.307114124298096,
|
5579 |
+
"learning_rate": 0.0001886445065400374,
|
5580 |
+
"loss": 1.2802,
|
5581 |
+
"step": 39750
|
5582 |
+
},
|
5583 |
+
{
|
5584 |
+
"epoch": 1.1883080046576897,
|
5585 |
+
"grad_norm": 3.9305639266967773,
|
5586 |
+
"learning_rate": 0.00018863022074411856,
|
5587 |
+
"loss": 1.2975,
|
5588 |
+
"step": 39800
|
5589 |
+
},
|
5590 |
+
{
|
5591 |
+
"epoch": 1.1898008539097722,
|
5592 |
+
"grad_norm": 4.650544166564941,
|
5593 |
+
"learning_rate": 0.00018861593494819972,
|
5594 |
+
"loss": 1.3239,
|
5595 |
+
"step": 39850
|
5596 |
+
},
|
5597 |
+
{
|
5598 |
+
"epoch": 1.1912937031618547,
|
5599 |
+
"grad_norm": 4.182717800140381,
|
5600 |
+
"learning_rate": 0.00018860164915228086,
|
5601 |
+
"loss": 1.3948,
|
5602 |
+
"step": 39900
|
5603 |
+
},
|
5604 |
+
{
|
5605 |
+
"epoch": 1.1927865524139372,
|
5606 |
+
"grad_norm": 5.322524547576904,
|
5607 |
+
"learning_rate": 0.00018858736335636205,
|
5608 |
+
"loss": 1.3411,
|
5609 |
+
"step": 39950
|
5610 |
+
},
|
5611 |
+
{
|
5612 |
+
"epoch": 1.1942794016660199,
|
5613 |
+
"grad_norm": 5.221969127655029,
|
5614 |
+
"learning_rate": 0.0001885730775604432,
|
5615 |
+
"loss": 1.3047,
|
5616 |
+
"step": 40000
|
5617 |
}
|
5618 |
],
|
5619 |
"logging_steps": 50,
|
|
|
5633 |
"attributes": {}
|
5634 |
}
|
5635 |
},
|
5636 |
+
"total_flos": 1.011899217623384e+18,
|
5637 |
"train_batch_size": 2,
|
5638 |
"trial_name": null,
|
5639 |
"trial_params": null
|