Training in progress, step 2821, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 677271474
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e71acb3aaa1cbdb9a2608831f36ed6bd2ad90dc878ebc4f574cf1abe8b87ef8a
|
3 |
size 677271474
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1354738888
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22b9676249e1f23320872babd4fdfdfb7911603e377235deeb7766fd71f1fb3d
|
3 |
size 1354738888
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ed5a074d47e09b0dc453a6efb17fbfeca03488f605d01a314d755493b7d1cbd
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cd6b5f2dd07fcbe6072acae46c112970452c1ec1fd434ff82fc7fc43f301d19
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18410,6 +18410,1350 @@
|
|
18410 |
"learning_rate": 1.22350548971622e-06,
|
18411 |
"loss": 8.0504,
|
18412 |
"step": 2629
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18413 |
}
|
18414 |
],
|
18415 |
"logging_steps": 1,
|
@@ -18424,12 +19768,12 @@
|
|
18424 |
"should_evaluate": false,
|
18425 |
"should_log": false,
|
18426 |
"should_save": true,
|
18427 |
-
"should_training_stop":
|
18428 |
},
|
18429 |
"attributes": {}
|
18430 |
}
|
18431 |
},
|
18432 |
-
"total_flos":
|
18433 |
"train_batch_size": 4,
|
18434 |
"trial_name": null,
|
18435 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9997342074953486,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2821,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18410 |
"learning_rate": 1.22350548971622e-06,
|
18411 |
"loss": 8.0504,
|
18412 |
"step": 2629
|
18413 |
+
},
|
18414 |
+
{
|
18415 |
+
"epoch": 0.9320457163108,
|
18416 |
+
"grad_norm": 10.457837104797363,
|
18417 |
+
"learning_rate": 1.2108453883011905e-06,
|
18418 |
+
"loss": 7.9261,
|
18419 |
+
"step": 2630
|
18420 |
+
},
|
18421 |
+
{
|
18422 |
+
"epoch": 0.9324001063170019,
|
18423 |
+
"grad_norm": 10.277976989746094,
|
18424 |
+
"learning_rate": 1.1982503246958044e-06,
|
18425 |
+
"loss": 7.3617,
|
18426 |
+
"step": 2631
|
18427 |
+
},
|
18428 |
+
{
|
18429 |
+
"epoch": 0.9327544963232037,
|
18430 |
+
"grad_norm": 10.135127067565918,
|
18431 |
+
"learning_rate": 1.1857203156897646e-06,
|
18432 |
+
"loss": 6.6617,
|
18433 |
+
"step": 2632
|
18434 |
+
},
|
18435 |
+
{
|
18436 |
+
"epoch": 0.9331088863294055,
|
18437 |
+
"grad_norm": 9.504755973815918,
|
18438 |
+
"learning_rate": 1.1732553779860544e-06,
|
18439 |
+
"loss": 6.8578,
|
18440 |
+
"step": 2633
|
18441 |
+
},
|
18442 |
+
{
|
18443 |
+
"epoch": 0.9334632763356073,
|
18444 |
+
"grad_norm": 9.920090675354004,
|
18445 |
+
"learning_rate": 1.1608555282009048e-06,
|
18446 |
+
"loss": 6.782,
|
18447 |
+
"step": 2634
|
18448 |
+
},
|
18449 |
+
{
|
18450 |
+
"epoch": 0.9338176663418092,
|
18451 |
+
"grad_norm": 10.654485702514648,
|
18452 |
+
"learning_rate": 1.148520782863799e-06,
|
18453 |
+
"loss": 7.0663,
|
18454 |
+
"step": 2635
|
18455 |
+
},
|
18456 |
+
{
|
18457 |
+
"epoch": 0.934172056348011,
|
18458 |
+
"grad_norm": 11.514944076538086,
|
18459 |
+
"learning_rate": 1.1362511584174173e-06,
|
18460 |
+
"loss": 7.9396,
|
18461 |
+
"step": 2636
|
18462 |
+
},
|
18463 |
+
{
|
18464 |
+
"epoch": 0.9345264463542128,
|
18465 |
+
"grad_norm": 9.572590827941895,
|
18466 |
+
"learning_rate": 1.1240466712176379e-06,
|
18467 |
+
"loss": 7.6735,
|
18468 |
+
"step": 2637
|
18469 |
+
},
|
18470 |
+
{
|
18471 |
+
"epoch": 0.9348808363604146,
|
18472 |
+
"grad_norm": 10.75777816772461,
|
18473 |
+
"learning_rate": 1.1119073375335132e-06,
|
18474 |
+
"loss": 6.4615,
|
18475 |
+
"step": 2638
|
18476 |
+
},
|
18477 |
+
{
|
18478 |
+
"epoch": 0.9352352263666165,
|
18479 |
+
"grad_norm": 10.495604515075684,
|
18480 |
+
"learning_rate": 1.099833173547249e-06,
|
18481 |
+
"loss": 6.7071,
|
18482 |
+
"step": 2639
|
18483 |
+
},
|
18484 |
+
{
|
18485 |
+
"epoch": 0.9355896163728182,
|
18486 |
+
"grad_norm": 10.387083053588867,
|
18487 |
+
"learning_rate": 1.0878241953541478e-06,
|
18488 |
+
"loss": 6.2487,
|
18489 |
+
"step": 2640
|
18490 |
+
},
|
18491 |
+
{
|
18492 |
+
"epoch": 0.9359440063790201,
|
18493 |
+
"grad_norm": 11.0549955368042,
|
18494 |
+
"learning_rate": 1.0758804189626492e-06,
|
18495 |
+
"loss": 7.5193,
|
18496 |
+
"step": 2641
|
18497 |
+
},
|
18498 |
+
{
|
18499 |
+
"epoch": 0.9362983963852219,
|
18500 |
+
"grad_norm": 9.914316177368164,
|
18501 |
+
"learning_rate": 1.0640018602942614e-06,
|
18502 |
+
"loss": 7.0707,
|
18503 |
+
"step": 2642
|
18504 |
+
},
|
18505 |
+
{
|
18506 |
+
"epoch": 0.9366527863914238,
|
18507 |
+
"grad_norm": 9.522320747375488,
|
18508 |
+
"learning_rate": 1.0521885351835625e-06,
|
18509 |
+
"loss": 6.3052,
|
18510 |
+
"step": 2643
|
18511 |
+
},
|
18512 |
+
{
|
18513 |
+
"epoch": 0.9370071763976255,
|
18514 |
+
"grad_norm": 10.41584300994873,
|
18515 |
+
"learning_rate": 1.0404404593781559e-06,
|
18516 |
+
"loss": 8.4116,
|
18517 |
+
"step": 2644
|
18518 |
+
},
|
18519 |
+
{
|
18520 |
+
"epoch": 0.9373615664038274,
|
18521 |
+
"grad_norm": 10.059273719787598,
|
18522 |
+
"learning_rate": 1.0287576485386864e-06,
|
18523 |
+
"loss": 6.749,
|
18524 |
+
"step": 2645
|
18525 |
+
},
|
18526 |
+
{
|
18527 |
+
"epoch": 0.9377159564100292,
|
18528 |
+
"grad_norm": 11.211931228637695,
|
18529 |
+
"learning_rate": 1.0171401182387852e-06,
|
18530 |
+
"loss": 7.9731,
|
18531 |
+
"step": 2646
|
18532 |
+
},
|
18533 |
+
{
|
18534 |
+
"epoch": 0.9380703464162311,
|
18535 |
+
"grad_norm": 10.676755905151367,
|
18536 |
+
"learning_rate": 1.005587883965059e-06,
|
18537 |
+
"loss": 7.7199,
|
18538 |
+
"step": 2647
|
18539 |
+
},
|
18540 |
+
{
|
18541 |
+
"epoch": 0.9384247364224328,
|
18542 |
+
"grad_norm": 10.626141548156738,
|
18543 |
+
"learning_rate": 9.941009611170838e-07,
|
18544 |
+
"loss": 7.326,
|
18545 |
+
"step": 2648
|
18546 |
+
},
|
18547 |
+
{
|
18548 |
+
"epoch": 0.9387791264286347,
|
18549 |
+
"grad_norm": 11.591911315917969,
|
18550 |
+
"learning_rate": 9.826793650073606e-07,
|
18551 |
+
"loss": 7.0318,
|
18552 |
+
"step": 2649
|
18553 |
+
},
|
18554 |
+
{
|
18555 |
+
"epoch": 0.9391335164348366,
|
18556 |
+
"grad_norm": 11.885016441345215,
|
18557 |
+
"learning_rate": 9.713231108613274e-07,
|
18558 |
+
"loss": 6.9919,
|
18559 |
+
"step": 2650
|
18560 |
+
},
|
18561 |
+
{
|
18562 |
+
"epoch": 0.9394879064410384,
|
18563 |
+
"grad_norm": 6.760797500610352,
|
18564 |
+
"learning_rate": 9.600322138172968e-07,
|
18565 |
+
"loss": 9.4879,
|
18566 |
+
"step": 2651
|
18567 |
+
},
|
18568 |
+
{
|
18569 |
+
"epoch": 0.9398422964472402,
|
18570 |
+
"grad_norm": 7.657442569732666,
|
18571 |
+
"learning_rate": 9.488066889264624e-07,
|
18572 |
+
"loss": 9.0172,
|
18573 |
+
"step": 2652
|
18574 |
+
},
|
18575 |
+
{
|
18576 |
+
"epoch": 0.940196686453442,
|
18577 |
+
"grad_norm": 8.542561531066895,
|
18578 |
+
"learning_rate": 9.376465511528876e-07,
|
18579 |
+
"loss": 9.5194,
|
18580 |
+
"step": 2653
|
18581 |
+
},
|
18582 |
+
{
|
18583 |
+
"epoch": 0.9405510764596439,
|
18584 |
+
"grad_norm": 8.692965507507324,
|
18585 |
+
"learning_rate": 9.265518153734498e-07,
|
18586 |
+
"loss": 8.0484,
|
18587 |
+
"step": 2654
|
18588 |
+
},
|
18589 |
+
{
|
18590 |
+
"epoch": 0.9409054664658456,
|
18591 |
+
"grad_norm": 9.451211929321289,
|
18592 |
+
"learning_rate": 9.155224963778685e-07,
|
18593 |
+
"loss": 8.7355,
|
18594 |
+
"step": 2655
|
18595 |
+
},
|
18596 |
+
{
|
18597 |
+
"epoch": 0.9412598564720475,
|
18598 |
+
"grad_norm": 9.57262134552002,
|
18599 |
+
"learning_rate": 9.045586088686497e-07,
|
18600 |
+
"loss": 7.5628,
|
18601 |
+
"step": 2656
|
18602 |
+
},
|
18603 |
+
{
|
18604 |
+
"epoch": 0.9416142464782493,
|
18605 |
+
"grad_norm": 10.283690452575684,
|
18606 |
+
"learning_rate": 8.936601674610634e-07,
|
18607 |
+
"loss": 7.0982,
|
18608 |
+
"step": 2657
|
18609 |
+
},
|
18610 |
+
{
|
18611 |
+
"epoch": 0.9419686364844512,
|
18612 |
+
"grad_norm": 10.907401084899902,
|
18613 |
+
"learning_rate": 8.82827186683155e-07,
|
18614 |
+
"loss": 8.2248,
|
18615 |
+
"step": 2658
|
18616 |
+
},
|
18617 |
+
{
|
18618 |
+
"epoch": 0.9423230264906529,
|
18619 |
+
"grad_norm": 12.737578392028809,
|
18620 |
+
"learning_rate": 8.720596809757064e-07,
|
18621 |
+
"loss": 9.0023,
|
18622 |
+
"step": 2659
|
18623 |
+
},
|
18624 |
+
{
|
18625 |
+
"epoch": 0.9426774164968548,
|
18626 |
+
"grad_norm": 13.0936279296875,
|
18627 |
+
"learning_rate": 8.613576646922083e-07,
|
18628 |
+
"loss": 7.4829,
|
18629 |
+
"step": 2660
|
18630 |
+
},
|
18631 |
+
{
|
18632 |
+
"epoch": 0.9430318065030566,
|
18633 |
+
"grad_norm": 10.905577659606934,
|
18634 |
+
"learning_rate": 8.5072115209886e-07,
|
18635 |
+
"loss": 7.4625,
|
18636 |
+
"step": 2661
|
18637 |
+
},
|
18638 |
+
{
|
18639 |
+
"epoch": 0.9433861965092585,
|
18640 |
+
"grad_norm": 8.523266792297363,
|
18641 |
+
"learning_rate": 8.401501573745363e-07,
|
18642 |
+
"loss": 7.4627,
|
18643 |
+
"step": 2662
|
18644 |
+
},
|
18645 |
+
{
|
18646 |
+
"epoch": 0.9437405865154602,
|
18647 |
+
"grad_norm": 9.642807960510254,
|
18648 |
+
"learning_rate": 8.296446946107817e-07,
|
18649 |
+
"loss": 8.6972,
|
18650 |
+
"step": 2663
|
18651 |
+
},
|
18652 |
+
{
|
18653 |
+
"epoch": 0.9440949765216621,
|
18654 |
+
"grad_norm": 9.578781127929688,
|
18655 |
+
"learning_rate": 8.192047778117828e-07,
|
18656 |
+
"loss": 7.3822,
|
18657 |
+
"step": 2664
|
18658 |
+
},
|
18659 |
+
{
|
18660 |
+
"epoch": 0.9444493665278639,
|
18661 |
+
"grad_norm": 9.447463989257812,
|
18662 |
+
"learning_rate": 8.088304208943409e-07,
|
18663 |
+
"loss": 8.0613,
|
18664 |
+
"step": 2665
|
18665 |
+
},
|
18666 |
+
{
|
18667 |
+
"epoch": 0.9448037565340658,
|
18668 |
+
"grad_norm": 8.89419937133789,
|
18669 |
+
"learning_rate": 7.985216376878823e-07,
|
18670 |
+
"loss": 7.4979,
|
18671 |
+
"step": 2666
|
18672 |
+
},
|
18673 |
+
{
|
18674 |
+
"epoch": 0.9451581465402675,
|
18675 |
+
"grad_norm": 8.579427719116211,
|
18676 |
+
"learning_rate": 7.882784419343925e-07,
|
18677 |
+
"loss": 8.4803,
|
18678 |
+
"step": 2667
|
18679 |
+
},
|
18680 |
+
{
|
18681 |
+
"epoch": 0.9455125365464694,
|
18682 |
+
"grad_norm": 9.972301483154297,
|
18683 |
+
"learning_rate": 7.781008472884543e-07,
|
18684 |
+
"loss": 8.2678,
|
18685 |
+
"step": 2668
|
18686 |
+
},
|
18687 |
+
{
|
18688 |
+
"epoch": 0.9458669265526712,
|
18689 |
+
"grad_norm": 9.519344329833984,
|
18690 |
+
"learning_rate": 7.679888673171875e-07,
|
18691 |
+
"loss": 8.1661,
|
18692 |
+
"step": 2669
|
18693 |
+
},
|
18694 |
+
{
|
18695 |
+
"epoch": 0.9462213165588731,
|
18696 |
+
"grad_norm": 10.169025421142578,
|
18697 |
+
"learning_rate": 7.579425155002484e-07,
|
18698 |
+
"loss": 7.7365,
|
18699 |
+
"step": 2670
|
18700 |
+
},
|
18701 |
+
{
|
18702 |
+
"epoch": 0.9465757065650748,
|
18703 |
+
"grad_norm": 9.821728706359863,
|
18704 |
+
"learning_rate": 7.479618052298132e-07,
|
18705 |
+
"loss": 7.0132,
|
18706 |
+
"step": 2671
|
18707 |
+
},
|
18708 |
+
{
|
18709 |
+
"epoch": 0.9469300965712767,
|
18710 |
+
"grad_norm": 10.060563087463379,
|
18711 |
+
"learning_rate": 7.380467498105448e-07,
|
18712 |
+
"loss": 8.3833,
|
18713 |
+
"step": 2672
|
18714 |
+
},
|
18715 |
+
{
|
18716 |
+
"epoch": 0.9472844865774785,
|
18717 |
+
"grad_norm": 10.54714584350586,
|
18718 |
+
"learning_rate": 7.281973624595928e-07,
|
18719 |
+
"loss": 8.1672,
|
18720 |
+
"step": 2673
|
18721 |
+
},
|
18722 |
+
{
|
18723 |
+
"epoch": 0.9476388765836803,
|
18724 |
+
"grad_norm": 9.718517303466797,
|
18725 |
+
"learning_rate": 7.184136563065714e-07,
|
18726 |
+
"loss": 8.3017,
|
18727 |
+
"step": 2674
|
18728 |
+
},
|
18729 |
+
{
|
18730 |
+
"epoch": 0.9479932665898821,
|
18731 |
+
"grad_norm": 9.251363754272461,
|
18732 |
+
"learning_rate": 7.086956443935255e-07,
|
18733 |
+
"loss": 6.7159,
|
18734 |
+
"step": 2675
|
18735 |
+
},
|
18736 |
+
{
|
18737 |
+
"epoch": 0.948347656596084,
|
18738 |
+
"grad_norm": 10.097101211547852,
|
18739 |
+
"learning_rate": 6.990433396749429e-07,
|
18740 |
+
"loss": 7.9204,
|
18741 |
+
"step": 2676
|
18742 |
+
},
|
18743 |
+
{
|
18744 |
+
"epoch": 0.9487020466022859,
|
18745 |
+
"grad_norm": 9.57497501373291,
|
18746 |
+
"learning_rate": 6.894567550177145e-07,
|
18747 |
+
"loss": 8.0954,
|
18748 |
+
"step": 2677
|
18749 |
+
},
|
18750 |
+
{
|
18751 |
+
"epoch": 0.9490564366084876,
|
18752 |
+
"grad_norm": 9.721705436706543,
|
18753 |
+
"learning_rate": 6.799359032011343e-07,
|
18754 |
+
"loss": 7.5147,
|
18755 |
+
"step": 2678
|
18756 |
+
},
|
18757 |
+
{
|
18758 |
+
"epoch": 0.9494108266146895,
|
18759 |
+
"grad_norm": 9.056941986083984,
|
18760 |
+
"learning_rate": 6.704807969168447e-07,
|
18761 |
+
"loss": 7.4122,
|
18762 |
+
"step": 2679
|
18763 |
+
},
|
18764 |
+
{
|
18765 |
+
"epoch": 0.9497652166208913,
|
18766 |
+
"grad_norm": 9.660868644714355,
|
18767 |
+
"learning_rate": 6.610914487688691e-07,
|
18768 |
+
"loss": 8.4362,
|
18769 |
+
"step": 2680
|
18770 |
+
},
|
18771 |
+
{
|
18772 |
+
"epoch": 0.9501196066270932,
|
18773 |
+
"grad_norm": 8.478535652160645,
|
18774 |
+
"learning_rate": 6.517678712735786e-07,
|
18775 |
+
"loss": 6.5174,
|
18776 |
+
"step": 2681
|
18777 |
+
},
|
18778 |
+
{
|
18779 |
+
"epoch": 0.9504739966332949,
|
18780 |
+
"grad_norm": 10.852248191833496,
|
18781 |
+
"learning_rate": 6.425100768596481e-07,
|
18782 |
+
"loss": 7.8922,
|
18783 |
+
"step": 2682
|
18784 |
+
},
|
18785 |
+
{
|
18786 |
+
"epoch": 0.9508283866394968,
|
18787 |
+
"grad_norm": 10.073144912719727,
|
18788 |
+
"learning_rate": 6.333180778680725e-07,
|
18789 |
+
"loss": 7.5269,
|
18790 |
+
"step": 2683
|
18791 |
+
},
|
18792 |
+
{
|
18793 |
+
"epoch": 0.9511827766456986,
|
18794 |
+
"grad_norm": 8.856159210205078,
|
18795 |
+
"learning_rate": 6.241918865521446e-07,
|
18796 |
+
"loss": 7.8673,
|
18797 |
+
"step": 2684
|
18798 |
+
},
|
18799 |
+
{
|
18800 |
+
"epoch": 0.9515371666519005,
|
18801 |
+
"grad_norm": 10.096376419067383,
|
18802 |
+
"learning_rate": 6.151315150774162e-07,
|
18803 |
+
"loss": 7.0726,
|
18804 |
+
"step": 2685
|
18805 |
+
},
|
18806 |
+
{
|
18807 |
+
"epoch": 0.9518915566581022,
|
18808 |
+
"grad_norm": 10.148407936096191,
|
18809 |
+
"learning_rate": 6.06136975521715e-07,
|
18810 |
+
"loss": 7.3849,
|
18811 |
+
"step": 2686
|
18812 |
+
},
|
18813 |
+
{
|
18814 |
+
"epoch": 0.9522459466643041,
|
18815 |
+
"grad_norm": 10.342374801635742,
|
18816 |
+
"learning_rate": 5.972082798751056e-07,
|
18817 |
+
"loss": 7.6771,
|
18818 |
+
"step": 2687
|
18819 |
+
},
|
18820 |
+
{
|
18821 |
+
"epoch": 0.9526003366705059,
|
18822 |
+
"grad_norm": 10.118182182312012,
|
18823 |
+
"learning_rate": 5.883454400398891e-07,
|
18824 |
+
"loss": 8.3771,
|
18825 |
+
"step": 2688
|
18826 |
+
},
|
18827 |
+
{
|
18828 |
+
"epoch": 0.9529547266767077,
|
18829 |
+
"grad_norm": 9.883116722106934,
|
18830 |
+
"learning_rate": 5.795484678305541e-07,
|
18831 |
+
"loss": 7.5366,
|
18832 |
+
"step": 2689
|
18833 |
+
},
|
18834 |
+
{
|
18835 |
+
"epoch": 0.9533091166829095,
|
18836 |
+
"grad_norm": 10.128074645996094,
|
18837 |
+
"learning_rate": 5.708173749738143e-07,
|
18838 |
+
"loss": 7.0381,
|
18839 |
+
"step": 2690
|
18840 |
+
},
|
18841 |
+
{
|
18842 |
+
"epoch": 0.9536635066891114,
|
18843 |
+
"grad_norm": 10.316349983215332,
|
18844 |
+
"learning_rate": 5.621521731085433e-07,
|
18845 |
+
"loss": 7.694,
|
18846 |
+
"step": 2691
|
18847 |
+
},
|
18848 |
+
{
|
18849 |
+
"epoch": 0.9540178966953132,
|
18850 |
+
"grad_norm": 10.563957214355469,
|
18851 |
+
"learning_rate": 5.535528737857898e-07,
|
18852 |
+
"loss": 7.7825,
|
18853 |
+
"step": 2692
|
18854 |
+
},
|
18855 |
+
{
|
18856 |
+
"epoch": 0.954372286701515,
|
18857 |
+
"grad_norm": 10.890534400939941,
|
18858 |
+
"learning_rate": 5.450194884687509e-07,
|
18859 |
+
"loss": 6.7118,
|
18860 |
+
"step": 2693
|
18861 |
+
},
|
18862 |
+
{
|
18863 |
+
"epoch": 0.9547266767077168,
|
18864 |
+
"grad_norm": 9.884705543518066,
|
18865 |
+
"learning_rate": 5.365520285327441e-07,
|
18866 |
+
"loss": 5.9642,
|
18867 |
+
"step": 2694
|
18868 |
+
},
|
18869 |
+
{
|
18870 |
+
"epoch": 0.9550810667139187,
|
18871 |
+
"grad_norm": 10.200454711914062,
|
18872 |
+
"learning_rate": 5.281505052652347e-07,
|
18873 |
+
"loss": 7.461,
|
18874 |
+
"step": 2695
|
18875 |
+
},
|
18876 |
+
{
|
18877 |
+
"epoch": 0.9554354567201205,
|
18878 |
+
"grad_norm": 10.992594718933105,
|
18879 |
+
"learning_rate": 5.19814929865764e-07,
|
18880 |
+
"loss": 7.0572,
|
18881 |
+
"step": 2696
|
18882 |
+
},
|
18883 |
+
{
|
18884 |
+
"epoch": 0.9557898467263223,
|
18885 |
+
"grad_norm": 11.103619575500488,
|
18886 |
+
"learning_rate": 5.115453134459769e-07,
|
18887 |
+
"loss": 6.8733,
|
18888 |
+
"step": 2697
|
18889 |
+
},
|
18890 |
+
{
|
18891 |
+
"epoch": 0.9561442367325241,
|
18892 |
+
"grad_norm": 11.309475898742676,
|
18893 |
+
"learning_rate": 5.033416670295832e-07,
|
18894 |
+
"loss": 8.2346,
|
18895 |
+
"step": 2698
|
18896 |
+
},
|
18897 |
+
{
|
18898 |
+
"epoch": 0.956498626738726,
|
18899 |
+
"grad_norm": 11.035356521606445,
|
18900 |
+
"learning_rate": 4.952040015523629e-07,
|
18901 |
+
"loss": 6.114,
|
18902 |
+
"step": 2699
|
18903 |
+
},
|
18904 |
+
{
|
18905 |
+
"epoch": 0.9568530167449278,
|
18906 |
+
"grad_norm": 12.30701732635498,
|
18907 |
+
"learning_rate": 4.871323278621331e-07,
|
18908 |
+
"loss": 7.3802,
|
18909 |
+
"step": 2700
|
18910 |
+
},
|
18911 |
+
{
|
18912 |
+
"epoch": 0.9572074067511296,
|
18913 |
+
"grad_norm": 6.6567301750183105,
|
18914 |
+
"learning_rate": 4.791266567187424e-07,
|
18915 |
+
"loss": 8.8847,
|
18916 |
+
"step": 2701
|
18917 |
+
},
|
18918 |
+
{
|
18919 |
+
"epoch": 0.9575617967573314,
|
18920 |
+
"grad_norm": 7.835629940032959,
|
18921 |
+
"learning_rate": 4.711869987940598e-07,
|
18922 |
+
"loss": 8.6423,
|
18923 |
+
"step": 2702
|
18924 |
+
},
|
18925 |
+
{
|
18926 |
+
"epoch": 0.9579161867635333,
|
18927 |
+
"grad_norm": 9.732732772827148,
|
18928 |
+
"learning_rate": 4.633133646719523e-07,
|
18929 |
+
"loss": 10.0649,
|
18930 |
+
"step": 2703
|
18931 |
+
},
|
18932 |
+
{
|
18933 |
+
"epoch": 0.958270576769735,
|
18934 |
+
"grad_norm": 9.80262279510498,
|
18935 |
+
"learning_rate": 4.5550576484827414e-07,
|
18936 |
+
"loss": 9.8086,
|
18937 |
+
"step": 2704
|
18938 |
+
},
|
18939 |
+
{
|
18940 |
+
"epoch": 0.9586249667759369,
|
18941 |
+
"grad_norm": 9.661641120910645,
|
18942 |
+
"learning_rate": 4.477642097308499e-07,
|
18943 |
+
"loss": 8.6068,
|
18944 |
+
"step": 2705
|
18945 |
+
},
|
18946 |
+
{
|
18947 |
+
"epoch": 0.9589793567821387,
|
18948 |
+
"grad_norm": 9.993663787841797,
|
18949 |
+
"learning_rate": 4.400887096394801e-07,
|
18950 |
+
"loss": 7.7553,
|
18951 |
+
"step": 2706
|
18952 |
+
},
|
18953 |
+
{
|
18954 |
+
"epoch": 0.9593337467883406,
|
18955 |
+
"grad_norm": 11.445382118225098,
|
18956 |
+
"learning_rate": 4.324792748058915e-07,
|
18957 |
+
"loss": 8.1653,
|
18958 |
+
"step": 2707
|
18959 |
+
},
|
18960 |
+
{
|
18961 |
+
"epoch": 0.9596881367945423,
|
18962 |
+
"grad_norm": 11.129366874694824,
|
18963 |
+
"learning_rate": 4.249359153737531e-07,
|
18964 |
+
"loss": 6.551,
|
18965 |
+
"step": 2708
|
18966 |
+
},
|
18967 |
+
{
|
18968 |
+
"epoch": 0.9600425268007442,
|
18969 |
+
"grad_norm": 11.85264778137207,
|
18970 |
+
"learning_rate": 4.1745864139865476e-07,
|
18971 |
+
"loss": 7.6299,
|
18972 |
+
"step": 2709
|
18973 |
+
},
|
18974 |
+
{
|
18975 |
+
"epoch": 0.960396916806946,
|
18976 |
+
"grad_norm": 12.840014457702637,
|
18977 |
+
"learning_rate": 4.100474628480844e-07,
|
18978 |
+
"loss": 8.1851,
|
18979 |
+
"step": 2710
|
18980 |
+
},
|
18981 |
+
{
|
18982 |
+
"epoch": 0.9607513068131479,
|
18983 |
+
"grad_norm": 10.203207015991211,
|
18984 |
+
"learning_rate": 4.0270238960142813e-07,
|
18985 |
+
"loss": 7.9981,
|
18986 |
+
"step": 2711
|
18987 |
+
},
|
18988 |
+
{
|
18989 |
+
"epoch": 0.9611056968193497,
|
18990 |
+
"grad_norm": 9.188422203063965,
|
18991 |
+
"learning_rate": 3.954234314499539e-07,
|
18992 |
+
"loss": 7.931,
|
18993 |
+
"step": 2712
|
18994 |
+
},
|
18995 |
+
{
|
18996 |
+
"epoch": 0.9614600868255515,
|
18997 |
+
"grad_norm": 9.173449516296387,
|
18998 |
+
"learning_rate": 3.8821059809678315e-07,
|
18999 |
+
"loss": 8.5163,
|
19000 |
+
"step": 2713
|
19001 |
+
},
|
19002 |
+
{
|
19003 |
+
"epoch": 0.9618144768317534,
|
19004 |
+
"grad_norm": 9.872574806213379,
|
19005 |
+
"learning_rate": 3.8106389915690264e-07,
|
19006 |
+
"loss": 7.8934,
|
19007 |
+
"step": 2714
|
19008 |
+
},
|
19009 |
+
{
|
19010 |
+
"epoch": 0.9621688668379552,
|
19011 |
+
"grad_norm": 8.945125579833984,
|
19012 |
+
"learning_rate": 3.7398334415714163e-07,
|
19013 |
+
"loss": 8.1885,
|
19014 |
+
"step": 2715
|
19015 |
+
},
|
19016 |
+
{
|
19017 |
+
"epoch": 0.962523256844157,
|
19018 |
+
"grad_norm": 10.056449890136719,
|
19019 |
+
"learning_rate": 3.6696894253614447e-07,
|
19020 |
+
"loss": 7.7838,
|
19021 |
+
"step": 2716
|
19022 |
+
},
|
19023 |
+
{
|
19024 |
+
"epoch": 0.9628776468503588,
|
19025 |
+
"grad_norm": 9.586847305297852,
|
19026 |
+
"learning_rate": 3.600207036443759e-07,
|
19027 |
+
"loss": 7.3825,
|
19028 |
+
"step": 2717
|
19029 |
+
},
|
19030 |
+
{
|
19031 |
+
"epoch": 0.9632320368565607,
|
19032 |
+
"grad_norm": 9.794684410095215,
|
19033 |
+
"learning_rate": 3.5313863674410476e-07,
|
19034 |
+
"loss": 8.0346,
|
19035 |
+
"step": 2718
|
19036 |
+
},
|
19037 |
+
{
|
19038 |
+
"epoch": 0.9635864268627625,
|
19039 |
+
"grad_norm": 9.196282386779785,
|
19040 |
+
"learning_rate": 3.463227510093925e-07,
|
19041 |
+
"loss": 6.8752,
|
19042 |
+
"step": 2719
|
19043 |
+
},
|
19044 |
+
{
|
19045 |
+
"epoch": 0.9639408168689643,
|
19046 |
+
"grad_norm": 9.417471885681152,
|
19047 |
+
"learning_rate": 3.3957305552607123e-07,
|
19048 |
+
"loss": 7.9057,
|
19049 |
+
"step": 2720
|
19050 |
+
},
|
19051 |
+
{
|
19052 |
+
"epoch": 0.9642952068751661,
|
19053 |
+
"grad_norm": 9.390932083129883,
|
19054 |
+
"learning_rate": 3.328895592917491e-07,
|
19055 |
+
"loss": 8.1303,
|
19056 |
+
"step": 2721
|
19057 |
+
},
|
19058 |
+
{
|
19059 |
+
"epoch": 0.964649596881368,
|
19060 |
+
"grad_norm": 9.340128898620605,
|
19061 |
+
"learning_rate": 3.262722712157773e-07,
|
19062 |
+
"loss": 7.6802,
|
19063 |
+
"step": 2722
|
19064 |
+
},
|
19065 |
+
{
|
19066 |
+
"epoch": 0.9650039868875697,
|
19067 |
+
"grad_norm": 10.40151309967041,
|
19068 |
+
"learning_rate": 3.197212001192551e-07,
|
19069 |
+
"loss": 7.4673,
|
19070 |
+
"step": 2723
|
19071 |
+
},
|
19072 |
+
{
|
19073 |
+
"epoch": 0.9653583768937716,
|
19074 |
+
"grad_norm": 9.127900123596191,
|
19075 |
+
"learning_rate": 3.1323635473501366e-07,
|
19076 |
+
"loss": 7.2802,
|
19077 |
+
"step": 2724
|
19078 |
+
},
|
19079 |
+
{
|
19080 |
+
"epoch": 0.9657127668999734,
|
19081 |
+
"grad_norm": 9.510401725769043,
|
19082 |
+
"learning_rate": 3.0681774370759365e-07,
|
19083 |
+
"loss": 8.0888,
|
19084 |
+
"step": 2725
|
19085 |
+
},
|
19086 |
+
{
|
19087 |
+
"epoch": 0.9660671569061753,
|
19088 |
+
"grad_norm": 10.736952781677246,
|
19089 |
+
"learning_rate": 3.0046537559325626e-07,
|
19090 |
+
"loss": 8.2639,
|
19091 |
+
"step": 2726
|
19092 |
+
},
|
19093 |
+
{
|
19094 |
+
"epoch": 0.966421546912377,
|
19095 |
+
"grad_norm": 9.19753646850586,
|
19096 |
+
"learning_rate": 2.9417925885994455e-07,
|
19097 |
+
"loss": 8.6266,
|
19098 |
+
"step": 2727
|
19099 |
+
},
|
19100 |
+
{
|
19101 |
+
"epoch": 0.9667759369185789,
|
19102 |
+
"grad_norm": 10.589143753051758,
|
19103 |
+
"learning_rate": 2.879594018873e-07,
|
19104 |
+
"loss": 7.7698,
|
19105 |
+
"step": 2728
|
19106 |
+
},
|
19107 |
+
{
|
19108 |
+
"epoch": 0.9671303269247807,
|
19109 |
+
"grad_norm": 10.227514266967773,
|
19110 |
+
"learning_rate": 2.81805812966629e-07,
|
19111 |
+
"loss": 7.5404,
|
19112 |
+
"step": 2729
|
19113 |
+
},
|
19114 |
+
{
|
19115 |
+
"epoch": 0.9674847169309826,
|
19116 |
+
"grad_norm": 9.569182395935059,
|
19117 |
+
"learning_rate": 2.757185003008922e-07,
|
19118 |
+
"loss": 6.8167,
|
19119 |
+
"step": 2730
|
19120 |
+
},
|
19121 |
+
{
|
19122 |
+
"epoch": 0.9678391069371843,
|
19123 |
+
"grad_norm": 10.21536922454834,
|
19124 |
+
"learning_rate": 2.6969747200472075e-07,
|
19125 |
+
"loss": 7.0856,
|
19126 |
+
"step": 2731
|
19127 |
+
},
|
19128 |
+
{
|
19129 |
+
"epoch": 0.9681934969433862,
|
19130 |
+
"grad_norm": 10.365554809570312,
|
19131 |
+
"learning_rate": 2.637427361043665e-07,
|
19132 |
+
"loss": 7.2888,
|
19133 |
+
"step": 2732
|
19134 |
+
},
|
19135 |
+
{
|
19136 |
+
"epoch": 0.968547886949588,
|
19137 |
+
"grad_norm": 9.141191482543945,
|
19138 |
+
"learning_rate": 2.5785430053772984e-07,
|
19139 |
+
"loss": 6.754,
|
19140 |
+
"step": 2733
|
19141 |
+
},
|
19142 |
+
{
|
19143 |
+
"epoch": 0.9689022769557899,
|
19144 |
+
"grad_norm": 9.845773696899414,
|
19145 |
+
"learning_rate": 2.5203217315431517e-07,
|
19146 |
+
"loss": 7.6066,
|
19147 |
+
"step": 2734
|
19148 |
+
},
|
19149 |
+
{
|
19150 |
+
"epoch": 0.9692566669619916,
|
19151 |
+
"grad_norm": 10.204642295837402,
|
19152 |
+
"learning_rate": 2.4627636171523635e-07,
|
19153 |
+
"loss": 6.6599,
|
19154 |
+
"step": 2735
|
19155 |
+
},
|
19156 |
+
{
|
19157 |
+
"epoch": 0.9696110569681935,
|
19158 |
+
"grad_norm": 9.55280876159668,
|
19159 |
+
"learning_rate": 2.4058687389322266e-07,
|
19160 |
+
"loss": 7.4796,
|
19161 |
+
"step": 2736
|
19162 |
+
},
|
19163 |
+
{
|
19164 |
+
"epoch": 0.9699654469743954,
|
19165 |
+
"grad_norm": 10.470826148986816,
|
19166 |
+
"learning_rate": 2.3496371727256829e-07,
|
19167 |
+
"loss": 8.3157,
|
19168 |
+
"step": 2737
|
19169 |
+
},
|
19170 |
+
{
|
19171 |
+
"epoch": 0.9703198369805971,
|
19172 |
+
"grad_norm": 9.35001277923584,
|
19173 |
+
"learning_rate": 2.294068993491605e-07,
|
19174 |
+
"loss": 6.3077,
|
19175 |
+
"step": 2738
|
19176 |
+
},
|
19177 |
+
{
|
19178 |
+
"epoch": 0.970674226986799,
|
19179 |
+
"grad_norm": 10.80824089050293,
|
19180 |
+
"learning_rate": 2.2391642753044617e-07,
|
19181 |
+
"loss": 8.9389,
|
19182 |
+
"step": 2739
|
19183 |
+
},
|
19184 |
+
{
|
19185 |
+
"epoch": 0.9710286169930008,
|
19186 |
+
"grad_norm": 10.638192176818848,
|
19187 |
+
"learning_rate": 2.184923091354374e-07,
|
19188 |
+
"loss": 7.5248,
|
19189 |
+
"step": 2740
|
19190 |
+
},
|
19191 |
+
{
|
19192 |
+
"epoch": 0.9713830069992027,
|
19193 |
+
"grad_norm": 10.541278839111328,
|
19194 |
+
"learning_rate": 2.1313455139469474e-07,
|
19195 |
+
"loss": 7.765,
|
19196 |
+
"step": 2741
|
19197 |
+
},
|
19198 |
+
{
|
19199 |
+
"epoch": 0.9717373970054044,
|
19200 |
+
"grad_norm": 10.201549530029297,
|
19201 |
+
"learning_rate": 2.0784316145031624e-07,
|
19202 |
+
"loss": 7.2592,
|
19203 |
+
"step": 2742
|
19204 |
+
},
|
19205 |
+
{
|
19206 |
+
"epoch": 0.9720917870116063,
|
19207 |
+
"grad_norm": 9.781977653503418,
|
19208 |
+
"learning_rate": 2.0261814635591514e-07,
|
19209 |
+
"loss": 7.7688,
|
19210 |
+
"step": 2743
|
19211 |
+
},
|
19212 |
+
{
|
19213 |
+
"epoch": 0.9724461770178081,
|
19214 |
+
"grad_norm": 10.238073348999023,
|
19215 |
+
"learning_rate": 1.974595130766421e-07,
|
19216 |
+
"loss": 6.4656,
|
19217 |
+
"step": 2744
|
19218 |
+
},
|
19219 |
+
{
|
19220 |
+
"epoch": 0.97280056702401,
|
19221 |
+
"grad_norm": 10.194886207580566,
|
19222 |
+
"learning_rate": 1.9236726848915754e-07,
|
19223 |
+
"loss": 7.2333,
|
19224 |
+
"step": 2745
|
19225 |
+
},
|
19226 |
+
{
|
19227 |
+
"epoch": 0.9731549570302117,
|
19228 |
+
"grad_norm": 10.862103462219238,
|
19229 |
+
"learning_rate": 1.873414193816092e-07,
|
19230 |
+
"loss": 6.7863,
|
19231 |
+
"step": 2746
|
19232 |
+
},
|
19233 |
+
{
|
19234 |
+
"epoch": 0.9735093470364136,
|
19235 |
+
"grad_norm": 10.344438552856445,
|
19236 |
+
"learning_rate": 1.8238197245366018e-07,
|
19237 |
+
"loss": 7.2087,
|
19238 |
+
"step": 2747
|
19239 |
+
},
|
19240 |
+
{
|
19241 |
+
"epoch": 0.9738637370426154,
|
19242 |
+
"grad_norm": 10.415502548217773,
|
19243 |
+
"learning_rate": 1.7748893431642767e-07,
|
19244 |
+
"loss": 6.8113,
|
19245 |
+
"step": 2748
|
19246 |
+
},
|
19247 |
+
{
|
19248 |
+
"epoch": 0.9742181270488173,
|
19249 |
+
"grad_norm": 11.136659622192383,
|
19250 |
+
"learning_rate": 1.72662311492533e-07,
|
19251 |
+
"loss": 6.8009,
|
19252 |
+
"step": 2749
|
19253 |
+
},
|
19254 |
+
{
|
19255 |
+
"epoch": 0.974572517055019,
|
19256 |
+
"grad_norm": 11.401845932006836,
|
19257 |
+
"learning_rate": 1.67902110416035e-07,
|
19258 |
+
"loss": 5.7257,
|
19259 |
+
"step": 2750
|
19260 |
+
},
|
19261 |
+
{
|
19262 |
+
"epoch": 0.9749269070612209,
|
19263 |
+
"grad_norm": 6.9431891441345215,
|
19264 |
+
"learning_rate": 1.632083374324689e-07,
|
19265 |
+
"loss": 9.8697,
|
19266 |
+
"step": 2751
|
19267 |
+
},
|
19268 |
+
{
|
19269 |
+
"epoch": 0.9752812970674227,
|
19270 |
+
"grad_norm": 7.812088966369629,
|
19271 |
+
"learning_rate": 1.5858099879881848e-07,
|
19272 |
+
"loss": 9.035,
|
19273 |
+
"step": 2752
|
19274 |
+
},
|
19275 |
+
{
|
19276 |
+
"epoch": 0.9756356870736245,
|
19277 |
+
"grad_norm": 8.463058471679688,
|
19278 |
+
"learning_rate": 1.540201006834996e-07,
|
19279 |
+
"loss": 9.1594,
|
19280 |
+
"step": 2753
|
19281 |
+
},
|
19282 |
+
{
|
19283 |
+
"epoch": 0.9759900770798263,
|
19284 |
+
"grad_norm": 9.573930740356445,
|
19285 |
+
"learning_rate": 1.4952564916636546e-07,
|
19286 |
+
"loss": 9.2905,
|
19287 |
+
"step": 2754
|
19288 |
+
},
|
19289 |
+
{
|
19290 |
+
"epoch": 0.9763444670860282,
|
19291 |
+
"grad_norm": 9.488774299621582,
|
19292 |
+
"learning_rate": 1.4509765023868472e-07,
|
19293 |
+
"loss": 8.1606,
|
19294 |
+
"step": 2755
|
19295 |
+
},
|
19296 |
+
{
|
19297 |
+
"epoch": 0.97669885709223,
|
19298 |
+
"grad_norm": 11.166428565979004,
|
19299 |
+
"learning_rate": 1.4073610980316344e-07,
|
19300 |
+
"loss": 8.8016,
|
19301 |
+
"step": 2756
|
19302 |
+
},
|
19303 |
+
{
|
19304 |
+
"epoch": 0.9770532470984318,
|
19305 |
+
"grad_norm": 11.221932411193848,
|
19306 |
+
"learning_rate": 1.364410336738897e-07,
|
19307 |
+
"loss": 8.18,
|
19308 |
+
"step": 2757
|
19309 |
+
},
|
19310 |
+
{
|
19311 |
+
"epoch": 0.9774076371046336,
|
19312 |
+
"grad_norm": 11.63525676727295,
|
19313 |
+
"learning_rate": 1.32212427576367e-07,
|
19314 |
+
"loss": 7.9339,
|
19315 |
+
"step": 2758
|
19316 |
+
},
|
19317 |
+
{
|
19318 |
+
"epoch": 0.9777620271108355,
|
19319 |
+
"grad_norm": 9.624247550964355,
|
19320 |
+
"learning_rate": 1.2805029714749173e-07,
|
19321 |
+
"loss": 6.4656,
|
19322 |
+
"step": 2759
|
19323 |
+
},
|
19324 |
+
{
|
19325 |
+
"epoch": 0.9781164171170373,
|
19326 |
+
"grad_norm": 12.943864822387695,
|
19327 |
+
"learning_rate": 1.239546479355369e-07,
|
19328 |
+
"loss": 7.5002,
|
19329 |
+
"step": 2760
|
19330 |
+
},
|
19331 |
+
{
|
19332 |
+
"epoch": 0.9784708071232391,
|
19333 |
+
"grad_norm": 10.584692001342773,
|
19334 |
+
"learning_rate": 1.1992548540016856e-07,
|
19335 |
+
"loss": 8.1984,
|
19336 |
+
"step": 2761
|
19337 |
+
},
|
19338 |
+
{
|
19339 |
+
"epoch": 0.9788251971294409,
|
19340 |
+
"grad_norm": 10.64038372039795,
|
19341 |
+
"learning_rate": 1.1596281491241257e-07,
|
19342 |
+
"loss": 9.0096,
|
19343 |
+
"step": 2762
|
19344 |
+
},
|
19345 |
+
{
|
19346 |
+
"epoch": 0.9791795871356428,
|
19347 |
+
"grad_norm": 10.040376663208008,
|
19348 |
+
"learning_rate": 1.1206664175465453e-07,
|
19349 |
+
"loss": 7.4336,
|
19350 |
+
"step": 2763
|
19351 |
+
},
|
19352 |
+
{
|
19353 |
+
"epoch": 0.9795339771418446,
|
19354 |
+
"grad_norm": 9.4454345703125,
|
19355 |
+
"learning_rate": 1.0823697112064546e-07,
|
19356 |
+
"loss": 7.6408,
|
19357 |
+
"step": 2764
|
19358 |
+
},
|
19359 |
+
{
|
19360 |
+
"epoch": 0.9798883671480464,
|
19361 |
+
"grad_norm": 8.535350799560547,
|
19362 |
+
"learning_rate": 1.0447380811548502e-07,
|
19363 |
+
"loss": 6.9056,
|
19364 |
+
"step": 2765
|
19365 |
+
},
|
19366 |
+
{
|
19367 |
+
"epoch": 0.9802427571542482,
|
19368 |
+
"grad_norm": 9.074763298034668,
|
19369 |
+
"learning_rate": 1.0077715775561047e-07,
|
19370 |
+
"loss": 9.1287,
|
19371 |
+
"step": 2766
|
19372 |
+
},
|
19373 |
+
{
|
19374 |
+
"epoch": 0.9805971471604501,
|
19375 |
+
"grad_norm": 10.088266372680664,
|
19376 |
+
"learning_rate": 9.714702496880224e-08,
|
19377 |
+
"loss": 8.1048,
|
19378 |
+
"step": 2767
|
19379 |
+
},
|
19380 |
+
{
|
19381 |
+
"epoch": 0.980951537166652,
|
19382 |
+
"grad_norm": 9.633566856384277,
|
19383 |
+
"learning_rate": 9.35834145941561e-08,
|
19384 |
+
"loss": 7.726,
|
19385 |
+
"step": 2768
|
19386 |
+
},
|
19387 |
+
{
|
19388 |
+
"epoch": 0.9813059271728537,
|
19389 |
+
"grad_norm": 9.597146034240723,
|
19390 |
+
"learning_rate": 9.008633138211098e-08,
|
19391 |
+
"loss": 7.7658,
|
19392 |
+
"step": 2769
|
19393 |
+
},
|
19394 |
+
{
|
19395 |
+
"epoch": 0.9816603171790556,
|
19396 |
+
"grad_norm": 8.681363105773926,
|
19397 |
+
"learning_rate": 8.66557799944101e-08,
|
19398 |
+
"loss": 8.0223,
|
19399 |
+
"step": 2770
|
19400 |
+
},
|
19401 |
+
{
|
19402 |
+
"epoch": 0.9820147071852574,
|
19403 |
+
"grad_norm": 9.311779022216797,
|
19404 |
+
"learning_rate": 8.329176500411206e-08,
|
19405 |
+
"loss": 8.4576,
|
19406 |
+
"step": 2771
|
19407 |
+
},
|
19408 |
+
{
|
19409 |
+
"epoch": 0.9823690971914592,
|
19410 |
+
"grad_norm": 9.054991722106934,
|
19411 |
+
"learning_rate": 7.99942908955742e-08,
|
19412 |
+
"loss": 8.237,
|
19413 |
+
"step": 2772
|
19414 |
+
},
|
19415 |
+
{
|
19416 |
+
"epoch": 0.982723487197661,
|
19417 |
+
"grad_norm": 9.217580795288086,
|
19418 |
+
"learning_rate": 7.676336206445256e-08,
|
19419 |
+
"loss": 8.411,
|
19420 |
+
"step": 2773
|
19421 |
+
},
|
19422 |
+
{
|
19423 |
+
"epoch": 0.9830778772038629,
|
19424 |
+
"grad_norm": 8.914115905761719,
|
19425 |
+
"learning_rate": 7.35989828177075e-08,
|
19426 |
+
"loss": 8.1964,
|
19427 |
+
"step": 2774
|
19428 |
+
},
|
19429 |
+
{
|
19430 |
+
"epoch": 0.9834322672100647,
|
19431 |
+
"grad_norm": 10.451409339904785,
|
19432 |
+
"learning_rate": 7.050115737356477e-08,
|
19433 |
+
"loss": 7.1974,
|
19434 |
+
"step": 2775
|
19435 |
+
},
|
19436 |
+
{
|
19437 |
+
"epoch": 0.9837866572162665,
|
19438 |
+
"grad_norm": 8.894097328186035,
|
19439 |
+
"learning_rate": 6.746988986156e-08,
|
19440 |
+
"loss": 7.4948,
|
19441 |
+
"step": 2776
|
19442 |
+
},
|
19443 |
+
{
|
19444 |
+
"epoch": 0.9841410472224683,
|
19445 |
+
"grad_norm": 8.992011070251465,
|
19446 |
+
"learning_rate": 6.450518432247754e-08,
|
19447 |
+
"loss": 7.7269,
|
19448 |
+
"step": 2777
|
19449 |
+
},
|
19450 |
+
{
|
19451 |
+
"epoch": 0.9844954372286702,
|
19452 |
+
"grad_norm": 9.361895561218262,
|
19453 |
+
"learning_rate": 6.160704470838385e-08,
|
19454 |
+
"loss": 8.4291,
|
19455 |
+
"step": 2778
|
19456 |
+
},
|
19457 |
+
{
|
19458 |
+
"epoch": 0.984849827234872,
|
19459 |
+
"grad_norm": 10.004155158996582,
|
19460 |
+
"learning_rate": 5.8775474882616365e-08,
|
19461 |
+
"loss": 7.9027,
|
19462 |
+
"step": 2779
|
19463 |
+
},
|
19464 |
+
{
|
19465 |
+
"epoch": 0.9852042172410738,
|
19466 |
+
"grad_norm": 9.721527099609375,
|
19467 |
+
"learning_rate": 5.601047861976127e-08,
|
19468 |
+
"loss": 8.0284,
|
19469 |
+
"step": 2780
|
19470 |
+
},
|
19471 |
+
{
|
19472 |
+
"epoch": 0.9855586072472756,
|
19473 |
+
"grad_norm": 11.492663383483887,
|
19474 |
+
"learning_rate": 5.3312059605670204e-08,
|
19475 |
+
"loss": 8.6191,
|
19476 |
+
"step": 2781
|
19477 |
+
},
|
19478 |
+
{
|
19479 |
+
"epoch": 0.9859129972534775,
|
19480 |
+
"grad_norm": 9.143500328063965,
|
19481 |
+
"learning_rate": 5.06802214374269e-08,
|
19482 |
+
"loss": 7.0557,
|
19483 |
+
"step": 2782
|
19484 |
+
},
|
19485 |
+
{
|
19486 |
+
"epoch": 0.9862673872596793,
|
19487 |
+
"grad_norm": 9.80386734008789,
|
19488 |
+
"learning_rate": 4.8114967623380525e-08,
|
19489 |
+
"loss": 7.0624,
|
19490 |
+
"step": 2783
|
19491 |
+
},
|
19492 |
+
{
|
19493 |
+
"epoch": 0.9866217772658811,
|
19494 |
+
"grad_norm": 9.714198112487793,
|
19495 |
+
"learning_rate": 4.561630158311792e-08,
|
19496 |
+
"loss": 8.0296,
|
19497 |
+
"step": 2784
|
19498 |
+
},
|
19499 |
+
{
|
19500 |
+
"epoch": 0.9869761672720829,
|
19501 |
+
"grad_norm": 9.890578269958496,
|
19502 |
+
"learning_rate": 4.318422664744137e-08,
|
19503 |
+
"loss": 7.2464,
|
19504 |
+
"step": 2785
|
19505 |
+
},
|
19506 |
+
{
|
19507 |
+
"epoch": 0.9873305572782848,
|
19508 |
+
"grad_norm": 10.45261001586914,
|
19509 |
+
"learning_rate": 4.081874605841307e-08,
|
19510 |
+
"loss": 7.5394,
|
19511 |
+
"step": 2786
|
19512 |
+
},
|
19513 |
+
{
|
19514 |
+
"epoch": 0.9876849472844865,
|
19515 |
+
"grad_norm": 10.546357154846191,
|
19516 |
+
"learning_rate": 3.85198629693051e-08,
|
19517 |
+
"loss": 8.9054,
|
19518 |
+
"step": 2787
|
19519 |
+
},
|
19520 |
+
{
|
19521 |
+
"epoch": 0.9880393372906884,
|
19522 |
+
"grad_norm": 10.089964866638184,
|
19523 |
+
"learning_rate": 3.628758044461611e-08,
|
19524 |
+
"loss": 7.3345,
|
19525 |
+
"step": 2788
|
19526 |
+
},
|
19527 |
+
{
|
19528 |
+
"epoch": 0.9883937272968902,
|
19529 |
+
"grad_norm": 10.31136417388916,
|
19530 |
+
"learning_rate": 3.412190146006578e-08,
|
19531 |
+
"loss": 8.3785,
|
19532 |
+
"step": 2789
|
19533 |
+
},
|
19534 |
+
{
|
19535 |
+
"epoch": 0.9887481173030921,
|
19536 |
+
"grad_norm": 10.49755859375,
|
19537 |
+
"learning_rate": 3.202282890258368e-08,
|
19538 |
+
"loss": 6.5573,
|
19539 |
+
"step": 2790
|
19540 |
+
},
|
19541 |
+
{
|
19542 |
+
"epoch": 0.9891025073092938,
|
19543 |
+
"grad_norm": 10.216747283935547,
|
19544 |
+
"learning_rate": 2.9990365570314873e-08,
|
19545 |
+
"loss": 7.7684,
|
19546 |
+
"step": 2791
|
19547 |
+
},
|
19548 |
+
{
|
19549 |
+
"epoch": 0.9894568973154957,
|
19550 |
+
"grad_norm": 10.758793830871582,
|
19551 |
+
"learning_rate": 2.8024514172608763e-08,
|
19552 |
+
"loss": 7.4823,
|
19553 |
+
"step": 2792
|
19554 |
+
},
|
19555 |
+
{
|
19556 |
+
"epoch": 0.9898112873216975,
|
19557 |
+
"grad_norm": 11.304996490478516,
|
19558 |
+
"learning_rate": 2.612527733002468e-08,
|
19559 |
+
"loss": 8.3669,
|
19560 |
+
"step": 2793
|
19561 |
+
},
|
19562 |
+
{
|
19563 |
+
"epoch": 0.9901656773278994,
|
19564 |
+
"grad_norm": 9.883719444274902,
|
19565 |
+
"learning_rate": 2.4292657574320755e-08,
|
19566 |
+
"loss": 6.5523,
|
19567 |
+
"step": 2794
|
19568 |
+
},
|
19569 |
+
{
|
19570 |
+
"epoch": 0.9905200673341011,
|
19571 |
+
"grad_norm": 11.035935401916504,
|
19572 |
+
"learning_rate": 2.2526657348442835e-08,
|
19573 |
+
"loss": 7.3817,
|
19574 |
+
"step": 2795
|
19575 |
+
},
|
19576 |
+
{
|
19577 |
+
"epoch": 0.990874457340303,
|
19578 |
+
"grad_norm": 10.151780128479004,
|
19579 |
+
"learning_rate": 2.0827279006535582e-08,
|
19580 |
+
"loss": 6.5106,
|
19581 |
+
"step": 2796
|
19582 |
+
},
|
19583 |
+
{
|
19584 |
+
"epoch": 0.9912288473465048,
|
19585 |
+
"grad_norm": 10.024761199951172,
|
19586 |
+
"learning_rate": 1.919452481394246e-08,
|
19587 |
+
"loss": 6.3903,
|
19588 |
+
"step": 2797
|
19589 |
+
},
|
19590 |
+
{
|
19591 |
+
"epoch": 0.9915832373527067,
|
19592 |
+
"grad_norm": 11.098858833312988,
|
19593 |
+
"learning_rate": 1.7628396947183547e-08,
|
19594 |
+
"loss": 7.1063,
|
19595 |
+
"step": 2798
|
19596 |
+
},
|
19597 |
+
{
|
19598 |
+
"epoch": 0.9919376273589084,
|
19599 |
+
"grad_norm": 11.342761039733887,
|
19600 |
+
"learning_rate": 1.612889749396107e-08,
|
19601 |
+
"loss": 7.025,
|
19602 |
+
"step": 2799
|
19603 |
+
},
|
19604 |
+
{
|
19605 |
+
"epoch": 0.9922920173651103,
|
19606 |
+
"grad_norm": 12.230018615722656,
|
19607 |
+
"learning_rate": 1.469602845317608e-08,
|
19608 |
+
"loss": 7.5984,
|
19609 |
+
"step": 2800
|
19610 |
+
},
|
19611 |
+
{
|
19612 |
+
"epoch": 0.9926464073713122,
|
19613 |
+
"grad_norm": 7.639358043670654,
|
19614 |
+
"learning_rate": 1.3329791734895124e-08,
|
19615 |
+
"loss": 8.759,
|
19616 |
+
"step": 2801
|
19617 |
+
},
|
19618 |
+
{
|
19619 |
+
"epoch": 0.9930007973775139,
|
19620 |
+
"grad_norm": 8.9176607131958,
|
19621 |
+
"learning_rate": 1.2030189160355809e-08,
|
19622 |
+
"loss": 8.2035,
|
19623 |
+
"step": 2802
|
19624 |
+
},
|
19625 |
+
{
|
19626 |
+
"epoch": 0.9933551873837158,
|
19627 |
+
"grad_norm": 10.628494262695312,
|
19628 |
+
"learning_rate": 1.0797222461988998e-08,
|
19629 |
+
"loss": 8.0889,
|
19630 |
+
"step": 2803
|
19631 |
+
},
|
19632 |
+
{
|
19633 |
+
"epoch": 0.9937095773899176,
|
19634 |
+
"grad_norm": 12.253202438354492,
|
19635 |
+
"learning_rate": 9.63089328337996e-09,
|
19636 |
+
"loss": 7.7258,
|
19637 |
+
"step": 2804
|
19638 |
+
},
|
19639 |
+
{
|
19640 |
+
"epoch": 0.9940639673961195,
|
19641 |
+
"grad_norm": 12.697697639465332,
|
19642 |
+
"learning_rate": 8.53120317929057e-09,
|
19643 |
+
"loss": 8.0472,
|
19644 |
+
"step": 2805
|
19645 |
+
},
|
19646 |
+
{
|
19647 |
+
"epoch": 0.9944183574023212,
|
19648 |
+
"grad_norm": 9.967103958129883,
|
19649 |
+
"learning_rate": 7.498153615653758e-09,
|
19650 |
+
"loss": 8.0163,
|
19651 |
+
"step": 2806
|
19652 |
+
},
|
19653 |
+
{
|
19654 |
+
"epoch": 0.9947727474085231,
|
19655 |
+
"grad_norm": 10.074397087097168,
|
19656 |
+
"learning_rate": 6.531745969562408e-09,
|
19657 |
+
"loss": 7.7091,
|
19658 |
+
"step": 2807
|
19659 |
+
},
|
19660 |
+
{
|
19661 |
+
"epoch": 0.9951271374147249,
|
19662 |
+
"grad_norm": 10.248494148254395,
|
19663 |
+
"learning_rate": 5.631981529269359e-09,
|
19664 |
+
"loss": 7.9076,
|
19665 |
+
"step": 2808
|
19666 |
+
},
|
19667 |
+
{
|
19668 |
+
"epoch": 0.9954815274209268,
|
19669 |
+
"grad_norm": 9.425408363342285,
|
19670 |
+
"learning_rate": 4.798861494204054e-09,
|
19671 |
+
"loss": 8.3727,
|
19672 |
+
"step": 2809
|
19673 |
+
},
|
19674 |
+
{
|
19675 |
+
"epoch": 0.9958359174271285,
|
19676 |
+
"grad_norm": 9.309487342834473,
|
19677 |
+
"learning_rate": 4.032386974939239e-09,
|
19678 |
+
"loss": 7.7269,
|
19679 |
+
"step": 2810
|
19680 |
+
},
|
19681 |
+
{
|
19682 |
+
"epoch": 0.9961903074333304,
|
19683 |
+
"grad_norm": 10.345414161682129,
|
19684 |
+
"learning_rate": 3.332558993218715e-09,
|
19685 |
+
"loss": 7.8104,
|
19686 |
+
"step": 2811
|
19687 |
+
},
|
19688 |
+
{
|
19689 |
+
"epoch": 0.9965446974395322,
|
19690 |
+
"grad_norm": 9.41676139831543,
|
19691 |
+
"learning_rate": 2.699378481940684e-09,
|
19692 |
+
"loss": 7.3879,
|
19693 |
+
"step": 2812
|
19694 |
+
},
|
19695 |
+
{
|
19696 |
+
"epoch": 0.9968990874457341,
|
19697 |
+
"grad_norm": 9.750468254089355,
|
19698 |
+
"learning_rate": 2.1328462851577525e-09,
|
19699 |
+
"loss": 7.4963,
|
19700 |
+
"step": 2813
|
19701 |
+
},
|
19702 |
+
{
|
19703 |
+
"epoch": 0.9972534774519358,
|
19704 |
+
"grad_norm": 9.87458324432373,
|
19705 |
+
"learning_rate": 1.6329631580769278e-09,
|
19706 |
+
"loss": 7.548,
|
19707 |
+
"step": 2814
|
19708 |
+
},
|
19709 |
+
{
|
19710 |
+
"epoch": 0.9976078674581377,
|
19711 |
+
"grad_norm": 10.732804298400879,
|
19712 |
+
"learning_rate": 1.1997297670651718e-09,
|
19713 |
+
"loss": 7.1676,
|
19714 |
+
"step": 2815
|
19715 |
+
},
|
19716 |
+
{
|
19717 |
+
"epoch": 0.9979622574643395,
|
19718 |
+
"grad_norm": 9.683318138122559,
|
19719 |
+
"learning_rate": 8.331466896382978e-10,
|
19720 |
+
"loss": 7.6941,
|
19721 |
+
"step": 2816
|
19722 |
+
},
|
19723 |
+
{
|
19724 |
+
"epoch": 0.9983166474705414,
|
19725 |
+
"grad_norm": 9.885022163391113,
|
19726 |
+
"learning_rate": 5.332144144665208e-10,
|
19727 |
+
"loss": 8.3338,
|
19728 |
+
"step": 2817
|
19729 |
+
},
|
19730 |
+
{
|
19731 |
+
"epoch": 0.9986710374767431,
|
19732 |
+
"grad_norm": 11.414604187011719,
|
19733 |
+
"learning_rate": 2.999333413689076e-10,
|
19734 |
+
"loss": 8.1915,
|
19735 |
+
"step": 2818
|
19736 |
+
},
|
19737 |
+
{
|
19738 |
+
"epoch": 0.999025427482945,
|
19739 |
+
"grad_norm": 10.389874458312988,
|
19740 |
+
"learning_rate": 1.333037813133764e-10,
|
19741 |
+
"loss": 6.8466,
|
19742 |
+
"step": 2819
|
19743 |
+
},
|
19744 |
+
{
|
19745 |
+
"epoch": 0.9993798174891468,
|
19746 |
+
"grad_norm": 9.702605247497559,
|
19747 |
+
"learning_rate": 3.33259564333499e-11,
|
19748 |
+
"loss": 6.4676,
|
19749 |
+
"step": 2820
|
19750 |
+
},
|
19751 |
+
{
|
19752 |
+
"epoch": 0.9997342074953486,
|
19753 |
+
"grad_norm": 10.419767379760742,
|
19754 |
+
"learning_rate": 0.0,
|
19755 |
+
"loss": 7.04,
|
19756 |
+
"step": 2821
|
19757 |
}
|
19758 |
],
|
19759 |
"logging_steps": 1,
|
|
|
19768 |
"should_evaluate": false,
|
19769 |
"should_log": false,
|
19770 |
"should_save": true,
|
19771 |
+
"should_training_stop": true
|
19772 |
},
|
19773 |
"attributes": {}
|
19774 |
}
|
19775 |
},
|
19776 |
+
"total_flos": 1.0293193213649388e+18,
|
19777 |
"train_batch_size": 4,
|
19778 |
"trial_name": null,
|
19779 |
"trial_params": null
|