Training in progress, step 3700, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ee66a361296b6ddade4373f90ebf70dfec210f4c2cc8a673761b669a0a19a29
 size 35237104
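The diff above is a Git LFS pointer, not the weights themselves: the repository tracks only the spec version, a sha256 object ID, and the byte size, and this commit swaps in the oid of the new blob. The remaining checkpoint files below follow the same pattern. As a minimal sketch (file paths are hypothetical), a downloaded blob can be verified against such a pointer like this:

# Verify a downloaded file against its Git LFS pointer (hypothetical paths).
# A pointer holds exactly the three "key value" lines shown in the diff above.
import hashlib
import os

def check_lfs_pointer(pointer_path, blob_path):
    fields = dict(line.split(" ", 1)
                  for line in open(pointer_path).read().splitlines() if line)
    expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    with open(blob_path, "rb") as f:
        actual_oid = hashlib.sha256(f.read()).hexdigest()
    return (actual_oid == expected_oid
            and os.path.getsize(blob_path) == int(fields["size"]))

# e.g. check_lfs_pointer("adapter_model.pointer", "adapter_model.safetensors")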
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b69ade3d8143fa53df3c44e13e0e86f751c9d91f48e8ba8422b235fd7a4ef95c
 size 18810356
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:525e871b8bfa9d0c55029d3a5724dab788324d84396021519791e25b39fc6797
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:110477147f82823f2afe8f6b04f642e31b0df79e35f16b64a881cf01711c33d2
 size 1064
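Together with trainer_state.json below, the four files in this commit form the usual Hugging Face Trainer checkpoint layout: adapter weights, optimizer state, RNG state for deterministic resumption, and LR-scheduler state. A minimal sketch (the local path is hypothetical) that sanity-checks such a checkpoint directory before resuming from it:

# Sanity-check a Trainer checkpoint directory (hypothetical local path).
import json
import os

ckpt = "last-checkpoint"
for name in ("adapter_model.safetensors",  # adapter weights (LFS-tracked)
             "optimizer.pt",               # optimizer state
             "rng_state.pth",              # RNG state for deterministic resume
             "scheduler.pt",               # LR-scheduler state
             "trainer_state.json"):        # log history and control state
    print(name, "ok" if os.path.exists(os.path.join(ckpt, name)) else "MISSING")

with open(os.path.join(ckpt, "trainer_state.json")) as f:
    state = json.load(f)
print(state["global_step"], state["epoch"])  # expect 3700 and ~1.0297 here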
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 2.8474695682525635,
   "best_model_checkpoint": "miner_id_24/checkpoint-3600",
-  "epoch": 1.
+  "epoch": 1.0297422339722406,
   "eval_steps": 100,
-  "global_step":
+  "global_step": 3700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -25503,6 +25503,714 @@
       "eval_samples_per_second": 59.111,
       "eval_steps_per_second": 14.778,
       "step": 3600
+    },
+    {
+      "epoch": 1.0021915330295335,
+      "grad_norm": 2.4727015495300293,
+      "learning_rate": 9.989056736705608e-05,
+      "loss": 2.7356,
+      "step": 3601
+    },
+    {
+      "epoch": 1.0024698229380458,
+      "grad_norm": 2.1826629638671875,
+      "learning_rate": 9.984679434323399e-05,
+      "loss": 2.4974,
+      "step": 3602
+    },
+    {
+      "epoch": 1.0027481128465578,
+      "grad_norm": 2.0469789505004883,
+      "learning_rate": 9.98030213487673e-05,
+      "loss": 2.3743,
+      "step": 3603
+    },
+    {
+      "epoch": 1.0030264027550702,
+      "grad_norm": 2.1204702854156494,
+      "learning_rate": 9.975924839204333e-05,
+      "loss": 2.3914,
+      "step": 3604
+    },
+    {
+      "epoch": 1.0033046926635822,
+      "grad_norm": 2.30515456199646,
+      "learning_rate": 9.971547548144934e-05,
+      "loss": 2.7669,
+      "step": 3605
+    },
+    {
+      "epoch": 1.0035829825720946,
+      "grad_norm": 2.2537689208984375,
+      "learning_rate": 9.96717026253725e-05,
+      "loss": 2.2713,
+      "step": 3606
+    },
+    {
+      "epoch": 1.0038612724806066,
+      "grad_norm": 2.1024792194366455,
+      "learning_rate": 9.962792983220014e-05,
+      "loss": 2.6992,
+      "step": 3607
+    },
+    {
+      "epoch": 1.004139562389119,
+      "grad_norm": 2.6211812496185303,
+      "learning_rate": 9.958415711031944e-05,
+      "loss": 2.553,
+      "step": 3608
+    },
+    {
+      "epoch": 1.004417852297631,
+      "grad_norm": 2.4872612953186035,
+      "learning_rate": 9.954038446811755e-05,
+      "loss": 2.6366,
+      "step": 3609
+    },
+    {
+      "epoch": 1.0046961422061433,
+      "grad_norm": 2.327535629272461,
+      "learning_rate": 9.949661191398175e-05,
+      "loss": 2.7231,
+      "step": 3610
+    },
+    {
+      "epoch": 1.0049744321146554,
+      "grad_norm": 2.1828579902648926,
+      "learning_rate": 9.94528394562992e-05,
+      "loss": 2.4099,
+      "step": 3611
+    },
+    {
+      "epoch": 1.0052527220231677,
+      "grad_norm": 2.1962826251983643,
+      "learning_rate": 9.940906710345698e-05,
+      "loss": 2.4376,
+      "step": 3612
+    },
+    {
+      "epoch": 1.0055310119316798,
+      "grad_norm": 2.220290422439575,
+      "learning_rate": 9.936529486384234e-05,
+      "loss": 2.3303,
+      "step": 3613
+    },
+    {
+      "epoch": 1.0058093018401921,
+      "grad_norm": 2.429093599319458,
+      "learning_rate": 9.932152274584232e-05,
+      "loss": 2.5576,
+      "step": 3614
+    },
+    {
+      "epoch": 1.0060875917487042,
+      "grad_norm": 2.545867681503296,
+      "learning_rate": 9.927775075784403e-05,
+      "loss": 2.5247,
+      "step": 3615
+    },
+    {
+      "epoch": 1.0063658816572163,
+      "grad_norm": 2.177238702774048,
+      "learning_rate": 9.923397890823453e-05,
+      "loss": 2.2422,
+      "step": 3616
+    },
+    {
+      "epoch": 1.0066441715657286,
+      "grad_norm": 2.516214370727539,
+      "learning_rate": 9.91902072054009e-05,
+      "loss": 2.6979,
+      "step": 3617
+    },
+    {
+      "epoch": 1.0069224614742407,
+      "grad_norm": 2.583171844482422,
+      "learning_rate": 9.914643565773008e-05,
+      "loss": 2.4948,
+      "step": 3618
+    },
+    {
+      "epoch": 1.007200751382753,
+      "grad_norm": 2.1928822994232178,
+      "learning_rate": 9.910266427360913e-05,
+      "loss": 2.4619,
+      "step": 3619
+    },
+    {
+      "epoch": 1.007479041291265,
+      "grad_norm": 2.3109724521636963,
+      "learning_rate": 9.905889306142497e-05,
+      "loss": 2.4164,
+      "step": 3620
+    },
+    {
+      "epoch": 1.0077573311997774,
+      "grad_norm": 2.3388402462005615,
+      "learning_rate": 9.901512202956447e-05,
+      "loss": 2.5941,
+      "step": 3621
+    },
+    {
+      "epoch": 1.0080356211082895,
+      "grad_norm": 2.36480450630188,
+      "learning_rate": 9.89713511864146e-05,
+      "loss": 2.5486,
+      "step": 3622
+    },
+    {
+      "epoch": 1.0083139110168018,
+      "grad_norm": 2.3684072494506836,
+      "learning_rate": 9.892758054036216e-05,
+      "loss": 2.6478,
+      "step": 3623
+    },
+    {
+      "epoch": 1.0085922009253139,
+      "grad_norm": 2.2883782386779785,
+      "learning_rate": 9.888381009979394e-05,
+      "loss": 2.3874,
+      "step": 3624
+    },
+    {
+      "epoch": 1.0088704908338262,
+      "grad_norm": 2.400454521179199,
+      "learning_rate": 9.884003987309676e-05,
+      "loss": 2.6688,
+      "step": 3625
+    },
+    {
+      "epoch": 1.0091487807423383,
+      "grad_norm": 2.411891222000122,
+      "learning_rate": 9.879626986865735e-05,
+      "loss": 2.8327,
+      "step": 3626
+    },
+    {
+      "epoch": 1.0094270706508506,
+      "grad_norm": 2.746245861053467,
+      "learning_rate": 9.875250009486232e-05,
+      "loss": 2.7147,
+      "step": 3627
+    },
+    {
+      "epoch": 1.0097053605593627,
+      "grad_norm": 2.195439577102661,
+      "learning_rate": 9.870873056009841e-05,
+      "loss": 2.2958,
+      "step": 3628
+    },
+    {
+      "epoch": 1.009983650467875,
+      "grad_norm": 2.4186742305755615,
+      "learning_rate": 9.866496127275216e-05,
+      "loss": 2.3944,
+      "step": 3629
+    },
+    {
+      "epoch": 1.010261940376387,
+      "grad_norm": 2.4659276008605957,
+      "learning_rate": 9.862119224121011e-05,
+      "loss": 2.8833,
+      "step": 3630
+    },
+    {
+      "epoch": 1.0105402302848994,
+      "grad_norm": 3.256303071975708,
+      "learning_rate": 9.85774234738588e-05,
+      "loss": 2.5439,
+      "step": 3631
+    },
+    {
+      "epoch": 1.0108185201934115,
+      "grad_norm": 2.459998369216919,
+      "learning_rate": 9.85336549790847e-05,
+      "loss": 2.4565,
+      "step": 3632
+    },
+    {
+      "epoch": 1.0110968101019238,
+      "grad_norm": 2.466240167617798,
+      "learning_rate": 9.848988676527411e-05,
+      "loss": 2.4005,
+      "step": 3633
+    },
+    {
+      "epoch": 1.0113751000104358,
+      "grad_norm": 2.684922218322754,
+      "learning_rate": 9.844611884081348e-05,
+      "loss": 2.4699,
+      "step": 3634
+    },
+    {
+      "epoch": 1.0116533899189482,
+      "grad_norm": 2.9135866165161133,
+      "learning_rate": 9.840235121408902e-05,
+      "loss": 3.0818,
+      "step": 3635
+    },
+    {
+      "epoch": 1.0119316798274602,
+      "grad_norm": 2.5571932792663574,
+      "learning_rate": 9.835858389348701e-05,
+      "loss": 2.6709,
+      "step": 3636
+    },
+    {
+      "epoch": 1.0122099697359725,
+      "grad_norm": 2.159505605697632,
+      "learning_rate": 9.831481688739362e-05,
+      "loss": 2.3602,
+      "step": 3637
+    },
+    {
+      "epoch": 1.0124882596444846,
+      "grad_norm": 2.178410291671753,
+      "learning_rate": 9.827105020419494e-05,
+      "loss": 2.3776,
+      "step": 3638
+    },
+    {
+      "epoch": 1.012766549552997,
+      "grad_norm": 2.4218437671661377,
+      "learning_rate": 9.8227283852277e-05,
+      "loss": 2.1694,
+      "step": 3639
+    },
+    {
+      "epoch": 1.013044839461509,
+      "grad_norm": 3.1513845920562744,
+      "learning_rate": 9.818351784002586e-05,
+      "loss": 2.9845,
+      "step": 3640
+    },
+    {
+      "epoch": 1.0133231293700211,
+      "grad_norm": 2.8680331707000732,
+      "learning_rate": 9.813975217582739e-05,
+      "loss": 2.5105,
+      "step": 3641
+    },
+    {
+      "epoch": 1.0136014192785334,
+      "grad_norm": 2.3797199726104736,
+      "learning_rate": 9.809598686806746e-05,
+      "loss": 2.4647,
+      "step": 3642
+    },
+    {
+      "epoch": 1.0138797091870455,
+      "grad_norm": 2.403562068939209,
+      "learning_rate": 9.805222192513184e-05,
+      "loss": 2.6245,
+      "step": 3643
+    },
+    {
+      "epoch": 1.0141579990955578,
+      "grad_norm": 2.2409002780914307,
+      "learning_rate": 9.800845735540627e-05,
+      "loss": 2.393,
+      "step": 3644
+    },
+    {
+      "epoch": 1.01443628900407,
+      "grad_norm": 2.2305660247802734,
+      "learning_rate": 9.796469316727641e-05,
+      "loss": 2.5389,
+      "step": 3645
+    },
+    {
+      "epoch": 1.0147145789125822,
+      "grad_norm": 2.433889389038086,
+      "learning_rate": 9.792092936912777e-05,
+      "loss": 2.7513,
+      "step": 3646
+    },
+    {
+      "epoch": 1.0149928688210943,
+      "grad_norm": 2.397838830947876,
+      "learning_rate": 9.78771659693459e-05,
+      "loss": 2.7513,
+      "step": 3647
+    },
+    {
+      "epoch": 1.0152711587296066,
+      "grad_norm": 2.5803353786468506,
+      "learning_rate": 9.783340297631623e-05,
+      "loss": 2.5487,
+      "step": 3648
+    },
+    {
+      "epoch": 1.0155494486381187,
+      "grad_norm": 2.5709424018859863,
+      "learning_rate": 9.778964039842404e-05,
+      "loss": 2.4367,
+      "step": 3649
+    },
+    {
+      "epoch": 1.015827738546631,
+      "grad_norm": 2.5421626567840576,
+      "learning_rate": 9.774587824405466e-05,
+      "loss": 3.0246,
+      "step": 3650
+    },
+    {
+      "epoch": 1.016106028455143,
+      "grad_norm": 2.4680778980255127,
+      "learning_rate": 9.770211652159327e-05,
+      "loss": 2.4571,
+      "step": 3651
+    },
+    {
+      "epoch": 1.0163843183636554,
+      "grad_norm": 2.4402754306793213,
+      "learning_rate": 9.76583552394249e-05,
+      "loss": 2.3657,
+      "step": 3652
+    },
+    {
+      "epoch": 1.0166626082721675,
+      "grad_norm": 2.582365036010742,
+      "learning_rate": 9.761459440593466e-05,
+      "loss": 2.6948,
+      "step": 3653
+    },
+    {
+      "epoch": 1.0169408981806798,
+      "grad_norm": 2.3168914318084717,
+      "learning_rate": 9.757083402950742e-05,
+      "loss": 2.3346,
+      "step": 3654
+    },
+    {
+      "epoch": 1.0172191880891919,
+      "grad_norm": 2.390580654144287,
+      "learning_rate": 9.752707411852802e-05,
+      "loss": 2.7386,
+      "step": 3655
+    },
+    {
+      "epoch": 1.0174974779977042,
+      "grad_norm": 2.624013900756836,
+      "learning_rate": 9.748331468138124e-05,
+      "loss": 2.4795,
+      "step": 3656
+    },
+    {
+      "epoch": 1.0177757679062163,
+      "grad_norm": 2.4292471408843994,
+      "learning_rate": 9.743955572645174e-05,
+      "loss": 2.7798,
+      "step": 3657
+    },
+    {
+      "epoch": 1.0180540578147286,
+      "grad_norm": 2.503296136856079,
+      "learning_rate": 9.739579726212406e-05,
+      "loss": 2.6261,
+      "step": 3658
+    },
+    {
+      "epoch": 1.0183323477232407,
+      "grad_norm": 2.2126271724700928,
+      "learning_rate": 9.735203929678272e-05,
+      "loss": 2.2868,
+      "step": 3659
+    },
+    {
+      "epoch": 1.018610637631753,
+      "grad_norm": 2.1973748207092285,
+      "learning_rate": 9.730828183881208e-05,
+      "loss": 2.4527,
+      "step": 3660
+    },
+    {
+      "epoch": 1.018888927540265,
+      "grad_norm": 2.435417652130127,
+      "learning_rate": 9.726452489659638e-05,
+      "loss": 2.5638,
+      "step": 3661
+    },
+    {
+      "epoch": 1.0191672174487774,
+      "grad_norm": 2.363333225250244,
+      "learning_rate": 9.722076847851988e-05,
+      "loss": 2.4653,
+      "step": 3662
+    },
+    {
+      "epoch": 1.0194455073572894,
+      "grad_norm": 2.7161386013031006,
+      "learning_rate": 9.717701259296665e-05,
+      "loss": 2.4748,
+      "step": 3663
+    },
+    {
+      "epoch": 1.0197237972658018,
+      "grad_norm": 2.3049778938293457,
+      "learning_rate": 9.713325724832059e-05,
+      "loss": 2.5271,
+      "step": 3664
+    },
+    {
+      "epoch": 1.0200020871743138,
+      "grad_norm": 2.264054775238037,
+      "learning_rate": 9.708950245296569e-05,
+      "loss": 2.4146,
+      "step": 3665
+    },
+    {
+      "epoch": 1.020280377082826,
+      "grad_norm": 2.786287784576416,
+      "learning_rate": 9.704574821528566e-05,
+      "loss": 2.6383,
+      "step": 3666
+    },
+    {
+      "epoch": 1.0205586669913382,
+      "grad_norm": 2.3080055713653564,
+      "learning_rate": 9.700199454366415e-05,
+      "loss": 2.5227,
+      "step": 3667
+    },
+    {
+      "epoch": 1.0208369568998503,
+      "grad_norm": 2.4426419734954834,
+      "learning_rate": 9.695824144648478e-05,
+      "loss": 2.4908,
+      "step": 3668
+    },
+    {
+      "epoch": 1.0211152468083626,
+      "grad_norm": 2.478846788406372,
+      "learning_rate": 9.691448893213095e-05,
+      "loss": 2.5778,
+      "step": 3669
+    },
+    {
+      "epoch": 1.0213935367168747,
+      "grad_norm": 2.3766379356384277,
+      "learning_rate": 9.687073700898598e-05,
+      "loss": 2.5546,
+      "step": 3670
+    },
+    {
+      "epoch": 1.021671826625387,
+      "grad_norm": 2.3187801837921143,
+      "learning_rate": 9.682698568543317e-05,
+      "loss": 2.5168,
+      "step": 3671
+    },
+    {
+      "epoch": 1.0219501165338991,
+      "grad_norm": 2.4878695011138916,
+      "learning_rate": 9.678323496985557e-05,
+      "loss": 2.7055,
+      "step": 3672
+    },
+    {
+      "epoch": 1.0222284064424114,
+      "grad_norm": 2.6090481281280518,
+      "learning_rate": 9.673948487063614e-05,
+      "loss": 2.6359,
+      "step": 3673
+    },
+    {
+      "epoch": 1.0225066963509235,
+      "grad_norm": 2.3619978427886963,
+      "learning_rate": 9.669573539615782e-05,
+      "loss": 2.4851,
+      "step": 3674
+    },
+    {
+      "epoch": 1.0227849862594358,
+      "grad_norm": 2.271355390548706,
+      "learning_rate": 9.665198655480334e-05,
+      "loss": 2.7666,
+      "step": 3675
+    },
+    {
+      "epoch": 1.023063276167948,
+      "grad_norm": 2.36423659324646,
+      "learning_rate": 9.660823835495531e-05,
+      "loss": 2.3988,
+      "step": 3676
+    },
+    {
+      "epoch": 1.0233415660764602,
+      "grad_norm": 2.4783449172973633,
+      "learning_rate": 9.656449080499627e-05,
+      "loss": 2.4864,
+      "step": 3677
+    },
+    {
+      "epoch": 1.0236198559849723,
+      "grad_norm": 2.3957090377807617,
+      "learning_rate": 9.65207439133086e-05,
+      "loss": 2.3701,
+      "step": 3678
+    },
+    {
+      "epoch": 1.0238981458934846,
+      "grad_norm": 2.900683879852295,
+      "learning_rate": 9.64769976882745e-05,
+      "loss": 2.5155,
+      "step": 3679
+    },
+    {
+      "epoch": 1.0241764358019967,
+      "grad_norm": 2.5048580169677734,
+      "learning_rate": 9.643325213827619e-05,
+      "loss": 2.5991,
+      "step": 3680
+    },
+    {
+      "epoch": 1.024454725710509,
+      "grad_norm": 2.4622344970703125,
+      "learning_rate": 9.638950727169564e-05,
+      "loss": 2.5138,
+      "step": 3681
+    },
+    {
+      "epoch": 1.024733015619021,
+      "grad_norm": 2.480517625808716,
+      "learning_rate": 9.63457630969147e-05,
+      "loss": 2.6391,
+      "step": 3682
+    },
+    {
+      "epoch": 1.0250113055275334,
+      "grad_norm": 2.688997268676758,
+      "learning_rate": 9.630201962231511e-05,
+      "loss": 2.3735,
+      "step": 3683
+    },
+    {
+      "epoch": 1.0252895954360455,
+      "grad_norm": 2.426302909851074,
+      "learning_rate": 9.62582768562785e-05,
+      "loss": 2.2221,
+      "step": 3684
+    },
+    {
+      "epoch": 1.0255678853445578,
+      "grad_norm": 2.492079496383667,
+      "learning_rate": 9.621453480718634e-05,
+      "loss": 2.9364,
+      "step": 3685
+    },
+    {
+      "epoch": 1.0258461752530699,
+      "grad_norm": 2.756873846054077,
+      "learning_rate": 9.61707934834199e-05,
+      "loss": 2.6593,
+      "step": 3686
+    },
+    {
+      "epoch": 1.0261244651615822,
+      "grad_norm": 2.2915287017822266,
+      "learning_rate": 9.612705289336045e-05,
+      "loss": 2.3577,
+      "step": 3687
+    },
+    {
+      "epoch": 1.0264027550700943,
+      "grad_norm": 2.240419387817383,
+      "learning_rate": 9.6083313045389e-05,
+      "loss": 2.4756,
+      "step": 3688
+    },
+    {
+      "epoch": 1.0266810449786066,
+      "grad_norm": 2.4698326587677,
+      "learning_rate": 9.603957394788644e-05,
+      "loss": 2.5557,
+      "step": 3689
+    },
+    {
+      "epoch": 1.0269593348871187,
+      "grad_norm": 2.5737481117248535,
+      "learning_rate": 9.599583560923359e-05,
+      "loss": 2.6166,
+      "step": 3690
+    },
+    {
+      "epoch": 1.0272376247956307,
+      "grad_norm": 2.226649761199951,
+      "learning_rate": 9.595209803781102e-05,
+      "loss": 2.4705,
+      "step": 3691
+    },
+    {
+      "epoch": 1.027515914704143,
+      "grad_norm": 2.4698619842529297,
+      "learning_rate": 9.590836124199921e-05,
+      "loss": 2.4206,
+      "step": 3692
+    },
+    {
+      "epoch": 1.0277942046126551,
+      "grad_norm": 2.4154977798461914,
+      "learning_rate": 9.586462523017852e-05,
+      "loss": 2.6235,
+      "step": 3693
+    },
+    {
+      "epoch": 1.0280724945211674,
+      "grad_norm": 2.2395994663238525,
+      "learning_rate": 9.58208900107291e-05,
+      "loss": 2.5879,
+      "step": 3694
+    },
+    {
+      "epoch": 1.0283507844296795,
+      "grad_norm": 2.263139486312866,
+      "learning_rate": 9.577715559203095e-05,
+      "loss": 2.794,
+      "step": 3695
+    },
+    {
+      "epoch": 1.0286290743381918,
+      "grad_norm": 2.4009816646575928,
+      "learning_rate": 9.573342198246397e-05,
+      "loss": 2.8207,
+      "step": 3696
+    },
+    {
+      "epoch": 1.028907364246704,
+      "grad_norm": 2.4740641117095947,
+      "learning_rate": 9.568968919040787e-05,
+      "loss": 2.7345,
+      "step": 3697
+    },
+    {
+      "epoch": 1.0291856541552162,
+      "grad_norm": 2.556521415710449,
+      "learning_rate": 9.564595722424217e-05,
+      "loss": 2.6085,
+      "step": 3698
+    },
+    {
+      "epoch": 1.0294639440637283,
+      "grad_norm": 2.379560708999634,
+      "learning_rate": 9.560222609234633e-05,
+      "loss": 2.4874,
+      "step": 3699
+    },
+    {
+      "epoch": 1.0297422339722406,
+      "grad_norm": 2.2581701278686523,
+      "learning_rate": 9.555849580309954e-05,
+      "loss": 2.4456,
+      "step": 3700
+    },
+    {
+      "epoch": 1.0297422339722406,
+      "eval_loss": 2.8660240173339844,
+      "eval_runtime": 84.4219,
+      "eval_samples_per_second": 59.226,
+      "eval_steps_per_second": 14.807,
+      "step": 3700
     }
   ],
   "logging_steps": 1,
@@ -25517,7 +26225,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -25531,7 +26239,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.66753091305472e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
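The trainer_state.json changes are internally consistent: the step-3700 eval_loss (2.8660) does not improve on the best_metric recorded at checkpoint-3600 (2.8475), so with early_stopping_threshold 0.0 the patience counter advances to 1. A minimal sketch of that bookkeeping (the prior counter value of 0 is an assumption; it is elided in the old file above):

# Early-stopping bookkeeping, with values copied from the diff above.
best_metric = 2.8474695682525635   # best eval loss, from checkpoint-3600
eval_loss = 2.8660240173339844     # eval loss at step 3700
threshold = 0.0                    # early_stopping_threshold
patience = 0                       # assumed prior counter (elided in the old file)

if best_metric - eval_loss > threshold:  # for a loss, lower is an improvement
    best_metric, patience = eval_loss, 0
else:
    patience += 1
print(patience)  # -> 1, matching the new early_stopping_patience_counter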