Training in progress, step 550, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6d702064a54a83cfe7011e94312c6740c68631ec38198bb112e73283b5b9325d
 size 323014168
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2354310a4c79f7892941438ca7a0bf30a918f8b24d806f087ecb08bdf61012e2
 size 165484738
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:42b5bf42e6137ca7b21462d382307354d023593dbf4c25759316f573752c36b8
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1d1caf05e3c7d3f6b37ac8a69117422ba2bf5b941e02ad9725bc89f907b4e6ef
 size 1064
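The four files above are Git LFS pointers: each stores only a spec version line, the SHA-256 object ID of the payload, and its size in bytes, so the diff shows the object ID changing while the byte size stays the same. As a minimal sketch (not part of this commit), assuming the pointer text and the downloaded payload sit side by side under hypothetical local paths, the following Python checks a payload against its pointer:

# Minimal sketch: verify a locally downloaded checkpoint file against its
# Git LFS pointer. The pointer only records "oid sha256:<hash>" and
# "size <bytes>"; the real payload lives in LFS storage.
import hashlib
import os

def read_pointer(pointer_path: str) -> dict:
    """Parse a git-lfs pointer file into a dict of its key/value lines."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify(pointer_path: str, payload_path: str) -> bool:
    """Return True if the payload's size and SHA-256 match the pointer."""
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hash>" -> "<hash>"
    expected_size = int(fields["size"])
    if os.path.getsize(payload_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(payload_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Example with illustrative (hypothetical) paths:
# verify("last-checkpoint/adapter_model.safetensors.pointer",
#        "last-checkpoint/adapter_model.safetensors")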
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-"best_metric": 0.
-"best_model_checkpoint": "miner_id_24/checkpoint-
-"epoch": 0.
+"best_metric": 0.37203362584114075,
+"best_model_checkpoint": "miner_id_24/checkpoint-550",
+"epoch": 0.9335879482283047,
 "eval_steps": 50,
-"global_step":
+"global_step": 550,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -3595,6 +3595,364 @@
 "eval_samples_per_second": 2.925,
 "eval_steps_per_second": 2.925,
 "step": 500
+},
+{
+"epoch": 0.8504137492043284,
+"grad_norm": 0.23917846381664276,
+"learning_rate": 0.00015190394441942843,
+"loss": 0.8684,
+"step": 501
+},
+{
+"epoch": 0.8521111818374708,
+"grad_norm": 0.27708154916763306,
+"learning_rate": 0.0001514279750909365,
+"loss": 1.0004,
+"step": 502
+},
+{
+"epoch": 0.8538086144706132,
+"grad_norm": 0.25657930970191956,
+"learning_rate": 0.00015095199138275128,
+"loss": 0.7568,
+"step": 503
+},
+{
+"epoch": 0.8555060471037556,
+"grad_norm": 0.2313452512025833,
+"learning_rate": 0.00015047599808802332,
+"loss": 0.8288,
+"step": 504
+},
+{
+"epoch": 0.8572034797368979,
+"grad_norm": 0.2528156042098999,
+"learning_rate": 0.00015,
+"loss": 1.011,
+"step": 505
+},
+{
+"epoch": 0.8589009123700403,
+"grad_norm": 0.1938907355070114,
+"learning_rate": 0.00014952400191197665,
+"loss": 0.5598,
+"step": 506
+},
+{
+"epoch": 0.8605983450031827,
+"grad_norm": 0.23120371997356415,
+"learning_rate": 0.00014904800861724872,
+"loss": 0.6959,
+"step": 507
+},
+{
+"epoch": 0.862295777636325,
+"grad_norm": 0.23072639107704163,
+"learning_rate": 0.00014857202490906347,
+"loss": 0.7868,
+"step": 508
+},
+{
+"epoch": 0.8639932102694674,
+"grad_norm": 0.21651454269886017,
+"learning_rate": 0.00014809605558057157,
+"loss": 0.6725,
+"step": 509
+},
+{
+"epoch": 0.8656906429026098,
+"grad_norm": 0.1905306875705719,
+"learning_rate": 0.0001476201054247788,
+"loss": 0.5756,
+"step": 510
+},
+{
+"epoch": 0.8673880755357521,
+"grad_norm": 0.35889434814453125,
+"learning_rate": 0.00014714417923449797,
+"loss": 0.6115,
+"step": 511
+},
+{
+"epoch": 0.8690855081688945,
+"grad_norm": 0.19174250960350037,
+"learning_rate": 0.00014666828180230057,
+"loss": 0.4859,
+"step": 512
+},
+{
+"epoch": 0.8707829408020369,
+"grad_norm": 0.2138870805501938,
+"learning_rate": 0.0001461924179204684,
+"loss": 0.6636,
+"step": 513
+},
+{
+"epoch": 0.8724803734351793,
+"grad_norm": 0.17262116074562073,
+"learning_rate": 0.00014571659238094556,
+"loss": 0.42,
+"step": 514
+},
+{
+"epoch": 0.8741778060683216,
+"grad_norm": 0.22339358925819397,
+"learning_rate": 0.00014524080997528987,
+"loss": 0.6612,
+"step": 515
+},
+{
+"epoch": 0.875875238701464,
+"grad_norm": 0.1979471892118454,
+"learning_rate": 0.0001447650754946249,
+"loss": 0.5441,
+"step": 516
+},
+{
+"epoch": 0.8775726713346064,
+"grad_norm": 0.20259279012680054,
+"learning_rate": 0.00014428939372959152,
+"loss": 0.5254,
+"step": 517
+},
+{
+"epoch": 0.8792701039677487,
+"grad_norm": 0.12251409888267517,
+"learning_rate": 0.0001438137694702999,
+"loss": 0.2171,
+"step": 518
+},
+{
+"epoch": 0.8809675366008911,
+"grad_norm": 0.16714578866958618,
+"learning_rate": 0.00014333820750628105,
+"loss": 0.311,
+"step": 519
+},
+{
+"epoch": 0.8826649692340335,
+"grad_norm": 0.24203087389469147,
+"learning_rate": 0.00014286271262643866,
+"loss": 0.6175,
+"step": 520
+},
+{
+"epoch": 0.8843624018671759,
+"grad_norm": 0.1858789026737213,
+"learning_rate": 0.00014238728961900088,
+"loss": 0.3565,
+"step": 521
+},
+{
+"epoch": 0.8860598345003182,
+"grad_norm": 0.09111540019512177,
+"learning_rate": 0.00014191194327147212,
+"loss": 0.1199,
+"step": 522
+},
+{
+"epoch": 0.8877572671334606,
+"grad_norm": 0.13533198833465576,
+"learning_rate": 0.00014143667837058477,
+"loss": 0.2471,
+"step": 523
+},
+{
+"epoch": 0.889454699766603,
+"grad_norm": 0.17338241636753082,
+"learning_rate": 0.00014096149970225122,
+"loss": 0.3255,
+"step": 524
+},
+{
+"epoch": 0.8911521323997453,
+"grad_norm": 0.05573137849569321,
+"learning_rate": 0.00014048641205151533,
+"loss": 0.0455,
+"step": 525
+},
+{
+"epoch": 0.8928495650328877,
+"grad_norm": 0.007357200141996145,
+"learning_rate": 0.0001400114202025044,
+"loss": 0.0004,
+"step": 526
+},
+{
+"epoch": 0.8945469976660301,
+"grad_norm": 0.00043303659185767174,
+"learning_rate": 0.00013953652893838119,
+"loss": 0.0,
+"step": 527
+},
+{
+"epoch": 0.8962444302991726,
+"grad_norm": 0.02541971206665039,
+"learning_rate": 0.0001390617430412954,
+"loss": 0.0028,
+"step": 528
+},
+{
+"epoch": 0.8979418629323149,
+"grad_norm": 0.010525004006922245,
+"learning_rate": 0.0001385870672923357,
+"loss": 0.0005,
+"step": 529
+},
+{
+"epoch": 0.8996392955654573,
+"grad_norm": 0.03903070092201233,
+"learning_rate": 0.0001381125064714817,
+"loss": 0.0003,
+"step": 530
+},
+{
+"epoch": 0.9013367281985997,
+"grad_norm": 0.010076366364955902,
+"learning_rate": 0.00013763806535755562,
+"loss": 0.0002,
+"step": 531
+},
+{
+"epoch": 0.903034160831742,
+"grad_norm": 0.0008758578333072364,
+"learning_rate": 0.00013716374872817407,
+"loss": 0.0,
+"step": 532
+},
+{
+"epoch": 0.9047315934648844,
+"grad_norm": 0.0009034467511810362,
+"learning_rate": 0.0001366895613597003,
+"loss": 0.0,
+"step": 533
+},
+{
+"epoch": 0.9064290260980268,
+"grad_norm": 0.0004988125874660909,
+"learning_rate": 0.00013621550802719588,
+"loss": 0.0,
+"step": 534
+},
+{
+"epoch": 0.9081264587311692,
+"grad_norm": 0.012061301618814468,
+"learning_rate": 0.00013574159350437261,
+"loss": 0.0006,
+"step": 535
+},
+{
+"epoch": 0.9098238913643115,
+"grad_norm": 0.0005069606122560799,
+"learning_rate": 0.0001352678225635444,
+"loss": 0.0,
+"step": 536
+},
+{
+"epoch": 0.9115213239974539,
+"grad_norm": 0.003097748151049018,
+"learning_rate": 0.00013479419997557948,
+"loss": 0.0001,
+"step": 537
+},
+{
+"epoch": 0.9132187566305963,
+"grad_norm": 0.010489325039088726,
+"learning_rate": 0.000134320730509852,
+"loss": 0.0002,
+"step": 538
+},
+{
+"epoch": 0.9149161892637386,
+"grad_norm": 0.00030282657826319337,
+"learning_rate": 0.00013384741893419415,
+"loss": 0.0,
+"step": 539
+},
+{
+"epoch": 0.916613621896881,
+"grad_norm": 0.0403389073908329,
+"learning_rate": 0.00013337427001484836,
+"loss": 0.0005,
+"step": 540
+},
+{
+"epoch": 0.9183110545300234,
+"grad_norm": 0.003200069535523653,
+"learning_rate": 0.0001329012885164189,
+"loss": 0.0001,
+"step": 541
+},
+{
+"epoch": 0.9200084871631657,
+"grad_norm": 0.007805091328918934,
+"learning_rate": 0.00013242847920182424,
+"loss": 0.0002,
+"step": 542
+},
+{
+"epoch": 0.9217059197963081,
+"grad_norm": 0.004255454055964947,
+"learning_rate": 0.000131955846832249,
+"loss": 0.0001,
+"step": 543
+},
+{
+"epoch": 0.9234033524294505,
+"grad_norm": 0.0008626742055639625,
+"learning_rate": 0.00013148339616709577,
+"loss": 0.0,
+"step": 544
+},
+{
+"epoch": 0.9251007850625929,
+"grad_norm": 0.005825830157846212,
+"learning_rate": 0.00013101113196393758,
+"loss": 0.0002,
+"step": 545
+},
+{
+"epoch": 0.9267982176957352,
+"grad_norm": 0.00038926751585677266,
+"learning_rate": 0.00013053905897846972,
+"loss": 0.0,
+"step": 546
+},
+{
+"epoch": 0.9284956503288776,
+"grad_norm": 0.025748664513230324,
+"learning_rate": 0.00013006718196446188,
+"loss": 0.0007,
+"step": 547
+},
+{
+"epoch": 0.93019308296202,
+"grad_norm": 0.000722411903552711,
+"learning_rate": 0.0001295955056737104,
+"loss": 0.0,
+"step": 548
+},
+{
+"epoch": 0.9318905155951623,
+"grad_norm": 0.013827555812895298,
+"learning_rate": 0.0001291240348559902,
+"loss": 0.0003,
+"step": 549
+},
+{
+"epoch": 0.9335879482283047,
+"grad_norm": 0.0006142717902548611,
+"learning_rate": 0.00012865277425900724,
+"loss": 0.0,
+"step": 550
+},
+{
+"epoch": 0.9335879482283047,
+"eval_loss": 0.37203362584114075,
+"eval_runtime": 65.9103,
+"eval_samples_per_second": 2.928,
+"eval_steps_per_second": 2.928,
+"step": 550
 }
 ],
 "logging_steps": 1,
@@ -3623,7 +3981,7 @@
 "attributes": {}
 }
 },
-"total_flos": 3.
+"total_flos": 3.562049824924631e+17,
 "train_batch_size": 1,
 "trial_name": null,
 "trial_params": null
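The trainer_state.json update appends the step 501-550 log entries, records the step 550 evaluation, and promotes miner_id_24/checkpoint-550 to best (eval_loss 0.37203362584114075). As a minimal sketch (not part of this commit), assuming the checkpoint directory has been downloaded to the local path used below, the state file can be inspected with nothing more than the json module:

# Minimal sketch: inspect the updated trainer_state.json.
# The local path is an assumption about where the checkpoint lives on disk.
import json

with open("last-checkpoint/trainer_state.json", "r", encoding="utf-8") as fh:
    state = json.load(fh)

# Top-level fields changed by this commit.
print(state["global_step"])             # 550
print(state["best_model_checkpoint"])   # miner_id_24/checkpoint-550
print(state["best_metric"])             # 0.37203362584114075

# log_history holds one dict per logged training step plus an eval entry
# every `eval_steps` (50) steps; eval entries are the ones with "eval_loss".
evals = [entry for entry in state["log_history"] if "eval_loss" in entry]
print(evals[-1]["step"], evals[-1]["eval_loss"])   # 550 0.37203362584114075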