Training in progress, step 6000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5323528
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d5523b247f7b13c0b8050366732eda9261579a506398d6de39341316a1ec0b1
|
3 |
size 5323528
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10707706
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:273c5173037f928b2e179fc992186b177c9f9ca1bb1712cdb61c2c4f9bd56859
|
3 |
size 10707706
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:646e83085f2d11ddc437ce48f3f5db2b43745e68a45e0af2d7fa68feadebf520
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a4dd1d74816502c8ecbc715add6bae4e99a2b4e50b653b0b20cfecda567b3eb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3747,13 +3747,353 @@
|
|
3747 |
"eval_samples_per_second": 91.742,
|
3748 |
"eval_steps_per_second": 11.517,
|
3749 |
"step": 5500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3750 |
}
|
3751 |
],
|
3752 |
"logging_steps": 10,
|
3753 |
"max_steps": 6130,
|
3754 |
"num_train_epochs": 10,
|
3755 |
"save_steps": 500,
|
3756 |
-
"total_flos": 1.
|
3757 |
"trial_name": null,
|
3758 |
"trial_params": null
|
3759 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4120555818080902,
|
3 |
+
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-6000",
|
4 |
+
"epoch": 9.787928221859707,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 6000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3747 |
"eval_samples_per_second": 91.742,
|
3748 |
"eval_steps_per_second": 11.517,
|
3749 |
"step": 5500
|
3750 |
+
},
|
3751 |
+
{
|
3752 |
+
"epoch": 8.99,
|
3753 |
+
"learning_rate": 0.00011012433392539965,
|
3754 |
+
"loss": 0.4155,
|
3755 |
+
"step": 5510
|
3756 |
+
},
|
3757 |
+
{
|
3758 |
+
"epoch": 9.0,
|
3759 |
+
"learning_rate": 0.000108348134991119,
|
3760 |
+
"loss": 0.3614,
|
3761 |
+
"step": 5520
|
3762 |
+
},
|
3763 |
+
{
|
3764 |
+
"epoch": 9.02,
|
3765 |
+
"learning_rate": 0.00010657193605683836,
|
3766 |
+
"loss": 0.3688,
|
3767 |
+
"step": 5530
|
3768 |
+
},
|
3769 |
+
{
|
3770 |
+
"epoch": 9.04,
|
3771 |
+
"learning_rate": 0.00010479573712255772,
|
3772 |
+
"loss": 0.3798,
|
3773 |
+
"step": 5540
|
3774 |
+
},
|
3775 |
+
{
|
3776 |
+
"epoch": 9.05,
|
3777 |
+
"learning_rate": 0.00010301953818827709,
|
3778 |
+
"loss": 0.3968,
|
3779 |
+
"step": 5550
|
3780 |
+
},
|
3781 |
+
{
|
3782 |
+
"epoch": 9.07,
|
3783 |
+
"learning_rate": 0.00010124333925399646,
|
3784 |
+
"loss": 0.3894,
|
3785 |
+
"step": 5560
|
3786 |
+
},
|
3787 |
+
{
|
3788 |
+
"epoch": 9.09,
|
3789 |
+
"learning_rate": 9.946714031971581e-05,
|
3790 |
+
"loss": 0.3858,
|
3791 |
+
"step": 5570
|
3792 |
+
},
|
3793 |
+
{
|
3794 |
+
"epoch": 9.1,
|
3795 |
+
"learning_rate": 9.769094138543518e-05,
|
3796 |
+
"loss": 0.3638,
|
3797 |
+
"step": 5580
|
3798 |
+
},
|
3799 |
+
{
|
3800 |
+
"epoch": 9.12,
|
3801 |
+
"learning_rate": 9.591474245115453e-05,
|
3802 |
+
"loss": 0.3674,
|
3803 |
+
"step": 5590
|
3804 |
+
},
|
3805 |
+
{
|
3806 |
+
"epoch": 9.14,
|
3807 |
+
"learning_rate": 9.41385435168739e-05,
|
3808 |
+
"loss": 0.3568,
|
3809 |
+
"step": 5600
|
3810 |
+
},
|
3811 |
+
{
|
3812 |
+
"epoch": 9.14,
|
3813 |
+
"eval_loss": 0.41371241211891174,
|
3814 |
+
"eval_runtime": 13.0451,
|
3815 |
+
"eval_samples_per_second": 89.766,
|
3816 |
+
"eval_steps_per_second": 11.269,
|
3817 |
+
"step": 5600
|
3818 |
+
},
|
3819 |
+
{
|
3820 |
+
"epoch": 9.15,
|
3821 |
+
"learning_rate": 9.236234458259325e-05,
|
3822 |
+
"loss": 0.3748,
|
3823 |
+
"step": 5610
|
3824 |
+
},
|
3825 |
+
{
|
3826 |
+
"epoch": 9.17,
|
3827 |
+
"learning_rate": 9.05861456483126e-05,
|
3828 |
+
"loss": 0.3901,
|
3829 |
+
"step": 5620
|
3830 |
+
},
|
3831 |
+
{
|
3832 |
+
"epoch": 9.18,
|
3833 |
+
"learning_rate": 8.880994671403198e-05,
|
3834 |
+
"loss": 0.3737,
|
3835 |
+
"step": 5630
|
3836 |
+
},
|
3837 |
+
{
|
3838 |
+
"epoch": 9.2,
|
3839 |
+
"learning_rate": 8.703374777975133e-05,
|
3840 |
+
"loss": 0.3973,
|
3841 |
+
"step": 5640
|
3842 |
+
},
|
3843 |
+
{
|
3844 |
+
"epoch": 9.22,
|
3845 |
+
"learning_rate": 8.52575488454707e-05,
|
3846 |
+
"loss": 0.3985,
|
3847 |
+
"step": 5650
|
3848 |
+
},
|
3849 |
+
{
|
3850 |
+
"epoch": 9.23,
|
3851 |
+
"learning_rate": 8.348134991119005e-05,
|
3852 |
+
"loss": 0.37,
|
3853 |
+
"step": 5660
|
3854 |
+
},
|
3855 |
+
{
|
3856 |
+
"epoch": 9.25,
|
3857 |
+
"learning_rate": 8.170515097690942e-05,
|
3858 |
+
"loss": 0.4537,
|
3859 |
+
"step": 5670
|
3860 |
+
},
|
3861 |
+
{
|
3862 |
+
"epoch": 9.27,
|
3863 |
+
"learning_rate": 7.992895204262877e-05,
|
3864 |
+
"loss": 0.3747,
|
3865 |
+
"step": 5680
|
3866 |
+
},
|
3867 |
+
{
|
3868 |
+
"epoch": 9.28,
|
3869 |
+
"learning_rate": 7.815275310834814e-05,
|
3870 |
+
"loss": 0.4095,
|
3871 |
+
"step": 5690
|
3872 |
+
},
|
3873 |
+
{
|
3874 |
+
"epoch": 9.3,
|
3875 |
+
"learning_rate": 7.63765541740675e-05,
|
3876 |
+
"loss": 0.3971,
|
3877 |
+
"step": 5700
|
3878 |
+
},
|
3879 |
+
{
|
3880 |
+
"epoch": 9.3,
|
3881 |
+
"eval_loss": 0.4128452241420746,
|
3882 |
+
"eval_runtime": 13.0551,
|
3883 |
+
"eval_samples_per_second": 89.697,
|
3884 |
+
"eval_steps_per_second": 11.26,
|
3885 |
+
"step": 5700
|
3886 |
+
},
|
3887 |
+
{
|
3888 |
+
"epoch": 9.31,
|
3889 |
+
"learning_rate": 7.460035523978686e-05,
|
3890 |
+
"loss": 0.3814,
|
3891 |
+
"step": 5710
|
3892 |
+
},
|
3893 |
+
{
|
3894 |
+
"epoch": 9.33,
|
3895 |
+
"learning_rate": 7.282415630550622e-05,
|
3896 |
+
"loss": 0.3894,
|
3897 |
+
"step": 5720
|
3898 |
+
},
|
3899 |
+
{
|
3900 |
+
"epoch": 9.35,
|
3901 |
+
"learning_rate": 7.104795737122558e-05,
|
3902 |
+
"loss": 0.3985,
|
3903 |
+
"step": 5730
|
3904 |
+
},
|
3905 |
+
{
|
3906 |
+
"epoch": 9.36,
|
3907 |
+
"learning_rate": 6.927175843694494e-05,
|
3908 |
+
"loss": 0.3593,
|
3909 |
+
"step": 5740
|
3910 |
+
},
|
3911 |
+
{
|
3912 |
+
"epoch": 9.38,
|
3913 |
+
"learning_rate": 6.74955595026643e-05,
|
3914 |
+
"loss": 0.3783,
|
3915 |
+
"step": 5750
|
3916 |
+
},
|
3917 |
+
{
|
3918 |
+
"epoch": 9.4,
|
3919 |
+
"learning_rate": 6.571936056838366e-05,
|
3920 |
+
"loss": 0.387,
|
3921 |
+
"step": 5760
|
3922 |
+
},
|
3923 |
+
{
|
3924 |
+
"epoch": 9.41,
|
3925 |
+
"learning_rate": 6.394316163410301e-05,
|
3926 |
+
"loss": 0.3839,
|
3927 |
+
"step": 5770
|
3928 |
+
},
|
3929 |
+
{
|
3930 |
+
"epoch": 9.43,
|
3931 |
+
"learning_rate": 6.216696269982238e-05,
|
3932 |
+
"loss": 0.3701,
|
3933 |
+
"step": 5780
|
3934 |
+
},
|
3935 |
+
{
|
3936 |
+
"epoch": 9.45,
|
3937 |
+
"learning_rate": 6.039076376554174e-05,
|
3938 |
+
"loss": 0.3752,
|
3939 |
+
"step": 5790
|
3940 |
+
},
|
3941 |
+
{
|
3942 |
+
"epoch": 9.46,
|
3943 |
+
"learning_rate": 5.861456483126111e-05,
|
3944 |
+
"loss": 0.3907,
|
3945 |
+
"step": 5800
|
3946 |
+
},
|
3947 |
+
{
|
3948 |
+
"epoch": 9.46,
|
3949 |
+
"eval_loss": 0.41585057973861694,
|
3950 |
+
"eval_runtime": 13.094,
|
3951 |
+
"eval_samples_per_second": 89.43,
|
3952 |
+
"eval_steps_per_second": 11.227,
|
3953 |
+
"step": 5800
|
3954 |
+
},
|
3955 |
+
{
|
3956 |
+
"epoch": 9.48,
|
3957 |
+
"learning_rate": 5.6838365896980466e-05,
|
3958 |
+
"loss": 0.3675,
|
3959 |
+
"step": 5810
|
3960 |
+
},
|
3961 |
+
{
|
3962 |
+
"epoch": 9.49,
|
3963 |
+
"learning_rate": 5.5062166962699826e-05,
|
3964 |
+
"loss": 0.3804,
|
3965 |
+
"step": 5820
|
3966 |
+
},
|
3967 |
+
{
|
3968 |
+
"epoch": 9.51,
|
3969 |
+
"learning_rate": 5.328596802841918e-05,
|
3970 |
+
"loss": 0.3968,
|
3971 |
+
"step": 5830
|
3972 |
+
},
|
3973 |
+
{
|
3974 |
+
"epoch": 9.53,
|
3975 |
+
"learning_rate": 5.1509769094138545e-05,
|
3976 |
+
"loss": 0.3831,
|
3977 |
+
"step": 5840
|
3978 |
+
},
|
3979 |
+
{
|
3980 |
+
"epoch": 9.54,
|
3981 |
+
"learning_rate": 4.9733570159857905e-05,
|
3982 |
+
"loss": 0.3973,
|
3983 |
+
"step": 5850
|
3984 |
+
},
|
3985 |
+
{
|
3986 |
+
"epoch": 9.56,
|
3987 |
+
"learning_rate": 4.7957371225577264e-05,
|
3988 |
+
"loss": 0.3962,
|
3989 |
+
"step": 5860
|
3990 |
+
},
|
3991 |
+
{
|
3992 |
+
"epoch": 9.58,
|
3993 |
+
"learning_rate": 4.6181172291296624e-05,
|
3994 |
+
"loss": 0.3819,
|
3995 |
+
"step": 5870
|
3996 |
+
},
|
3997 |
+
{
|
3998 |
+
"epoch": 9.59,
|
3999 |
+
"learning_rate": 4.440497335701599e-05,
|
4000 |
+
"loss": 0.4078,
|
4001 |
+
"step": 5880
|
4002 |
+
},
|
4003 |
+
{
|
4004 |
+
"epoch": 9.61,
|
4005 |
+
"learning_rate": 4.262877442273535e-05,
|
4006 |
+
"loss": 0.4032,
|
4007 |
+
"step": 5890
|
4008 |
+
},
|
4009 |
+
{
|
4010 |
+
"epoch": 9.62,
|
4011 |
+
"learning_rate": 4.085257548845471e-05,
|
4012 |
+
"loss": 0.4149,
|
4013 |
+
"step": 5900
|
4014 |
+
},
|
4015 |
+
{
|
4016 |
+
"epoch": 9.62,
|
4017 |
+
"eval_loss": 0.4138317406177521,
|
4018 |
+
"eval_runtime": 13.0042,
|
4019 |
+
"eval_samples_per_second": 90.048,
|
4020 |
+
"eval_steps_per_second": 11.304,
|
4021 |
+
"step": 5900
|
4022 |
+
},
|
4023 |
+
{
|
4024 |
+
"epoch": 9.64,
|
4025 |
+
"learning_rate": 3.907637655417407e-05,
|
4026 |
+
"loss": 0.385,
|
4027 |
+
"step": 5910
|
4028 |
+
},
|
4029 |
+
{
|
4030 |
+
"epoch": 9.66,
|
4031 |
+
"learning_rate": 3.730017761989343e-05,
|
4032 |
+
"loss": 0.3657,
|
4033 |
+
"step": 5920
|
4034 |
+
},
|
4035 |
+
{
|
4036 |
+
"epoch": 9.67,
|
4037 |
+
"learning_rate": 3.552397868561279e-05,
|
4038 |
+
"loss": 0.3999,
|
4039 |
+
"step": 5930
|
4040 |
+
},
|
4041 |
+
{
|
4042 |
+
"epoch": 9.69,
|
4043 |
+
"learning_rate": 3.374777975133215e-05,
|
4044 |
+
"loss": 0.3896,
|
4045 |
+
"step": 5940
|
4046 |
+
},
|
4047 |
+
{
|
4048 |
+
"epoch": 9.71,
|
4049 |
+
"learning_rate": 3.197158081705151e-05,
|
4050 |
+
"loss": 0.4021,
|
4051 |
+
"step": 5950
|
4052 |
+
},
|
4053 |
+
{
|
4054 |
+
"epoch": 9.72,
|
4055 |
+
"learning_rate": 3.019538188277087e-05,
|
4056 |
+
"loss": 0.3781,
|
4057 |
+
"step": 5960
|
4058 |
+
},
|
4059 |
+
{
|
4060 |
+
"epoch": 9.74,
|
4061 |
+
"learning_rate": 2.8419182948490233e-05,
|
4062 |
+
"loss": 0.4082,
|
4063 |
+
"step": 5970
|
4064 |
+
},
|
4065 |
+
{
|
4066 |
+
"epoch": 9.76,
|
4067 |
+
"learning_rate": 2.664298401420959e-05,
|
4068 |
+
"loss": 0.3819,
|
4069 |
+
"step": 5980
|
4070 |
+
},
|
4071 |
+
{
|
4072 |
+
"epoch": 9.77,
|
4073 |
+
"learning_rate": 2.4866785079928952e-05,
|
4074 |
+
"loss": 0.3874,
|
4075 |
+
"step": 5990
|
4076 |
+
},
|
4077 |
+
{
|
4078 |
+
"epoch": 9.79,
|
4079 |
+
"learning_rate": 2.3090586145648312e-05,
|
4080 |
+
"loss": 0.3801,
|
4081 |
+
"step": 6000
|
4082 |
+
},
|
4083 |
+
{
|
4084 |
+
"epoch": 9.79,
|
4085 |
+
"eval_loss": 0.4120555818080902,
|
4086 |
+
"eval_runtime": 13.0061,
|
4087 |
+
"eval_samples_per_second": 90.035,
|
4088 |
+
"eval_steps_per_second": 11.302,
|
4089 |
+
"step": 6000
|
4090 |
}
|
4091 |
],
|
4092 |
"logging_steps": 10,
|
4093 |
"max_steps": 6130,
|
4094 |
"num_train_epochs": 10,
|
4095 |
"save_steps": 500,
|
4096 |
+
"total_flos": 1.1329492947664896e+16,
|
4097 |
"trial_name": null,
|
4098 |
"trial_params": null
|
4099 |
}
|