Training in progress, step 6000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2669168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd42a6a3b012049dbfcf0d4ad62b630ef8dca1676e140ca7ca180a24e13c910d
|
3 |
size 2669168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5399290
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:628d3ad5e22c61b9e139f66dac9c70cf34279bc9e3690968e4e15d122f68c3fb
|
3 |
size 5399290
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acaefe44adfe5a8938c73ae689d02d9d4a52614411ab8ab1eb80188c16c1a919
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a4dd1d74816502c8ecbc715add6bae4e99a2b4e50b653b0b20cfecda567b3eb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3747,13 +3747,353 @@
|
|
3747 |
"eval_samples_per_second": 205.042,
|
3748 |
"eval_steps_per_second": 12.957,
|
3749 |
"step": 5500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3750 |
}
|
3751 |
],
|
3752 |
"logging_steps": 10,
|
3753 |
"max_steps": 6130,
|
3754 |
"num_train_epochs": 10,
|
3755 |
"save_steps": 500,
|
3756 |
-
"total_flos": 1.
|
3757 |
"trial_name": null,
|
3758 |
"trial_params": null
|
3759 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4207456707954407,
|
3 |
+
"best_model_checkpoint": "bart_lora_outputs\\checkpoint-6000",
|
4 |
+
"epoch": 9.787928221859707,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 6000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3747 |
"eval_samples_per_second": 205.042,
|
3748 |
"eval_steps_per_second": 12.957,
|
3749 |
"step": 5500
|
3750 |
+
},
|
3751 |
+
{
|
3752 |
+
"epoch": 8.99,
|
3753 |
+
"learning_rate": 0.00011012433392539965,
|
3754 |
+
"loss": 0.4506,
|
3755 |
+
"step": 5510
|
3756 |
+
},
|
3757 |
+
{
|
3758 |
+
"epoch": 9.0,
|
3759 |
+
"learning_rate": 0.000108348134991119,
|
3760 |
+
"loss": 0.3832,
|
3761 |
+
"step": 5520
|
3762 |
+
},
|
3763 |
+
{
|
3764 |
+
"epoch": 9.02,
|
3765 |
+
"learning_rate": 0.00010657193605683836,
|
3766 |
+
"loss": 0.4018,
|
3767 |
+
"step": 5530
|
3768 |
+
},
|
3769 |
+
{
|
3770 |
+
"epoch": 9.04,
|
3771 |
+
"learning_rate": 0.00010479573712255772,
|
3772 |
+
"loss": 0.413,
|
3773 |
+
"step": 5540
|
3774 |
+
},
|
3775 |
+
{
|
3776 |
+
"epoch": 9.05,
|
3777 |
+
"learning_rate": 0.00010301953818827709,
|
3778 |
+
"loss": 0.4317,
|
3779 |
+
"step": 5550
|
3780 |
+
},
|
3781 |
+
{
|
3782 |
+
"epoch": 9.07,
|
3783 |
+
"learning_rate": 0.00010124333925399646,
|
3784 |
+
"loss": 0.4117,
|
3785 |
+
"step": 5560
|
3786 |
+
},
|
3787 |
+
{
|
3788 |
+
"epoch": 9.09,
|
3789 |
+
"learning_rate": 9.946714031971581e-05,
|
3790 |
+
"loss": 0.4068,
|
3791 |
+
"step": 5570
|
3792 |
+
},
|
3793 |
+
{
|
3794 |
+
"epoch": 9.1,
|
3795 |
+
"learning_rate": 9.769094138543518e-05,
|
3796 |
+
"loss": 0.3956,
|
3797 |
+
"step": 5580
|
3798 |
+
},
|
3799 |
+
{
|
3800 |
+
"epoch": 9.12,
|
3801 |
+
"learning_rate": 9.591474245115453e-05,
|
3802 |
+
"loss": 0.3948,
|
3803 |
+
"step": 5590
|
3804 |
+
},
|
3805 |
+
{
|
3806 |
+
"epoch": 9.14,
|
3807 |
+
"learning_rate": 9.41385435168739e-05,
|
3808 |
+
"loss": 0.3842,
|
3809 |
+
"step": 5600
|
3810 |
+
},
|
3811 |
+
{
|
3812 |
+
"epoch": 9.14,
|
3813 |
+
"eval_loss": 0.42232006788253784,
|
3814 |
+
"eval_runtime": 5.687,
|
3815 |
+
"eval_samples_per_second": 205.907,
|
3816 |
+
"eval_steps_per_second": 13.012,
|
3817 |
+
"step": 5600
|
3818 |
+
},
|
3819 |
+
{
|
3820 |
+
"epoch": 9.15,
|
3821 |
+
"learning_rate": 9.236234458259325e-05,
|
3822 |
+
"loss": 0.4099,
|
3823 |
+
"step": 5610
|
3824 |
+
},
|
3825 |
+
{
|
3826 |
+
"epoch": 9.17,
|
3827 |
+
"learning_rate": 9.05861456483126e-05,
|
3828 |
+
"loss": 0.413,
|
3829 |
+
"step": 5620
|
3830 |
+
},
|
3831 |
+
{
|
3832 |
+
"epoch": 9.18,
|
3833 |
+
"learning_rate": 8.880994671403198e-05,
|
3834 |
+
"loss": 0.4015,
|
3835 |
+
"step": 5630
|
3836 |
+
},
|
3837 |
+
{
|
3838 |
+
"epoch": 9.2,
|
3839 |
+
"learning_rate": 8.703374777975133e-05,
|
3840 |
+
"loss": 0.4304,
|
3841 |
+
"step": 5640
|
3842 |
+
},
|
3843 |
+
{
|
3844 |
+
"epoch": 9.22,
|
3845 |
+
"learning_rate": 8.52575488454707e-05,
|
3846 |
+
"loss": 0.4239,
|
3847 |
+
"step": 5650
|
3848 |
+
},
|
3849 |
+
{
|
3850 |
+
"epoch": 9.23,
|
3851 |
+
"learning_rate": 8.348134991119005e-05,
|
3852 |
+
"loss": 0.3997,
|
3853 |
+
"step": 5660
|
3854 |
+
},
|
3855 |
+
{
|
3856 |
+
"epoch": 9.25,
|
3857 |
+
"learning_rate": 8.170515097690942e-05,
|
3858 |
+
"loss": 0.4935,
|
3859 |
+
"step": 5670
|
3860 |
+
},
|
3861 |
+
{
|
3862 |
+
"epoch": 9.27,
|
3863 |
+
"learning_rate": 7.992895204262877e-05,
|
3864 |
+
"loss": 0.4035,
|
3865 |
+
"step": 5680
|
3866 |
+
},
|
3867 |
+
{
|
3868 |
+
"epoch": 9.28,
|
3869 |
+
"learning_rate": 7.815275310834814e-05,
|
3870 |
+
"loss": 0.435,
|
3871 |
+
"step": 5690
|
3872 |
+
},
|
3873 |
+
{
|
3874 |
+
"epoch": 9.3,
|
3875 |
+
"learning_rate": 7.63765541740675e-05,
|
3876 |
+
"loss": 0.4267,
|
3877 |
+
"step": 5700
|
3878 |
+
},
|
3879 |
+
{
|
3880 |
+
"epoch": 9.3,
|
3881 |
+
"eval_loss": 0.4202769696712494,
|
3882 |
+
"eval_runtime": 5.77,
|
3883 |
+
"eval_samples_per_second": 202.946,
|
3884 |
+
"eval_steps_per_second": 12.825,
|
3885 |
+
"step": 5700
|
3886 |
+
},
|
3887 |
+
{
|
3888 |
+
"epoch": 9.31,
|
3889 |
+
"learning_rate": 7.460035523978686e-05,
|
3890 |
+
"loss": 0.4154,
|
3891 |
+
"step": 5710
|
3892 |
+
},
|
3893 |
+
{
|
3894 |
+
"epoch": 9.33,
|
3895 |
+
"learning_rate": 7.282415630550622e-05,
|
3896 |
+
"loss": 0.4107,
|
3897 |
+
"step": 5720
|
3898 |
+
},
|
3899 |
+
{
|
3900 |
+
"epoch": 9.35,
|
3901 |
+
"learning_rate": 7.104795737122558e-05,
|
3902 |
+
"loss": 0.424,
|
3903 |
+
"step": 5730
|
3904 |
+
},
|
3905 |
+
{
|
3906 |
+
"epoch": 9.36,
|
3907 |
+
"learning_rate": 6.927175843694494e-05,
|
3908 |
+
"loss": 0.3896,
|
3909 |
+
"step": 5740
|
3910 |
+
},
|
3911 |
+
{
|
3912 |
+
"epoch": 9.38,
|
3913 |
+
"learning_rate": 6.74955595026643e-05,
|
3914 |
+
"loss": 0.4094,
|
3915 |
+
"step": 5750
|
3916 |
+
},
|
3917 |
+
{
|
3918 |
+
"epoch": 9.4,
|
3919 |
+
"learning_rate": 6.571936056838366e-05,
|
3920 |
+
"loss": 0.4134,
|
3921 |
+
"step": 5760
|
3922 |
+
},
|
3923 |
+
{
|
3924 |
+
"epoch": 9.41,
|
3925 |
+
"learning_rate": 6.394316163410301e-05,
|
3926 |
+
"loss": 0.4232,
|
3927 |
+
"step": 5770
|
3928 |
+
},
|
3929 |
+
{
|
3930 |
+
"epoch": 9.43,
|
3931 |
+
"learning_rate": 6.216696269982238e-05,
|
3932 |
+
"loss": 0.4005,
|
3933 |
+
"step": 5780
|
3934 |
+
},
|
3935 |
+
{
|
3936 |
+
"epoch": 9.45,
|
3937 |
+
"learning_rate": 6.039076376554174e-05,
|
3938 |
+
"loss": 0.4138,
|
3939 |
+
"step": 5790
|
3940 |
+
},
|
3941 |
+
{
|
3942 |
+
"epoch": 9.46,
|
3943 |
+
"learning_rate": 5.861456483126111e-05,
|
3944 |
+
"loss": 0.423,
|
3945 |
+
"step": 5800
|
3946 |
+
},
|
3947 |
+
{
|
3948 |
+
"epoch": 9.46,
|
3949 |
+
"eval_loss": 0.4260061979293823,
|
3950 |
+
"eval_runtime": 6.015,
|
3951 |
+
"eval_samples_per_second": 194.679,
|
3952 |
+
"eval_steps_per_second": 12.303,
|
3953 |
+
"step": 5800
|
3954 |
+
},
|
3955 |
+
{
|
3956 |
+
"epoch": 9.48,
|
3957 |
+
"learning_rate": 5.6838365896980466e-05,
|
3958 |
+
"loss": 0.3959,
|
3959 |
+
"step": 5810
|
3960 |
+
},
|
3961 |
+
{
|
3962 |
+
"epoch": 9.49,
|
3963 |
+
"learning_rate": 5.5062166962699826e-05,
|
3964 |
+
"loss": 0.4072,
|
3965 |
+
"step": 5820
|
3966 |
+
},
|
3967 |
+
{
|
3968 |
+
"epoch": 9.51,
|
3969 |
+
"learning_rate": 5.328596802841918e-05,
|
3970 |
+
"loss": 0.429,
|
3971 |
+
"step": 5830
|
3972 |
+
},
|
3973 |
+
{
|
3974 |
+
"epoch": 9.53,
|
3975 |
+
"learning_rate": 5.1509769094138545e-05,
|
3976 |
+
"loss": 0.4161,
|
3977 |
+
"step": 5840
|
3978 |
+
},
|
3979 |
+
{
|
3980 |
+
"epoch": 9.54,
|
3981 |
+
"learning_rate": 4.9733570159857905e-05,
|
3982 |
+
"loss": 0.4319,
|
3983 |
+
"step": 5850
|
3984 |
+
},
|
3985 |
+
{
|
3986 |
+
"epoch": 9.56,
|
3987 |
+
"learning_rate": 4.7957371225577264e-05,
|
3988 |
+
"loss": 0.4195,
|
3989 |
+
"step": 5860
|
3990 |
+
},
|
3991 |
+
{
|
3992 |
+
"epoch": 9.58,
|
3993 |
+
"learning_rate": 4.6181172291296624e-05,
|
3994 |
+
"loss": 0.409,
|
3995 |
+
"step": 5870
|
3996 |
+
},
|
3997 |
+
{
|
3998 |
+
"epoch": 9.59,
|
3999 |
+
"learning_rate": 4.440497335701599e-05,
|
4000 |
+
"loss": 0.4327,
|
4001 |
+
"step": 5880
|
4002 |
+
},
|
4003 |
+
{
|
4004 |
+
"epoch": 9.61,
|
4005 |
+
"learning_rate": 4.262877442273535e-05,
|
4006 |
+
"loss": 0.4337,
|
4007 |
+
"step": 5890
|
4008 |
+
},
|
4009 |
+
{
|
4010 |
+
"epoch": 9.62,
|
4011 |
+
"learning_rate": 4.085257548845471e-05,
|
4012 |
+
"loss": 0.4506,
|
4013 |
+
"step": 5900
|
4014 |
+
},
|
4015 |
+
{
|
4016 |
+
"epoch": 9.62,
|
4017 |
+
"eval_loss": 0.4249822795391083,
|
4018 |
+
"eval_runtime": 5.633,
|
4019 |
+
"eval_samples_per_second": 207.881,
|
4020 |
+
"eval_steps_per_second": 13.137,
|
4021 |
+
"step": 5900
|
4022 |
+
},
|
4023 |
+
{
|
4024 |
+
"epoch": 9.64,
|
4025 |
+
"learning_rate": 3.907637655417407e-05,
|
4026 |
+
"loss": 0.4067,
|
4027 |
+
"step": 5910
|
4028 |
+
},
|
4029 |
+
{
|
4030 |
+
"epoch": 9.66,
|
4031 |
+
"learning_rate": 3.730017761989343e-05,
|
4032 |
+
"loss": 0.3946,
|
4033 |
+
"step": 5920
|
4034 |
+
},
|
4035 |
+
{
|
4036 |
+
"epoch": 9.67,
|
4037 |
+
"learning_rate": 3.552397868561279e-05,
|
4038 |
+
"loss": 0.4301,
|
4039 |
+
"step": 5930
|
4040 |
+
},
|
4041 |
+
{
|
4042 |
+
"epoch": 9.69,
|
4043 |
+
"learning_rate": 3.374777975133215e-05,
|
4044 |
+
"loss": 0.4204,
|
4045 |
+
"step": 5940
|
4046 |
+
},
|
4047 |
+
{
|
4048 |
+
"epoch": 9.71,
|
4049 |
+
"learning_rate": 3.197158081705151e-05,
|
4050 |
+
"loss": 0.4298,
|
4051 |
+
"step": 5950
|
4052 |
+
},
|
4053 |
+
{
|
4054 |
+
"epoch": 9.72,
|
4055 |
+
"learning_rate": 3.019538188277087e-05,
|
4056 |
+
"loss": 0.4056,
|
4057 |
+
"step": 5960
|
4058 |
+
},
|
4059 |
+
{
|
4060 |
+
"epoch": 9.74,
|
4061 |
+
"learning_rate": 2.8419182948490233e-05,
|
4062 |
+
"loss": 0.4377,
|
4063 |
+
"step": 5970
|
4064 |
+
},
|
4065 |
+
{
|
4066 |
+
"epoch": 9.76,
|
4067 |
+
"learning_rate": 2.664298401420959e-05,
|
4068 |
+
"loss": 0.417,
|
4069 |
+
"step": 5980
|
4070 |
+
},
|
4071 |
+
{
|
4072 |
+
"epoch": 9.77,
|
4073 |
+
"learning_rate": 2.4866785079928952e-05,
|
4074 |
+
"loss": 0.4153,
|
4075 |
+
"step": 5990
|
4076 |
+
},
|
4077 |
+
{
|
4078 |
+
"epoch": 9.79,
|
4079 |
+
"learning_rate": 2.3090586145648312e-05,
|
4080 |
+
"loss": 0.4087,
|
4081 |
+
"step": 6000
|
4082 |
+
},
|
4083 |
+
{
|
4084 |
+
"epoch": 9.79,
|
4085 |
+
"eval_loss": 0.4207456707954407,
|
4086 |
+
"eval_runtime": 5.625,
|
4087 |
+
"eval_samples_per_second": 208.177,
|
4088 |
+
"eval_steps_per_second": 13.156,
|
4089 |
+
"step": 6000
|
4090 |
}
|
4091 |
],
|
4092 |
"logging_steps": 10,
|
4093 |
"max_steps": 6130,
|
4094 |
"num_train_epochs": 10,
|
4095 |
"save_steps": 500,
|
4096 |
+
"total_flos": 1.3060363769413632e+16,
|
4097 |
"trial_name": null,
|
4098 |
"trial_params": null
|
4099 |
}
|