somaia02 committed
Commit 24602bc · 1 Parent(s): 0e37dec

Training in progress, step 6000, checkpoint
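The files below are the standard artifacts the Hugging Face Trainer writes for a mid-training checkpoint of a LoRA-adapted BART model. For context, here is a minimal sketch of a setup that would produce this layout; the base model name, LoRA hyperparameters, and datasets are placeholders (not taken from this commit), while the scheduling values and output directory mirror the trainer_state.json diff further down.

# A minimal sketch, not the author's actual script: base model, LoRA settings and
# datasets are assumptions; only the schedule values come from trainer_state.json.
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)
from peft import LoraConfig, TaskType, get_peft_model

base_model_name = "facebook/bart-base"  # placeholder; the real base model is not recorded here
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)

# Only the LoRA adapter weights are trained, which is why the checkpoint stores a
# small adapter_model.safetensors (~5 MB) rather than full model weights.
model = get_peft_model(
    base,
    LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, r=16, lora_alpha=32, lora_dropout=0.1),
)

args = Seq2SeqTrainingArguments(
    output_dir="bart_lora_outputs",    # matches best_model_checkpoint below
    evaluation_strategy="steps",
    eval_steps=100,                    # "eval_steps": 100
    logging_steps=10,                  # "logging_steps": 10
    save_steps=500,                    # "save_steps": 500
    num_train_epochs=10,               # "num_train_epochs": 10
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

train_dataset = None  # placeholder: supply a tokenized seq2seq training split
eval_dataset = None   # placeholder: supply a tokenized validation split

trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)
# Each save writes adapter_model.safetensors, optimizer.pt, scheduler.pt,
# rng_state.pth and trainer_state.json into bart_lora_outputs/checkpoint-<step>.
trainer.train()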
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afdc7bd9d3bef6c5e894b53ed72a94b99726290b734e0bf561bc5081e613a3a5
+oid sha256:6d5523b247f7b13c0b8050366732eda9261579a506398d6de39341316a1ec0b1
 size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b8dc6e6ab63f8f2ab0a2ef7abcaaafacf3195f20706e2cf750f59052bce8207
+oid sha256:273c5173037f928b2e179fc992186b177c9f9ca1bb1712cdb61c2c4f9bd56859
 size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab1e40e99fab0633bd87e6ef55b536c62fe829ab788d235ef7008de5c52a1ab6
+oid sha256:646e83085f2d11ddc437ce48f3f5db2b43745e68a45e0af2d7fa68feadebf520
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30332e8503dd44f865572ef943e9da8b0c9c1c0a4084d30212d8b7e0b9a4d2d8
+oid sha256:6a4dd1d74816502c8ecbc715add6bae4e99a2b4e50b653b0b20cfecda567b3eb
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.417066365480423,
-  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-5500",
-  "epoch": 8.97226753670473,
+  "best_metric": 0.4120555818080902,
+  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-6000",
+  "epoch": 9.787928221859707,
   "eval_steps": 100,
-  "global_step": 5500,
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3747,13 +3747,353 @@
       "eval_samples_per_second": 91.742,
       "eval_steps_per_second": 11.517,
       "step": 5500
+    },
+    {
+      "epoch": 8.99,
+      "learning_rate": 0.00011012433392539965,
+      "loss": 0.4155,
+      "step": 5510
+    },
+    {
+      "epoch": 9.0,
+      "learning_rate": 0.000108348134991119,
+      "loss": 0.3614,
+      "step": 5520
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 0.00010657193605683836,
+      "loss": 0.3688,
+      "step": 5530
+    },
+    {
+      "epoch": 9.04,
+      "learning_rate": 0.00010479573712255772,
+      "loss": 0.3798,
+      "step": 5540
+    },
+    {
+      "epoch": 9.05,
+      "learning_rate": 0.00010301953818827709,
+      "loss": 0.3968,
+      "step": 5550
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 0.00010124333925399646,
+      "loss": 0.3894,
+      "step": 5560
+    },
+    {
+      "epoch": 9.09,
+      "learning_rate": 9.946714031971581e-05,
+      "loss": 0.3858,
+      "step": 5570
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 9.769094138543518e-05,
+      "loss": 0.3638,
+      "step": 5580
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 9.591474245115453e-05,
+      "loss": 0.3674,
+      "step": 5590
+    },
+    {
+      "epoch": 9.14,
+      "learning_rate": 9.41385435168739e-05,
+      "loss": 0.3568,
+      "step": 5600
+    },
+    {
+      "epoch": 9.14,
+      "eval_loss": 0.41371241211891174,
+      "eval_runtime": 13.0451,
+      "eval_samples_per_second": 89.766,
+      "eval_steps_per_second": 11.269,
+      "step": 5600
+    },
+    {
+      "epoch": 9.15,
+      "learning_rate": 9.236234458259325e-05,
+      "loss": 0.3748,
+      "step": 5610
+    },
+    {
+      "epoch": 9.17,
+      "learning_rate": 9.05861456483126e-05,
+      "loss": 0.3901,
+      "step": 5620
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 8.880994671403198e-05,
+      "loss": 0.3737,
+      "step": 5630
+    },
+    {
+      "epoch": 9.2,
+      "learning_rate": 8.703374777975133e-05,
+      "loss": 0.3973,
+      "step": 5640
+    },
+    {
+      "epoch": 9.22,
+      "learning_rate": 8.52575488454707e-05,
+      "loss": 0.3985,
+      "step": 5650
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 8.348134991119005e-05,
+      "loss": 0.37,
+      "step": 5660
+    },
+    {
+      "epoch": 9.25,
+      "learning_rate": 8.170515097690942e-05,
+      "loss": 0.4537,
+      "step": 5670
+    },
+    {
+      "epoch": 9.27,
+      "learning_rate": 7.992895204262877e-05,
+      "loss": 0.3747,
+      "step": 5680
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 7.815275310834814e-05,
+      "loss": 0.4095,
+      "step": 5690
+    },
+    {
+      "epoch": 9.3,
+      "learning_rate": 7.63765541740675e-05,
+      "loss": 0.3971,
+      "step": 5700
+    },
+    {
+      "epoch": 9.3,
+      "eval_loss": 0.4128452241420746,
+      "eval_runtime": 13.0551,
+      "eval_samples_per_second": 89.697,
+      "eval_steps_per_second": 11.26,
+      "step": 5700
+    },
+    {
+      "epoch": 9.31,
+      "learning_rate": 7.460035523978686e-05,
+      "loss": 0.3814,
+      "step": 5710
+    },
+    {
+      "epoch": 9.33,
+      "learning_rate": 7.282415630550622e-05,
+      "loss": 0.3894,
+      "step": 5720
+    },
+    {
+      "epoch": 9.35,
+      "learning_rate": 7.104795737122558e-05,
+      "loss": 0.3985,
+      "step": 5730
+    },
+    {
+      "epoch": 9.36,
+      "learning_rate": 6.927175843694494e-05,
+      "loss": 0.3593,
+      "step": 5740
+    },
+    {
+      "epoch": 9.38,
+      "learning_rate": 6.74955595026643e-05,
+      "loss": 0.3783,
+      "step": 5750
+    },
+    {
+      "epoch": 9.4,
+      "learning_rate": 6.571936056838366e-05,
+      "loss": 0.387,
+      "step": 5760
+    },
+    {
+      "epoch": 9.41,
+      "learning_rate": 6.394316163410301e-05,
+      "loss": 0.3839,
+      "step": 5770
+    },
+    {
+      "epoch": 9.43,
+      "learning_rate": 6.216696269982238e-05,
+      "loss": 0.3701,
+      "step": 5780
+    },
+    {
+      "epoch": 9.45,
+      "learning_rate": 6.039076376554174e-05,
+      "loss": 0.3752,
+      "step": 5790
+    },
+    {
+      "epoch": 9.46,
+      "learning_rate": 5.861456483126111e-05,
+      "loss": 0.3907,
+      "step": 5800
+    },
+    {
+      "epoch": 9.46,
+      "eval_loss": 0.41585057973861694,
+      "eval_runtime": 13.094,
+      "eval_samples_per_second": 89.43,
+      "eval_steps_per_second": 11.227,
+      "step": 5800
+    },
+    {
+      "epoch": 9.48,
+      "learning_rate": 5.6838365896980466e-05,
+      "loss": 0.3675,
+      "step": 5810
+    },
+    {
+      "epoch": 9.49,
+      "learning_rate": 5.5062166962699826e-05,
+      "loss": 0.3804,
+      "step": 5820
+    },
+    {
+      "epoch": 9.51,
+      "learning_rate": 5.328596802841918e-05,
+      "loss": 0.3968,
+      "step": 5830
+    },
+    {
+      "epoch": 9.53,
+      "learning_rate": 5.1509769094138545e-05,
+      "loss": 0.3831,
+      "step": 5840
+    },
+    {
+      "epoch": 9.54,
+      "learning_rate": 4.9733570159857905e-05,
+      "loss": 0.3973,
+      "step": 5850
+    },
+    {
+      "epoch": 9.56,
+      "learning_rate": 4.7957371225577264e-05,
+      "loss": 0.3962,
+      "step": 5860
+    },
+    {
+      "epoch": 9.58,
+      "learning_rate": 4.6181172291296624e-05,
+      "loss": 0.3819,
+      "step": 5870
+    },
+    {
+      "epoch": 9.59,
+      "learning_rate": 4.440497335701599e-05,
+      "loss": 0.4078,
+      "step": 5880
+    },
+    {
+      "epoch": 9.61,
+      "learning_rate": 4.262877442273535e-05,
+      "loss": 0.4032,
+      "step": 5890
+    },
+    {
+      "epoch": 9.62,
+      "learning_rate": 4.085257548845471e-05,
+      "loss": 0.4149,
+      "step": 5900
+    },
+    {
+      "epoch": 9.62,
+      "eval_loss": 0.4138317406177521,
+      "eval_runtime": 13.0042,
+      "eval_samples_per_second": 90.048,
+      "eval_steps_per_second": 11.304,
+      "step": 5900
+    },
+    {
+      "epoch": 9.64,
+      "learning_rate": 3.907637655417407e-05,
+      "loss": 0.385,
+      "step": 5910
+    },
+    {
+      "epoch": 9.66,
+      "learning_rate": 3.730017761989343e-05,
+      "loss": 0.3657,
+      "step": 5920
+    },
+    {
+      "epoch": 9.67,
+      "learning_rate": 3.552397868561279e-05,
+      "loss": 0.3999,
+      "step": 5930
+    },
+    {
+      "epoch": 9.69,
+      "learning_rate": 3.374777975133215e-05,
+      "loss": 0.3896,
+      "step": 5940
+    },
+    {
+      "epoch": 9.71,
+      "learning_rate": 3.197158081705151e-05,
+      "loss": 0.4021,
+      "step": 5950
+    },
+    {
+      "epoch": 9.72,
+      "learning_rate": 3.019538188277087e-05,
+      "loss": 0.3781,
+      "step": 5960
+    },
+    {
+      "epoch": 9.74,
+      "learning_rate": 2.8419182948490233e-05,
+      "loss": 0.4082,
+      "step": 5970
+    },
+    {
+      "epoch": 9.76,
+      "learning_rate": 2.664298401420959e-05,
+      "loss": 0.3819,
+      "step": 5980
+    },
+    {
+      "epoch": 9.77,
+      "learning_rate": 2.4866785079928952e-05,
+      "loss": 0.3874,
+      "step": 5990
+    },
+    {
+      "epoch": 9.79,
+      "learning_rate": 2.3090586145648312e-05,
+      "loss": 0.3801,
+      "step": 6000
+    },
+    {
+      "epoch": 9.79,
+      "eval_loss": 0.4120555818080902,
+      "eval_runtime": 13.0061,
+      "eval_samples_per_second": 90.035,
+      "eval_steps_per_second": 11.302,
+      "step": 6000
     }
   ],
   "logging_steps": 10,
   "max_steps": 6130,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 1.039214421393408e+16,
+  "total_flos": 1.1329492947664896e+16,
   "trial_name": null,
   "trial_params": null
 }
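The bookkeeping in this diff can be read back directly, and the optimizer.pt, scheduler.pt and rng_state.pth files above are what let training resume exactly where step 6000 left off. A minimal sketch, assuming the checkpoint has been fetched locally under this repo's last-checkpoint/ directory:

import json

# Inspect the state recorded at step 6000 (paths follow this repo's layout).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.4120555818080902
print(state["best_model_checkpoint"])  # bart_lora_outputs\checkpoint-6000
print(state["global_step"], "of", state["max_steps"])  # 6000 of 6130

# Resuming from the checkpoint also restores the optimizer, LR schedule and RNG state:
# trainer.train(resume_from_checkpoint="bart_lora_outputs/checkpoint-6000")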