somaia02 committed
Commit d058be0 · 1 Parent(s): f554747

Training in progress, step 6000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:179d085ca837c247e51e495c481698bce1fe87106ba872d158b33f05ebd68c28
+ oid sha256:cd42a6a3b012049dbfcf0d4ad62b630ef8dca1676e140ca7ca180a24e13c910d
  size 2669168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d5865282ec31b70188127e177049048a4608470067efdf2c6677aee5dd113a3d
+ oid sha256:628d3ad5e22c61b9e139f66dac9c70cf34279bc9e3690968e4e15d122f68c3fb
  size 5399290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2a35ab43707c6a593050fe6f3d1b69b4d0787f56cec6787477847ec0479597cc
+ oid sha256:acaefe44adfe5a8938c73ae689d02d9d4a52614411ab8ab1eb80188c16c1a919
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30332e8503dd44f865572ef943e9da8b0c9c1c0a4084d30212d8b7e0b9a4d2d8
+ oid sha256:6a4dd1d74816502c8ecbc715add6bae4e99a2b4e50b653b0b20cfecda567b3eb
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.4268430769443512,
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-5500",
- "epoch": 8.97226753670473,
+ "best_metric": 0.4207456707954407,
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-6000",
+ "epoch": 9.787928221859707,
  "eval_steps": 100,
- "global_step": 5500,
+ "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3747,13 +3747,353 @@
  "eval_samples_per_second": 205.042,
  "eval_steps_per_second": 12.957,
  "step": 5500
+ },
+ {
+ "epoch": 8.99,
+ "learning_rate": 0.00011012433392539965,
+ "loss": 0.4506,
+ "step": 5510
+ },
+ {
+ "epoch": 9.0,
+ "learning_rate": 0.000108348134991119,
+ "loss": 0.3832,
+ "step": 5520
+ },
+ {
+ "epoch": 9.02,
+ "learning_rate": 0.00010657193605683836,
+ "loss": 0.4018,
+ "step": 5530
+ },
+ {
+ "epoch": 9.04,
+ "learning_rate": 0.00010479573712255772,
+ "loss": 0.413,
+ "step": 5540
+ },
+ {
+ "epoch": 9.05,
+ "learning_rate": 0.00010301953818827709,
+ "loss": 0.4317,
+ "step": 5550
+ },
+ {
+ "epoch": 9.07,
+ "learning_rate": 0.00010124333925399646,
+ "loss": 0.4117,
+ "step": 5560
+ },
+ {
+ "epoch": 9.09,
+ "learning_rate": 9.946714031971581e-05,
+ "loss": 0.4068,
+ "step": 5570
+ },
+ {
+ "epoch": 9.1,
+ "learning_rate": 9.769094138543518e-05,
+ "loss": 0.3956,
+ "step": 5580
+ },
+ {
+ "epoch": 9.12,
+ "learning_rate": 9.591474245115453e-05,
+ "loss": 0.3948,
+ "step": 5590
+ },
+ {
+ "epoch": 9.14,
+ "learning_rate": 9.41385435168739e-05,
+ "loss": 0.3842,
+ "step": 5600
+ },
+ {
+ "epoch": 9.14,
+ "eval_loss": 0.42232006788253784,
+ "eval_runtime": 5.687,
+ "eval_samples_per_second": 205.907,
+ "eval_steps_per_second": 13.012,
+ "step": 5600
+ },
+ {
+ "epoch": 9.15,
+ "learning_rate": 9.236234458259325e-05,
+ "loss": 0.4099,
+ "step": 5610
+ },
+ {
+ "epoch": 9.17,
+ "learning_rate": 9.05861456483126e-05,
+ "loss": 0.413,
+ "step": 5620
+ },
+ {
+ "epoch": 9.18,
+ "learning_rate": 8.880994671403198e-05,
+ "loss": 0.4015,
+ "step": 5630
+ },
+ {
+ "epoch": 9.2,
+ "learning_rate": 8.703374777975133e-05,
+ "loss": 0.4304,
+ "step": 5640
+ },
+ {
+ "epoch": 9.22,
+ "learning_rate": 8.52575488454707e-05,
+ "loss": 0.4239,
+ "step": 5650
+ },
+ {
+ "epoch": 9.23,
+ "learning_rate": 8.348134991119005e-05,
+ "loss": 0.3997,
+ "step": 5660
+ },
+ {
+ "epoch": 9.25,
+ "learning_rate": 8.170515097690942e-05,
+ "loss": 0.4935,
+ "step": 5670
+ },
+ {
+ "epoch": 9.27,
+ "learning_rate": 7.992895204262877e-05,
+ "loss": 0.4035,
+ "step": 5680
+ },
+ {
+ "epoch": 9.28,
+ "learning_rate": 7.815275310834814e-05,
+ "loss": 0.435,
+ "step": 5690
+ },
+ {
+ "epoch": 9.3,
+ "learning_rate": 7.63765541740675e-05,
+ "loss": 0.4267,
+ "step": 5700
+ },
+ {
+ "epoch": 9.3,
+ "eval_loss": 0.4202769696712494,
+ "eval_runtime": 5.77,
+ "eval_samples_per_second": 202.946,
+ "eval_steps_per_second": 12.825,
+ "step": 5700
+ },
+ {
+ "epoch": 9.31,
+ "learning_rate": 7.460035523978686e-05,
+ "loss": 0.4154,
+ "step": 5710
+ },
+ {
+ "epoch": 9.33,
+ "learning_rate": 7.282415630550622e-05,
+ "loss": 0.4107,
+ "step": 5720
+ },
+ {
+ "epoch": 9.35,
+ "learning_rate": 7.104795737122558e-05,
+ "loss": 0.424,
+ "step": 5730
+ },
+ {
+ "epoch": 9.36,
+ "learning_rate": 6.927175843694494e-05,
+ "loss": 0.3896,
+ "step": 5740
+ },
+ {
+ "epoch": 9.38,
+ "learning_rate": 6.74955595026643e-05,
+ "loss": 0.4094,
+ "step": 5750
+ },
+ {
+ "epoch": 9.4,
+ "learning_rate": 6.571936056838366e-05,
+ "loss": 0.4134,
+ "step": 5760
+ },
+ {
+ "epoch": 9.41,
+ "learning_rate": 6.394316163410301e-05,
+ "loss": 0.4232,
+ "step": 5770
+ },
+ {
+ "epoch": 9.43,
+ "learning_rate": 6.216696269982238e-05,
+ "loss": 0.4005,
+ "step": 5780
+ },
+ {
+ "epoch": 9.45,
+ "learning_rate": 6.039076376554174e-05,
+ "loss": 0.4138,
+ "step": 5790
+ },
+ {
+ "epoch": 9.46,
+ "learning_rate": 5.861456483126111e-05,
+ "loss": 0.423,
+ "step": 5800
+ },
+ {
+ "epoch": 9.46,
+ "eval_loss": 0.4260061979293823,
+ "eval_runtime": 6.015,
+ "eval_samples_per_second": 194.679,
+ "eval_steps_per_second": 12.303,
+ "step": 5800
+ },
+ {
+ "epoch": 9.48,
+ "learning_rate": 5.6838365896980466e-05,
+ "loss": 0.3959,
+ "step": 5810
+ },
+ {
+ "epoch": 9.49,
+ "learning_rate": 5.5062166962699826e-05,
+ "loss": 0.4072,
+ "step": 5820
+ },
+ {
+ "epoch": 9.51,
+ "learning_rate": 5.328596802841918e-05,
+ "loss": 0.429,
+ "step": 5830
+ },
+ {
+ "epoch": 9.53,
+ "learning_rate": 5.1509769094138545e-05,
+ "loss": 0.4161,
+ "step": 5840
+ },
+ {
+ "epoch": 9.54,
+ "learning_rate": 4.9733570159857905e-05,
+ "loss": 0.4319,
+ "step": 5850
+ },
+ {
+ "epoch": 9.56,
+ "learning_rate": 4.7957371225577264e-05,
+ "loss": 0.4195,
+ "step": 5860
+ },
+ {
+ "epoch": 9.58,
+ "learning_rate": 4.6181172291296624e-05,
+ "loss": 0.409,
+ "step": 5870
+ },
+ {
+ "epoch": 9.59,
+ "learning_rate": 4.440497335701599e-05,
+ "loss": 0.4327,
+ "step": 5880
+ },
+ {
+ "epoch": 9.61,
+ "learning_rate": 4.262877442273535e-05,
+ "loss": 0.4337,
+ "step": 5890
+ },
+ {
+ "epoch": 9.62,
+ "learning_rate": 4.085257548845471e-05,
+ "loss": 0.4506,
+ "step": 5900
+ },
+ {
+ "epoch": 9.62,
+ "eval_loss": 0.4249822795391083,
+ "eval_runtime": 5.633,
+ "eval_samples_per_second": 207.881,
+ "eval_steps_per_second": 13.137,
+ "step": 5900
+ },
+ {
+ "epoch": 9.64,
+ "learning_rate": 3.907637655417407e-05,
+ "loss": 0.4067,
+ "step": 5910
+ },
+ {
+ "epoch": 9.66,
+ "learning_rate": 3.730017761989343e-05,
+ "loss": 0.3946,
+ "step": 5920
+ },
+ {
+ "epoch": 9.67,
+ "learning_rate": 3.552397868561279e-05,
+ "loss": 0.4301,
+ "step": 5930
+ },
+ {
+ "epoch": 9.69,
+ "learning_rate": 3.374777975133215e-05,
+ "loss": 0.4204,
+ "step": 5940
+ },
+ {
+ "epoch": 9.71,
+ "learning_rate": 3.197158081705151e-05,
+ "loss": 0.4298,
+ "step": 5950
+ },
+ {
+ "epoch": 9.72,
+ "learning_rate": 3.019538188277087e-05,
+ "loss": 0.4056,
+ "step": 5960
+ },
+ {
+ "epoch": 9.74,
+ "learning_rate": 2.8419182948490233e-05,
+ "loss": 0.4377,
+ "step": 5970
+ },
+ {
+ "epoch": 9.76,
+ "learning_rate": 2.664298401420959e-05,
+ "loss": 0.417,
+ "step": 5980
+ },
+ {
+ "epoch": 9.77,
+ "learning_rate": 2.4866785079928952e-05,
+ "loss": 0.4153,
+ "step": 5990
+ },
+ {
+ "epoch": 9.79,
+ "learning_rate": 2.3090586145648312e-05,
+ "loss": 0.4087,
+ "step": 6000
+ },
+ {
+ "epoch": 9.79,
+ "eval_loss": 0.4207456707954407,
+ "eval_runtime": 5.625,
+ "eval_samples_per_second": 208.177,
+ "eval_steps_per_second": 13.156,
+ "step": 6000
  }
  ],
  "logging_steps": 10,
  "max_steps": 6130,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 1.1987241223716864e+16,
+ "total_flos": 1.3060363769413632e+16,
  "trial_name": null,
  "trial_params": null
  }