somaia02 commited on
Commit
b98c12f
·
1 Parent(s): 1156d03

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c53e59c2bd584a2174d8768ace772b5d5796aa08166aa25302904cb3a665ff7
3
  size 5323528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58fa9dcca1c27ffd494a46534c40e8cf04faf98e8b281458e05b5136ae9fdcb
3
  size 5323528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ed95370d354bc8f635abaa3992f7d5d462b1bcb514428235550ba0f48b08b85
3
  size 10707706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03e500ed8e4953132e62be3704d6bfdca68f7a406db1b6ee83e2921feef9003
3
  size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa5f9b3cb2653c573d5d70e945ac9e78ff4b3999c649bc26690d113787889e70
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f82adac821233515f57963faf84277e6be21f1e14004a972d38969d3b12b54c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74be72e979aeba041a40f5740c89e95223a2d1671e242dd571b2005ebd09a8c1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf79ba88210639e0b725ee3ca8af70f266780a8aabbf9d25faf56fd6dd10d11b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.48665139079093933,
3
- "best_model_checkpoint": "bart_lora_outputs\\checkpoint-1000",
4
- "epoch": 1.631321370309951,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -687,13 +687,353 @@
687
  "eval_samples_per_second": 197.803,
688
  "eval_steps_per_second": 24.831,
689
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690
  }
691
  ],
692
  "logging_steps": 10,
693
  "max_steps": 6130,
694
  "num_train_epochs": 10,
695
  "save_steps": 500,
696
- "total_flos": 1883435087757312.0,
697
  "trial_name": null,
698
  "trial_params": null
699
  }
 
1
  {
2
+ "best_metric": 0.45563551783561707,
3
+ "best_model_checkpoint": "bart_lora_outputs\\checkpoint-1500",
4
+ "epoch": 2.4469820554649266,
5
  "eval_steps": 100,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
687
  "eval_samples_per_second": 197.803,
688
  "eval_steps_per_second": 24.831,
689
  "step": 1000
690
+ },
691
+ {
692
+ "epoch": 1.65,
693
+ "learning_rate": 0.0009094138543516875,
694
+ "loss": 0.5042,
695
+ "step": 1010
696
+ },
697
+ {
698
+ "epoch": 1.66,
699
+ "learning_rate": 0.0009076376554174067,
700
+ "loss": 0.5495,
701
+ "step": 1020
702
+ },
703
+ {
704
+ "epoch": 1.68,
705
+ "learning_rate": 0.0009058614564831261,
706
+ "loss": 0.5309,
707
+ "step": 1030
708
+ },
709
+ {
710
+ "epoch": 1.7,
711
+ "learning_rate": 0.0009040852575488455,
712
+ "loss": 0.5207,
713
+ "step": 1040
714
+ },
715
+ {
716
+ "epoch": 1.71,
717
+ "learning_rate": 0.0009023090586145648,
718
+ "loss": 0.5209,
719
+ "step": 1050
720
+ },
721
+ {
722
+ "epoch": 1.73,
723
+ "learning_rate": 0.0009005328596802842,
724
+ "loss": 0.5172,
725
+ "step": 1060
726
+ },
727
+ {
728
+ "epoch": 1.75,
729
+ "learning_rate": 0.0008987566607460036,
730
+ "loss": 0.5284,
731
+ "step": 1070
732
+ },
733
+ {
734
+ "epoch": 1.76,
735
+ "learning_rate": 0.0008969804618117229,
736
+ "loss": 0.532,
737
+ "step": 1080
738
+ },
739
+ {
740
+ "epoch": 1.78,
741
+ "learning_rate": 0.0008952042628774423,
742
+ "loss": 0.534,
743
+ "step": 1090
744
+ },
745
+ {
746
+ "epoch": 1.79,
747
+ "learning_rate": 0.0008934280639431617,
748
+ "loss": 0.5299,
749
+ "step": 1100
750
+ },
751
+ {
752
+ "epoch": 1.79,
753
+ "eval_loss": 0.47891008853912354,
754
+ "eval_runtime": 5.951,
755
+ "eval_samples_per_second": 196.772,
756
+ "eval_steps_per_second": 24.702,
757
+ "step": 1100
758
+ },
759
+ {
760
+ "epoch": 1.81,
761
+ "learning_rate": 0.000891651865008881,
762
+ "loss": 0.5213,
763
+ "step": 1110
764
+ },
765
+ {
766
+ "epoch": 1.83,
767
+ "learning_rate": 0.0008898756660746004,
768
+ "loss": 0.5443,
769
+ "step": 1120
770
+ },
771
+ {
772
+ "epoch": 1.84,
773
+ "learning_rate": 0.0008880994671403197,
774
+ "loss": 0.5367,
775
+ "step": 1130
776
+ },
777
+ {
778
+ "epoch": 1.86,
779
+ "learning_rate": 0.0008863232682060391,
780
+ "loss": 0.5393,
781
+ "step": 1140
782
+ },
783
+ {
784
+ "epoch": 1.88,
785
+ "learning_rate": 0.0008845470692717584,
786
+ "loss": 0.5286,
787
+ "step": 1150
788
+ },
789
+ {
790
+ "epoch": 1.89,
791
+ "learning_rate": 0.0008827708703374778,
792
+ "loss": 0.5363,
793
+ "step": 1160
794
+ },
795
+ {
796
+ "epoch": 1.91,
797
+ "learning_rate": 0.0008809946714031972,
798
+ "loss": 0.4707,
799
+ "step": 1170
800
+ },
801
+ {
802
+ "epoch": 1.92,
803
+ "learning_rate": 0.0008792184724689165,
804
+ "loss": 0.5414,
805
+ "step": 1180
806
+ },
807
+ {
808
+ "epoch": 1.94,
809
+ "learning_rate": 0.0008774422735346359,
810
+ "loss": 0.508,
811
+ "step": 1190
812
+ },
813
+ {
814
+ "epoch": 1.96,
815
+ "learning_rate": 0.0008756660746003553,
816
+ "loss": 0.5238,
817
+ "step": 1200
818
+ },
819
+ {
820
+ "epoch": 1.96,
821
+ "eval_loss": 0.47723039984703064,
822
+ "eval_runtime": 11.5341,
823
+ "eval_samples_per_second": 101.525,
824
+ "eval_steps_per_second": 12.745,
825
+ "step": 1200
826
+ },
827
+ {
828
+ "epoch": 1.97,
829
+ "learning_rate": 0.0008738898756660746,
830
+ "loss": 0.527,
831
+ "step": 1210
832
+ },
833
+ {
834
+ "epoch": 1.99,
835
+ "learning_rate": 0.000872113676731794,
836
+ "loss": 0.537,
837
+ "step": 1220
838
+ },
839
+ {
840
+ "epoch": 2.01,
841
+ "learning_rate": 0.0008703374777975134,
842
+ "loss": 0.5252,
843
+ "step": 1230
844
+ },
845
+ {
846
+ "epoch": 2.02,
847
+ "learning_rate": 0.0008685612788632326,
848
+ "loss": 0.5252,
849
+ "step": 1240
850
+ },
851
+ {
852
+ "epoch": 2.04,
853
+ "learning_rate": 0.000866785079928952,
854
+ "loss": 0.501,
855
+ "step": 1250
856
+ },
857
+ {
858
+ "epoch": 2.06,
859
+ "learning_rate": 0.0008650088809946714,
860
+ "loss": 0.4979,
861
+ "step": 1260
862
+ },
863
+ {
864
+ "epoch": 2.07,
865
+ "learning_rate": 0.0008632326820603907,
866
+ "loss": 0.5041,
867
+ "step": 1270
868
+ },
869
+ {
870
+ "epoch": 2.09,
871
+ "learning_rate": 0.0008614564831261101,
872
+ "loss": 0.4837,
873
+ "step": 1280
874
+ },
875
+ {
876
+ "epoch": 2.1,
877
+ "learning_rate": 0.0008596802841918295,
878
+ "loss": 0.5124,
879
+ "step": 1290
880
+ },
881
+ {
882
+ "epoch": 2.12,
883
+ "learning_rate": 0.0008579040852575488,
884
+ "loss": 0.4876,
885
+ "step": 1300
886
+ },
887
+ {
888
+ "epoch": 2.12,
889
+ "eval_loss": 0.4801134765148163,
890
+ "eval_runtime": 5.9243,
891
+ "eval_samples_per_second": 197.662,
892
+ "eval_steps_per_second": 24.813,
893
+ "step": 1300
894
+ },
895
+ {
896
+ "epoch": 2.14,
897
+ "learning_rate": 0.0008561278863232682,
898
+ "loss": 0.4937,
899
+ "step": 1310
900
+ },
901
+ {
902
+ "epoch": 2.15,
903
+ "learning_rate": 0.0008543516873889876,
904
+ "loss": 0.4969,
905
+ "step": 1320
906
+ },
907
+ {
908
+ "epoch": 2.17,
909
+ "learning_rate": 0.0008525754884547069,
910
+ "loss": 0.4921,
911
+ "step": 1330
912
+ },
913
+ {
914
+ "epoch": 2.19,
915
+ "learning_rate": 0.0008507992895204263,
916
+ "loss": 0.5073,
917
+ "step": 1340
918
+ },
919
+ {
920
+ "epoch": 2.2,
921
+ "learning_rate": 0.0008490230905861456,
922
+ "loss": 0.4758,
923
+ "step": 1350
924
+ },
925
+ {
926
+ "epoch": 2.22,
927
+ "learning_rate": 0.000847246891651865,
928
+ "loss": 0.5329,
929
+ "step": 1360
930
+ },
931
+ {
932
+ "epoch": 2.23,
933
+ "learning_rate": 0.0008454706927175843,
934
+ "loss": 0.4786,
935
+ "step": 1370
936
+ },
937
+ {
938
+ "epoch": 2.25,
939
+ "learning_rate": 0.0008436944937833037,
940
+ "loss": 0.4819,
941
+ "step": 1380
942
+ },
943
+ {
944
+ "epoch": 2.27,
945
+ "learning_rate": 0.0008419182948490231,
946
+ "loss": 0.5125,
947
+ "step": 1390
948
+ },
949
+ {
950
+ "epoch": 2.28,
951
+ "learning_rate": 0.0008401420959147424,
952
+ "loss": 0.5048,
953
+ "step": 1400
954
+ },
955
+ {
956
+ "epoch": 2.28,
957
+ "eval_loss": 0.46734750270843506,
958
+ "eval_runtime": 13.0931,
959
+ "eval_samples_per_second": 89.436,
960
+ "eval_steps_per_second": 11.227,
961
+ "step": 1400
962
+ },
963
+ {
964
+ "epoch": 2.3,
965
+ "learning_rate": 0.0008383658969804618,
966
+ "loss": 0.5128,
967
+ "step": 1410
968
+ },
969
+ {
970
+ "epoch": 2.32,
971
+ "learning_rate": 0.0008365896980461812,
972
+ "loss": 0.5022,
973
+ "step": 1420
974
+ },
975
+ {
976
+ "epoch": 2.33,
977
+ "learning_rate": 0.0008348134991119005,
978
+ "loss": 0.4767,
979
+ "step": 1430
980
+ },
981
+ {
982
+ "epoch": 2.35,
983
+ "learning_rate": 0.00083303730017762,
984
+ "loss": 0.4959,
985
+ "step": 1440
986
+ },
987
+ {
988
+ "epoch": 2.37,
989
+ "learning_rate": 0.0008312611012433394,
990
+ "loss": 0.5147,
991
+ "step": 1450
992
+ },
993
+ {
994
+ "epoch": 2.38,
995
+ "learning_rate": 0.0008294849023090586,
996
+ "loss": 0.4922,
997
+ "step": 1460
998
+ },
999
+ {
1000
+ "epoch": 2.4,
1001
+ "learning_rate": 0.000827708703374778,
1002
+ "loss": 0.4936,
1003
+ "step": 1470
1004
+ },
1005
+ {
1006
+ "epoch": 2.41,
1007
+ "learning_rate": 0.0008259325044404974,
1008
+ "loss": 0.5041,
1009
+ "step": 1480
1010
+ },
1011
+ {
1012
+ "epoch": 2.43,
1013
+ "learning_rate": 0.0008241563055062167,
1014
+ "loss": 0.491,
1015
+ "step": 1490
1016
+ },
1017
+ {
1018
+ "epoch": 2.45,
1019
+ "learning_rate": 0.0008223801065719361,
1020
+ "loss": 0.5096,
1021
+ "step": 1500
1022
+ },
1023
+ {
1024
+ "epoch": 2.45,
1025
+ "eval_loss": 0.45563551783561707,
1026
+ "eval_runtime": 13.1221,
1027
+ "eval_samples_per_second": 89.239,
1028
+ "eval_steps_per_second": 11.202,
1029
+ "step": 1500
1030
  }
1031
  ],
1032
  "logging_steps": 10,
1033
  "max_steps": 6130,
1034
  "num_train_epochs": 10,
1035
  "save_steps": 500,
1036
+ "total_flos": 2833006835073024.0,
1037
  "trial_name": null,
1038
  "trial_params": null
1039
  }