aleegis12 committed
Commit 02b2ac3 · verified · 1 Parent(s): 7d45176

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d075d2936cd1c0bf21eac541d3b0399a076d42ec3e1a53897c12e8cfce9b59bf
+ oid sha256:397caea8f75e97f5170a7fefe06f8a05aec2ec435c2bc808f24e2608e7af54e5
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ccc8787b19f6752fc2d8ce1293689e24228e4c8003eb4971b579a5f3ae0344a6
+ oid sha256:dad69227d2a233c91445d42d0503c168983cbb09ab6e1a80b10be311f1cc3888
  size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aa1ee7d8d451c6a6b5393a7a6ed78f22ba5306b92c735102acba345b7a279a81
+ oid sha256:acbceda8b0e3f90ddd941ffea9fd96050462c086b9498ea5af11ceb857234bf0
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:aaed74bb3fc7c6c5e7701c295c87765052405aa7053481b3c8b27c902d9a447e
+ oid sha256:fe6385b1b60f064938f7c87459100029ab630f31bb282ac8fee66acbff88efe0
  size 1064
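
The four files above are Git LFS pointers: the repository stores only the spec version, a sha256 object id, and the payload size, so each of these diffs simply swaps the oid for the newly written checkpoint file. As a rough illustration (not part of this commit), the sketch below parses a pointer and checks a locally downloaded payload against it; the parse_pointer/verify_payload helper names and file paths are hypothetical.

```python
import hashlib
from pathlib import Path

def parse_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file: version / oid sha256:<hex> / size <bytes>."""
    lines = [line for line in Path(pointer_path).read_text().splitlines() if line.strip()]
    fields = dict(line.split(" ", 1) for line in lines)
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def verify_payload(pointer_path: str, payload_path: str) -> bool:
    """Check that a downloaded payload matches the pointer's size and sha256 oid."""
    pointer = parse_pointer(pointer_path)
    payload = Path(payload_path)
    if payload.stat().st_size != pointer["size"]:
        return False
    digest = hashlib.sha256()
    with payload.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == pointer["oid"]

# Hypothetical usage against the adapter weights from this checkpoint:
# verify_payload("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```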
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.0379580594599247,
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
- "epoch": 1.7743362831858407,
+ "best_metric": 0.032022152096033096,
+ "best_model_checkpoint": "miner_id_24/checkpoint-150",
+ "epoch": 2.663716814159292,
  "eval_steps": 50,
- "global_step": 100,
+ "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -731,6 +731,364 @@
  "eval_samples_per_second": 14.209,
  "eval_steps_per_second": 3.59,
  "step": 100
+ },
+ {
+ "epoch": 1.7920353982300885,
+ "grad_norm": 0.20991753041744232,
+ "learning_rate": 3.873640452903026e-05,
+ "loss": 0.0253,
+ "step": 101
+ },
+ {
+ "epoch": 1.8097345132743363,
+ "grad_norm": 0.3313141167163849,
+ "learning_rate": 3.777613711607087e-05,
+ "loss": 0.0289,
+ "step": 102
+ },
+ {
+ "epoch": 1.827433628318584,
+ "grad_norm": 0.19206620752811432,
+ "learning_rate": 3.682064169654663e-05,
+ "loss": 0.0265,
+ "step": 103
+ },
+ {
+ "epoch": 1.8451327433628317,
+ "grad_norm": 0.4013370871543884,
+ "learning_rate": 3.587029128003006e-05,
+ "loss": 0.0308,
+ "step": 104
+ },
+ {
+ "epoch": 1.8628318584070795,
+ "grad_norm": 0.18024751543998718,
+ "learning_rate": 3.492545686756986e-05,
+ "loss": 0.0162,
+ "step": 105
+ },
+ {
+ "epoch": 1.8805309734513274,
+ "grad_norm": 0.2305198758840561,
+ "learning_rate": 3.3986507306858125e-05,
+ "loss": 0.0284,
+ "step": 106
+ },
+ {
+ "epoch": 1.8982300884955752,
+ "grad_norm": 0.19050440192222595,
+ "learning_rate": 3.3053809148238426e-05,
+ "loss": 0.0227,
+ "step": 107
+ },
+ {
+ "epoch": 1.915929203539823,
+ "grad_norm": 0.15006108582019806,
+ "learning_rate": 3.212772650161056e-05,
+ "loss": 0.0191,
+ "step": 108
+ },
+ {
+ "epoch": 1.9336283185840708,
+ "grad_norm": 0.2283119261264801,
+ "learning_rate": 3.12086208942881e-05,
+ "loss": 0.0227,
+ "step": 109
+ },
+ {
+ "epoch": 1.9513274336283186,
+ "grad_norm": 0.2260059416294098,
+ "learning_rate": 3.0296851129864168e-05,
+ "loss": 0.0253,
+ "step": 110
+ },
+ {
+ "epoch": 1.9690265486725664,
+ "grad_norm": 0.23939450085163116,
+ "learning_rate": 2.9392773148140408e-05,
+ "loss": 0.0265,
+ "step": 111
+ },
+ {
+ "epoch": 1.9867256637168142,
+ "grad_norm": 0.1815921664237976,
+ "learning_rate": 2.8496739886173995e-05,
+ "loss": 0.0216,
+ "step": 112
+ },
+ {
+ "epoch": 2.0088495575221237,
+ "grad_norm": 0.24775980412960052,
+ "learning_rate": 2.7609101140496863e-05,
+ "loss": 0.0232,
+ "step": 113
+ },
+ {
+ "epoch": 2.0265486725663715,
+ "grad_norm": 0.12413739413022995,
+ "learning_rate": 2.6730203430560947e-05,
+ "loss": 0.0206,
+ "step": 114
+ },
+ {
+ "epoch": 2.0442477876106193,
+ "grad_norm": 0.1339850276708603,
+ "learning_rate": 2.5860389863462765e-05,
+ "loss": 0.0196,
+ "step": 115
+ },
+ {
+ "epoch": 2.061946902654867,
+ "grad_norm": 0.13854354619979858,
+ "learning_rate": 2.500000000000001e-05,
+ "loss": 0.0159,
+ "step": 116
+ },
+ {
+ "epoch": 2.079646017699115,
+ "grad_norm": 0.1154838278889656,
+ "learning_rate": 2.414936972211272e-05,
+ "loss": 0.0142,
+ "step": 117
+ },
+ {
+ "epoch": 2.0973451327433628,
+ "grad_norm": 0.10285928845405579,
+ "learning_rate": 2.3308831101760486e-05,
+ "loss": 0.01,
+ "step": 118
+ },
+ {
+ "epoch": 2.1150442477876106,
+ "grad_norm": 0.1806727647781372,
+ "learning_rate": 2.247871227128709e-05,
+ "loss": 0.0187,
+ "step": 119
+ },
+ {
+ "epoch": 2.1327433628318584,
+ "grad_norm": 0.19010406732559204,
+ "learning_rate": 2.1659337295323118e-05,
+ "loss": 0.0235,
+ "step": 120
+ },
+ {
+ "epoch": 2.150442477876106,
+ "grad_norm": 0.1291082799434662,
+ "learning_rate": 2.0851026044276406e-05,
+ "loss": 0.0144,
+ "step": 121
+ },
+ {
+ "epoch": 2.168141592920354,
+ "grad_norm": 0.16923344135284424,
+ "learning_rate": 2.005409406946e-05,
+ "loss": 0.0138,
+ "step": 122
+ },
+ {
+ "epoch": 2.185840707964602,
+ "grad_norm": 0.14110802114009857,
+ "learning_rate": 1.9268852479906147e-05,
+ "loss": 0.0119,
+ "step": 123
+ },
+ {
+ "epoch": 2.2035398230088497,
+ "grad_norm": 0.10906057059764862,
+ "learning_rate": 1.849560782091445e-05,
+ "loss": 0.009,
+ "step": 124
+ },
+ {
+ "epoch": 2.2212389380530975,
+ "grad_norm": 0.1177993193268776,
+ "learning_rate": 1.7734661954381754e-05,
+ "loss": 0.014,
+ "step": 125
+ },
+ {
+ "epoch": 2.2389380530973453,
+ "grad_norm": 0.10654302686452866,
+ "learning_rate": 1.6986311940960147e-05,
+ "loss": 0.0101,
+ "step": 126
+ },
+ {
+ "epoch": 2.256637168141593,
+ "grad_norm": 0.16150303184986115,
+ "learning_rate": 1.6250849924089484e-05,
+ "loss": 0.0168,
+ "step": 127
+ },
+ {
+ "epoch": 2.274336283185841,
+ "grad_norm": 0.12198394536972046,
+ "learning_rate": 1.552856301594942e-05,
+ "loss": 0.0136,
+ "step": 128
+ },
+ {
+ "epoch": 2.2920353982300883,
+ "grad_norm": 0.16058827936649323,
+ "learning_rate": 1.4819733185375534e-05,
+ "loss": 0.0196,
+ "step": 129
+ },
+ {
+ "epoch": 2.309734513274336,
+ "grad_norm": 0.19370707869529724,
+ "learning_rate": 1.4124637147783432e-05,
+ "loss": 0.0205,
+ "step": 130
+ },
+ {
+ "epoch": 2.327433628318584,
+ "grad_norm": 0.0963333398103714,
+ "learning_rate": 1.3443546257143624e-05,
+ "loss": 0.011,
+ "step": 131
+ },
+ {
+ "epoch": 2.3451327433628317,
+ "grad_norm": 0.1633400171995163,
+ "learning_rate": 1.277672640004936e-05,
+ "loss": 0.0208,
+ "step": 132
+ },
+ {
+ "epoch": 2.3628318584070795,
+ "grad_norm": 0.1224859431385994,
+ "learning_rate": 1.2124437891918993e-05,
+ "loss": 0.015,
+ "step": 133
+ },
+ {
+ "epoch": 2.3805309734513274,
+ "grad_norm": 0.10458586364984512,
+ "learning_rate": 1.1486935375373126e-05,
+ "loss": 0.0101,
+ "step": 134
+ },
+ {
+ "epoch": 2.398230088495575,
+ "grad_norm": 0.15571683645248413,
+ "learning_rate": 1.0864467720826343e-05,
+ "loss": 0.0162,
+ "step": 135
+ },
+ {
+ "epoch": 2.415929203539823,
+ "grad_norm": 0.08478286117315292,
+ "learning_rate": 1.0257277929332332e-05,
+ "loss": 0.0072,
+ "step": 136
+ },
+ {
+ "epoch": 2.433628318584071,
+ "grad_norm": 0.10365074872970581,
+ "learning_rate": 9.66560303772035e-06,
+ "loss": 0.0095,
+ "step": 137
+ },
+ {
+ "epoch": 2.4513274336283186,
+ "grad_norm": 0.07361367344856262,
+ "learning_rate": 9.08967402605988e-06,
+ "loss": 0.0076,
+ "step": 138
+ },
+ {
+ "epoch": 2.4690265486725664,
+ "grad_norm": 0.13091503083705902,
+ "learning_rate": 8.529715727489912e-06,
+ "loss": 0.0082,
+ "step": 139
+ },
+ {
+ "epoch": 2.4867256637168142,
+ "grad_norm": 0.11134269833564758,
+ "learning_rate": 7.985946740447791e-06,
+ "loss": 0.0079,
+ "step": 140
+ },
+ {
+ "epoch": 2.504424778761062,
+ "grad_norm": 0.18341496586799622,
+ "learning_rate": 7.458579343331995e-06,
+ "loss": 0.0129,
+ "step": 141
+ },
+ {
+ "epoch": 2.52212389380531,
+ "grad_norm": 0.11337222903966904,
+ "learning_rate": 6.947819411632223e-06,
+ "loss": 0.0173,
+ "step": 142
+ },
+ {
+ "epoch": 2.5398230088495577,
+ "grad_norm": 0.20701636373996735,
+ "learning_rate": 6.45386633755894e-06,
+ "loss": 0.0175,
+ "step": 143
+ },
+ {
+ "epoch": 2.557522123893805,
+ "grad_norm": 0.20868322253227234,
+ "learning_rate": 5.976912952204017e-06,
+ "loss": 0.0147,
+ "step": 144
+ },
+ {
+ "epoch": 2.5752212389380533,
+ "grad_norm": 0.10272178053855896,
+ "learning_rate": 5.51714545026264e-06,
+ "loss": 0.0128,
+ "step": 145
+ },
+ {
+ "epoch": 2.5929203539823007,
+ "grad_norm": 0.14340701699256897,
+ "learning_rate": 5.074743317346009e-06,
+ "loss": 0.0136,
+ "step": 146
+ },
+ {
+ "epoch": 2.6106194690265485,
+ "grad_norm": 0.12963014841079712,
+ "learning_rate": 4.649879259913137e-06,
+ "loss": 0.0093,
+ "step": 147
+ },
+ {
+ "epoch": 2.6283185840707963,
+ "grad_norm": 0.12100816518068314,
+ "learning_rate": 4.242719137849077e-06,
+ "loss": 0.0107,
+ "step": 148
+ },
+ {
+ "epoch": 2.646017699115044,
+ "grad_norm": 0.09687142819166183,
+ "learning_rate": 3.853421899715992e-06,
+ "loss": 0.0082,
+ "step": 149
+ },
+ {
+ "epoch": 2.663716814159292,
+ "grad_norm": 0.09663711488246918,
+ "learning_rate": 3.4821395207022766e-06,
+ "loss": 0.0081,
+ "step": 150
+ },
+ {
+ "epoch": 2.663716814159292,
+ "eval_loss": 0.032022152096033096,
+ "eval_runtime": 6.6983,
+ "eval_samples_per_second": 14.183,
+ "eval_steps_per_second": 3.583,
+ "step": 150
  }
  ],
  "logging_steps": 1,
@@ -759,7 +1117,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.330495774261248e+17,
+ "total_flos": 1.995743661391872e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
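
For reference, the updated trainer_state.json records the eval metric improving from 0.0379580594599247 at checkpoint-100 to 0.032022152096033096 at checkpoint-150, alongside per-step loss and learning-rate entries. Below is a minimal sketch for pulling those numbers back out, assuming the standard transformers layout where these records sit under the "log_history" key (the key itself is not visible in this diff) and using a placeholder file path.

```python
import json

# Placeholder path; point this at the checkpoint's trainer_state.json.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])                # 0.032022152096033096 in this commit
print("best checkpoint:", state["best_model_checkpoint"])  # miner_id_24/checkpoint-150
print("global_step:", state["global_step"])                # 150

# Training-step records carry "loss"; evaluation records carry "eval_loss".
history = state["log_history"]
train_losses = [(e["step"], e["loss"]) for e in history if "loss" in e]
eval_losses = [(e["step"], e["eval_loss"]) for e in history if "eval_loss" in e]

print("last train loss:", train_losses[-1])  # e.g. (150, 0.0081)
print("eval losses:", eval_losses)           # e.g. [..., (150, 0.032022152096033096)]
```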