lesso12 commited on
Commit
aa0c5d3
·
verified ·
1 Parent(s): d497a8c

Training in progress, step 141, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3a07bb6a35ea14e1ed69c3c2d77fbf0664d1804e0dc1874fe82dd47e43cda05
3
  size 389074464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb20191a6c2b119231d7678dd617bc707711a23a1bb9b64cf760c7de0b71218
3
  size 389074464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d656b3e2fae6d74b6b99b062276209667100e0e05d05a48c76898b9843f72c57
3
  size 198011252
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06718b376d024d256e2c7914d486bd8d83ea7b66f2817ccdc19466a5d4484cc
3
  size 198011252
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63add0586146b851f47d881f8a2c86d2e7bbd1031b34991ea727eda767e3ba6f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9faa80e895f2ee441f65233c3b9d99d52a69600429d38bbddd6eead4f9c541b
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8187a6c88d7933f4442806aa816104ffa2abe40157d1b5941b707067b0f91484
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef808943930759c29df7d6cb43c7ce42f18630028d7057fa5027d9913fbda00
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:428d17df5bbcb9c5bba8928ed07c3ac1e2473387421762fc2dd2a18dad7163cb
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:400862bf97811c06d931cbea4873b2c4f76e79cd2ca67b0903bcbae23651c690
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10e76a88b495596517a8c5e8dd0c9cf73e2fa8d302cb53089a3a0a19398d0705
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c9ee43196afab265d60dcd4c1c900179ab22f775dc75282af2f09c2068eaf05
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e07c2149652b1186dc91873797838ca755d72668f5e0d5315a9746f87efcc58
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bc9793c1138e05078b265d5eaf89968ed1a8a6e30d8d2e6c8cd400956edb28
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea6c246d355698e0da22b783baa4cd562f3bf8472a265e83f06d63d516cc95c7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6e96260fd16c14c9f9f12f3b8f9c8c35af2268f7c6f573e7123e9610bf0097
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55aa132a1f6a464d42b97932e211287aa777c051c2c25ed9e36ac7ddda94bf95
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad3f1ce05ff8747c562da4063aca1d62d228d34ae20324fd91268b964a6f1fd5
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a7d871d8fc1dcb9a693c26cb0b989af2ab985ab337aecb1daa8032f9df0a10
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec81ebc5fec1f650ab3e9c5c8e2325a8fbc2b8c2c20649882b77b60f6d6de60
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b323f2cb8c959ffbaf90b97d17df7a3447ef35ce5770b54457987ada42ec67f4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b8e2b43f384d74d38718a5b54e296fe62eb266007d5519e2bb4f49d875fe13
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9264618754386902,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 2.127659574468085,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,293 @@
731
  "eval_samples_per_second": 168.787,
732
  "eval_steps_per_second": 5.333,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +1041,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 4.586475853185024e+17,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9264618754386902,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 3.0,
5
  "eval_steps": 50,
6
+ "global_step": 141,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 168.787,
732
  "eval_steps_per_second": 5.333,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 2.148936170212766,
737
+ "grad_norm": 0.26321274042129517,
738
+ "learning_rate": 2.384916843344419e-05,
739
+ "loss": 0.2123,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 2.1702127659574466,
744
+ "grad_norm": 0.36982113122940063,
745
+ "learning_rate": 2.275893467018154e-05,
746
+ "loss": 0.7511,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 2.1914893617021276,
751
+ "grad_norm": 0.4143376648426056,
752
+ "learning_rate": 2.1687817534238292e-05,
753
+ "loss": 1.2891,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 2.2127659574468086,
758
+ "grad_norm": 0.3768226206302643,
759
+ "learning_rate": 2.0636433015111154e-05,
760
+ "loss": 1.127,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 2.2340425531914896,
765
+ "grad_norm": 0.3875614106655121,
766
+ "learning_rate": 1.9605385754252593e-05,
767
+ "loss": 0.9696,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 2.25531914893617,
772
+ "grad_norm": 0.4567450284957886,
773
+ "learning_rate": 1.8595268697347047e-05,
774
+ "loss": 0.8626,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 2.276595744680851,
779
+ "grad_norm": 0.43263593316078186,
780
+ "learning_rate": 1.76066627533135e-05,
781
+ "loss": 0.417,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 2.297872340425532,
786
+ "grad_norm": 0.2629643380641937,
787
+ "learning_rate": 1.664013646023009e-05,
788
+ "loss": 0.3379,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 2.3191489361702127,
793
+ "grad_norm": 0.47793322801589966,
794
+ "learning_rate": 1.5696245658373157e-05,
795
+ "loss": 1.1889,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 2.3404255319148937,
800
+ "grad_norm": 0.4138708710670471,
801
+ "learning_rate": 1.4775533170558723e-05,
802
+ "loss": 1.1401,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 2.3617021276595747,
807
+ "grad_norm": 0.4172796905040741,
808
+ "learning_rate": 1.3878528489970085e-05,
809
+ "loss": 1.0396,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 2.382978723404255,
814
+ "grad_norm": 0.4409525692462921,
815
+ "learning_rate": 1.3005747475651238e-05,
816
+ "loss": 0.9267,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 2.404255319148936,
821
+ "grad_norm": 0.49562060832977295,
822
+ "learning_rate": 1.2157692055841128e-05,
823
+ "loss": 0.6103,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 2.425531914893617,
828
+ "grad_norm": 0.16902895271778107,
829
+ "learning_rate": 1.1334849939319436e-05,
830
+ "loss": 0.1601,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 2.4468085106382977,
835
+ "grad_norm": 0.5262559652328491,
836
+ "learning_rate": 1.0537694334929756e-05,
837
+ "loss": 1.2164,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 2.4680851063829787,
842
+ "grad_norm": 0.45780274271965027,
843
+ "learning_rate": 9.766683679441566e-06,
844
+ "loss": 1.2017,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 2.4893617021276597,
849
+ "grad_norm": 0.4124818444252014,
850
+ "learning_rate": 9.022261373907599e-06,
851
+ "loss": 1.0688,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 2.5106382978723403,
856
+ "grad_norm": 0.41770249605178833,
857
+ "learning_rate": 8.304855528667915e-06,
858
+ "loss": 0.893,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 2.5319148936170213,
863
+ "grad_norm": 0.5359745025634766,
864
+ "learning_rate": 7.614878717147731e-06,
865
+ "loss": 0.734,
866
+ "step": 119
867
+ },
868
+ {
869
+ "epoch": 2.5531914893617023,
870
+ "grad_norm": 0.12150562554597855,
871
+ "learning_rate": 6.952727738590198e-06,
872
+ "loss": 0.0908,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 2.574468085106383,
877
+ "grad_norm": 0.45060330629348755,
878
+ "learning_rate": 6.318783389860888e-06,
879
+ "loss": 0.9969,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 2.595744680851064,
884
+ "grad_norm": 0.4552464485168457,
885
+ "learning_rate": 5.7134102464550925e-06,
886
+ "loss": 1.2133,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 2.617021276595745,
891
+ "grad_norm": 0.41424861550331116,
892
+ "learning_rate": 5.136956452833776e-06,
893
+ "loss": 1.0531,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 2.6382978723404253,
898
+ "grad_norm": 0.4196318984031677,
899
+ "learning_rate": 4.589753522209003e-06,
900
+ "loss": 0.9811,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 2.6595744680851063,
905
+ "grad_norm": 0.4966506063938141,
906
+ "learning_rate": 4.072116145893723e-06,
907
+ "loss": 0.8532,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 2.6808510638297873,
912
+ "grad_norm": 0.2934734523296356,
913
+ "learning_rate": 3.584342012325771e-06,
914
+ "loss": 0.1873,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 2.702127659574468,
919
+ "grad_norm": 0.3809848725795746,
920
+ "learning_rate": 3.126711635869966e-06,
921
+ "loss": 0.6348,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 2.723404255319149,
926
+ "grad_norm": 0.463334858417511,
927
+ "learning_rate": 2.699488195496971e-06,
928
+ "loss": 1.2586,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 2.74468085106383,
933
+ "grad_norm": 0.43426749110221863,
934
+ "learning_rate": 2.3029173834314634e-06,
935
+ "loss": 1.1442,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 2.7659574468085104,
940
+ "grad_norm": 0.4165303111076355,
941
+ "learning_rate": 1.9372272638568494e-06,
942
+ "loss": 1.0423,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 2.7872340425531914,
947
+ "grad_norm": 0.48008161783218384,
948
+ "learning_rate": 1.6026281417576689e-06,
949
+ "loss": 0.8429,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 2.8085106382978724,
954
+ "grad_norm": 0.45738592743873596,
955
+ "learning_rate": 1.299312441975153e-06,
956
+ "loss": 0.3767,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 2.829787234042553,
961
+ "grad_norm": 0.350779265165329,
962
+ "learning_rate": 1.0274545985455078e-06,
963
+ "loss": 0.5226,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 2.851063829787234,
968
+ "grad_norm": 0.49393415451049805,
969
+ "learning_rate": 7.872109543844799e-07,
970
+ "loss": 1.2395,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 2.872340425531915,
975
+ "grad_norm": 0.4229400157928467,
976
+ "learning_rate": 5.787196713760618e-07,
977
+ "loss": 1.1422,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 2.8936170212765955,
982
+ "grad_norm": 0.42691662907600403,
983
+ "learning_rate": 4.021006509168048e-07,
984
+ "loss": 1.0496,
985
+ "step": 136
986
+ },
987
+ {
988
+ "epoch": 2.9148936170212765,
989
+ "grad_norm": 0.4528738260269165,
990
+ "learning_rate": 2.574554649617209e-07,
991
+ "loss": 0.905,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 2.9361702127659575,
996
+ "grad_norm": 0.4881468713283539,
997
+ "learning_rate": 1.4486729761113447e-07,
998
+ "loss": 0.5696,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 2.9574468085106385,
1003
+ "grad_norm": 0.412494957447052,
1004
+ "learning_rate": 6.440089727230269e-08,
1005
+ "loss": 0.7729,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 2.978723404255319,
1010
+ "grad_norm": 0.4168694317340851,
1011
+ "learning_rate": 1.6102539423217266e-08,
1012
+ "loss": 1.1272,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 3.0,
1017
+ "grad_norm": 0.48817628622055054,
1018
+ "learning_rate": 0.0,
1019
+ "loss": 0.8255,
1020
+ "step": 141
1021
  }
1022
  ],
1023
  "logging_steps": 1,
 
1041
  "should_evaluate": false,
1042
  "should_log": false,
1043
  "should_save": true,
1044
+ "should_training_stop": true
1045
  },
1046
  "attributes": {}
1047
  }
1048
  },
1049
+ "total_flos": 6.466930952990884e+17,
1050
  "train_batch_size": 8,
1051
  "trial_name": null,
1052
  "trial_params": null