OsamaMo commited on
Commit
ffdc083
·
verified ·
1 Parent(s): 45e3c0b

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ba270b888a201fead48ad37c2c2e228e832cc5e2304c9d48ddcc2a4ab95b9d
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3001072512cf0094aa413adef722b38d30d55f1d695532e69f11d0e79e17410
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ad12c53cb7962a72e2d80a27249286394dc06a5b1f83bd4257087da8221ea0
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6d21674325a8aaab36b8cf4642a6d5958787b319ca784ac8dfd0a1718a3756
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5edb34d031c0c2b447f3eaadb401a4c1e7e7e6d8c096e28b7092e01a8bd48c92
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fcb8b7132fdda989f7bbb14a5bf464435849629fe731ccbc64c4724068a57e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7a13d6eba883692f8ed583bb8ce176c7e7a1118cd4c39d5498dbe981adfa197
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e579802271638ff75fe7ba64560b3e21e4f7e26236b794157498845ba12537a4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7145409074669525,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -787,6 +787,396 @@
787
  "eval_news_finetune_val_samples_per_second": 1.395,
788
  "eval_news_finetune_val_steps_per_second": 1.395,
789
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  }
791
  ],
792
  "logging_steps": 10,
@@ -806,7 +1196,7 @@
806
  "attributes": {}
807
  }
808
  },
809
- "total_flos": 2.760906963972096e+16,
810
  "train_batch_size": 1,
811
  "trial_name": null,
812
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0714540907466952,
5
  "eval_steps": 100,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
787
  "eval_news_finetune_val_samples_per_second": 1.395,
788
  "eval_news_finetune_val_steps_per_second": 1.395,
789
  "step": 1000
790
+ },
791
+ {
792
+ "epoch": 0.721686316541622,
793
+ "grad_norm": 1.5076738595962524,
794
+ "learning_rate": 9.409912607418172e-05,
795
+ "loss": 0.268,
796
+ "step": 1010
797
+ },
798
+ {
799
+ "epoch": 0.7288317256162915,
800
+ "grad_norm": 3.3230276107788086,
801
+ "learning_rate": 9.390160386775895e-05,
802
+ "loss": 0.3038,
803
+ "step": 1020
804
+ },
805
+ {
806
+ "epoch": 0.735977134690961,
807
+ "grad_norm": 1.699854850769043,
808
+ "learning_rate": 9.370104438953125e-05,
809
+ "loss": 0.2869,
810
+ "step": 1030
811
+ },
812
+ {
813
+ "epoch": 0.7431225437656306,
814
+ "grad_norm": 0.904507577419281,
815
+ "learning_rate": 9.349746151492902e-05,
816
+ "loss": 0.289,
817
+ "step": 1040
818
+ },
819
+ {
820
+ "epoch": 0.7502679528403001,
821
+ "grad_norm": 0.9463105201721191,
822
+ "learning_rate": 9.329086932855215e-05,
823
+ "loss": 0.3729,
824
+ "step": 1050
825
+ },
826
+ {
827
+ "epoch": 0.7574133619149697,
828
+ "grad_norm": 1.4746607542037964,
829
+ "learning_rate": 9.30812821231956e-05,
830
+ "loss": 0.2282,
831
+ "step": 1060
832
+ },
833
+ {
834
+ "epoch": 0.7645587709896392,
835
+ "grad_norm": 1.0270076990127563,
836
+ "learning_rate": 9.286871439886058e-05,
837
+ "loss": 0.3029,
838
+ "step": 1070
839
+ },
840
+ {
841
+ "epoch": 0.7717041800643086,
842
+ "grad_norm": 2.0656538009643555,
843
+ "learning_rate": 9.265318086175143e-05,
844
+ "loss": 0.3268,
845
+ "step": 1080
846
+ },
847
+ {
848
+ "epoch": 0.7788495891389782,
849
+ "grad_norm": 0.9798826575279236,
850
+ "learning_rate": 9.243469642325805e-05,
851
+ "loss": 0.2942,
852
+ "step": 1090
853
+ },
854
+ {
855
+ "epoch": 0.7859949982136477,
856
+ "grad_norm": 1.1419672966003418,
857
+ "learning_rate": 9.221327619892452e-05,
858
+ "loss": 0.3266,
859
+ "step": 1100
860
+ },
861
+ {
862
+ "epoch": 0.7859949982136477,
863
+ "eval_news_finetune_val_loss": 0.307956337928772,
864
+ "eval_news_finetune_val_runtime": 1003.1873,
865
+ "eval_news_finetune_val_samples_per_second": 1.396,
866
+ "eval_news_finetune_val_steps_per_second": 1.396,
867
+ "step": 1100
868
+ },
869
+ {
870
+ "epoch": 0.7931404072883173,
871
+ "grad_norm": 0.6810228228569031,
872
+ "learning_rate": 9.198893550740306e-05,
873
+ "loss": 0.3596,
874
+ "step": 1110
875
+ },
876
+ {
877
+ "epoch": 0.8002858163629868,
878
+ "grad_norm": 1.6553049087524414,
879
+ "learning_rate": 9.176168986939446e-05,
880
+ "loss": 0.3106,
881
+ "step": 1120
882
+ },
883
+ {
884
+ "epoch": 0.8074312254376563,
885
+ "grad_norm": 0.7749443650245667,
886
+ "learning_rate": 9.153155500657422e-05,
887
+ "loss": 0.3298,
888
+ "step": 1130
889
+ },
890
+ {
891
+ "epoch": 0.8145766345123259,
892
+ "grad_norm": 0.8693751096725464,
893
+ "learning_rate": 9.129854684050481e-05,
894
+ "loss": 0.279,
895
+ "step": 1140
896
+ },
897
+ {
898
+ "epoch": 0.8217220435869954,
899
+ "grad_norm": 1.1013332605361938,
900
+ "learning_rate": 9.10626814915343e-05,
901
+ "loss": 0.3195,
902
+ "step": 1150
903
+ },
904
+ {
905
+ "epoch": 0.8288674526616648,
906
+ "grad_norm": 1.2278695106506348,
907
+ "learning_rate": 9.082397527768092e-05,
908
+ "loss": 0.3027,
909
+ "step": 1160
910
+ },
911
+ {
912
+ "epoch": 0.8360128617363344,
913
+ "grad_norm": 2.173530101776123,
914
+ "learning_rate": 9.058244471350428e-05,
915
+ "loss": 0.2238,
916
+ "step": 1170
917
+ },
918
+ {
919
+ "epoch": 0.8431582708110039,
920
+ "grad_norm": 1.125986933708191,
921
+ "learning_rate": 9.033810650896274e-05,
922
+ "loss": 0.2399,
923
+ "step": 1180
924
+ },
925
+ {
926
+ "epoch": 0.8503036798856735,
927
+ "grad_norm": 0.6611151099205017,
928
+ "learning_rate": 9.009097756825737e-05,
929
+ "loss": 0.2736,
930
+ "step": 1190
931
+ },
932
+ {
933
+ "epoch": 0.857449088960343,
934
+ "grad_norm": 1.9068485498428345,
935
+ "learning_rate": 8.98410749886625e-05,
936
+ "loss": 0.2949,
937
+ "step": 1200
938
+ },
939
+ {
940
+ "epoch": 0.857449088960343,
941
+ "eval_news_finetune_val_loss": 0.31006094813346863,
942
+ "eval_news_finetune_val_runtime": 1002.7866,
943
+ "eval_news_finetune_val_samples_per_second": 1.396,
944
+ "eval_news_finetune_val_steps_per_second": 1.396,
945
+ "step": 1200
946
+ },
947
+ {
948
+ "epoch": 0.8645944980350125,
949
+ "grad_norm": 1.192031979560852,
950
+ "learning_rate": 8.958841605934278e-05,
951
+ "loss": 0.3657,
952
+ "step": 1210
953
+ },
954
+ {
955
+ "epoch": 0.8717399071096821,
956
+ "grad_norm": 1.2596725225448608,
957
+ "learning_rate": 8.933301826015715e-05,
958
+ "loss": 0.3068,
959
+ "step": 1220
960
+ },
961
+ {
962
+ "epoch": 0.8788853161843515,
963
+ "grad_norm": 1.4713683128356934,
964
+ "learning_rate": 8.907489926044945e-05,
965
+ "loss": 0.3122,
966
+ "step": 1230
967
+ },
968
+ {
969
+ "epoch": 0.886030725259021,
970
+ "grad_norm": 1.3583886623382568,
971
+ "learning_rate": 8.881407691782608e-05,
972
+ "loss": 0.2989,
973
+ "step": 1240
974
+ },
975
+ {
976
+ "epoch": 0.8931761343336906,
977
+ "grad_norm": 0.9863426089286804,
978
+ "learning_rate": 8.855056927692037e-05,
979
+ "loss": 0.2549,
980
+ "step": 1250
981
+ },
982
+ {
983
+ "epoch": 0.9003215434083601,
984
+ "grad_norm": 1.0579396486282349,
985
+ "learning_rate": 8.828439456814442e-05,
986
+ "loss": 0.2809,
987
+ "step": 1260
988
+ },
989
+ {
990
+ "epoch": 0.9074669524830297,
991
+ "grad_norm": 2.847482681274414,
992
+ "learning_rate": 8.801557120642766e-05,
993
+ "loss": 0.2933,
994
+ "step": 1270
995
+ },
996
+ {
997
+ "epoch": 0.9146123615576992,
998
+ "grad_norm": 0.8942415118217468,
999
+ "learning_rate": 8.774411778994295e-05,
1000
+ "loss": 0.2866,
1001
+ "step": 1280
1002
+ },
1003
+ {
1004
+ "epoch": 0.9217577706323687,
1005
+ "grad_norm": 1.297845721244812,
1006
+ "learning_rate": 8.747005309881984e-05,
1007
+ "loss": 0.2939,
1008
+ "step": 1290
1009
+ },
1010
+ {
1011
+ "epoch": 0.9289031797070382,
1012
+ "grad_norm": 1.2745181322097778,
1013
+ "learning_rate": 8.719339609384531e-05,
1014
+ "loss": 0.3018,
1015
+ "step": 1300
1016
+ },
1017
+ {
1018
+ "epoch": 0.9289031797070382,
1019
+ "eval_news_finetune_val_loss": 0.29822030663490295,
1020
+ "eval_news_finetune_val_runtime": 1002.5672,
1021
+ "eval_news_finetune_val_samples_per_second": 1.396,
1022
+ "eval_news_finetune_val_steps_per_second": 1.396,
1023
+ "step": 1300
1024
+ },
1025
+ {
1026
+ "epoch": 0.9360485887817077,
1027
+ "grad_norm": 1.3898978233337402,
1028
+ "learning_rate": 8.691416591515198e-05,
1029
+ "loss": 0.295,
1030
+ "step": 1310
1031
+ },
1032
+ {
1033
+ "epoch": 0.9431939978563773,
1034
+ "grad_norm": 1.1516591310501099,
1035
+ "learning_rate": 8.663238188089398e-05,
1036
+ "loss": 0.209,
1037
+ "step": 1320
1038
+ },
1039
+ {
1040
+ "epoch": 0.9503394069310468,
1041
+ "grad_norm": 0.9356768131256104,
1042
+ "learning_rate": 8.634806348591036e-05,
1043
+ "loss": 0.2904,
1044
+ "step": 1330
1045
+ },
1046
+ {
1047
+ "epoch": 0.9574848160057163,
1048
+ "grad_norm": 1.884950876235962,
1049
+ "learning_rate": 8.606123040037643e-05,
1050
+ "loss": 0.2607,
1051
+ "step": 1340
1052
+ },
1053
+ {
1054
+ "epoch": 0.9646302250803859,
1055
+ "grad_norm": 1.2719082832336426,
1056
+ "learning_rate": 8.577190246844291e-05,
1057
+ "loss": 0.3279,
1058
+ "step": 1350
1059
+ },
1060
+ {
1061
+ "epoch": 0.9717756341550554,
1062
+ "grad_norm": 0.935297429561615,
1063
+ "learning_rate": 8.548009970686302e-05,
1064
+ "loss": 0.3011,
1065
+ "step": 1360
1066
+ },
1067
+ {
1068
+ "epoch": 0.978921043229725,
1069
+ "grad_norm": 1.6732884645462036,
1070
+ "learning_rate": 8.51858423036076e-05,
1071
+ "loss": 0.2379,
1072
+ "step": 1370
1073
+ },
1074
+ {
1075
+ "epoch": 0.9860664523043944,
1076
+ "grad_norm": 0.6651692390441895,
1077
+ "learning_rate": 8.488915061646856e-05,
1078
+ "loss": 0.2599,
1079
+ "step": 1380
1080
+ },
1081
+ {
1082
+ "epoch": 0.9932118613790639,
1083
+ "grad_norm": 1.121752381324768,
1084
+ "learning_rate": 8.459004517165032e-05,
1085
+ "loss": 0.2265,
1086
+ "step": 1390
1087
+ },
1088
+ {
1089
+ "epoch": 1.0,
1090
+ "grad_norm": 0.5099928379058838,
1091
+ "learning_rate": 8.428854666234978e-05,
1092
+ "loss": 0.3301,
1093
+ "step": 1400
1094
+ },
1095
+ {
1096
+ "epoch": 1.0,
1097
+ "eval_news_finetune_val_loss": 0.28762951493263245,
1098
+ "eval_news_finetune_val_runtime": 1002.7793,
1099
+ "eval_news_finetune_val_samples_per_second": 1.396,
1100
+ "eval_news_finetune_val_steps_per_second": 1.396,
1101
+ "step": 1400
1102
+ },
1103
+ {
1104
+ "epoch": 1.0071454090746694,
1105
+ "grad_norm": 0.9986103177070618,
1106
+ "learning_rate": 8.398467594732478e-05,
1107
+ "loss": 0.2021,
1108
+ "step": 1410
1109
+ },
1110
+ {
1111
+ "epoch": 1.014290818149339,
1112
+ "grad_norm": 1.2675282955169678,
1113
+ "learning_rate": 8.367845404945084e-05,
1114
+ "loss": 0.2228,
1115
+ "step": 1420
1116
+ },
1117
+ {
1118
+ "epoch": 1.0214362272240085,
1119
+ "grad_norm": 0.8156709671020508,
1120
+ "learning_rate": 8.336990215426688e-05,
1121
+ "loss": 0.1947,
1122
+ "step": 1430
1123
+ },
1124
+ {
1125
+ "epoch": 1.0285816362986782,
1126
+ "grad_norm": 0.5374387502670288,
1127
+ "learning_rate": 8.305904160850941e-05,
1128
+ "loss": 0.2344,
1129
+ "step": 1440
1130
+ },
1131
+ {
1132
+ "epoch": 1.0357270453733476,
1133
+ "grad_norm": 0.6672261357307434,
1134
+ "learning_rate": 8.274589391863583e-05,
1135
+ "loss": 0.1919,
1136
+ "step": 1450
1137
+ },
1138
+ {
1139
+ "epoch": 1.0428724544480172,
1140
+ "grad_norm": 0.9803467988967896,
1141
+ "learning_rate": 8.243048074933634e-05,
1142
+ "loss": 0.2218,
1143
+ "step": 1460
1144
+ },
1145
+ {
1146
+ "epoch": 1.0500178635226867,
1147
+ "grad_norm": 1.482840657234192,
1148
+ "learning_rate": 8.21128239220353e-05,
1149
+ "loss": 0.2556,
1150
+ "step": 1470
1151
+ },
1152
+ {
1153
+ "epoch": 1.057163272597356,
1154
+ "grad_norm": 1.0589625835418701,
1155
+ "learning_rate": 8.179294541338135e-05,
1156
+ "loss": 0.2052,
1157
+ "step": 1480
1158
+ },
1159
+ {
1160
+ "epoch": 1.0643086816720257,
1161
+ "grad_norm": 0.8332052230834961,
1162
+ "learning_rate": 8.147086735372716e-05,
1163
+ "loss": 0.2386,
1164
+ "step": 1490
1165
+ },
1166
+ {
1167
+ "epoch": 1.0714540907466952,
1168
+ "grad_norm": 0.6018723845481873,
1169
+ "learning_rate": 8.114661202559828e-05,
1170
+ "loss": 0.1426,
1171
+ "step": 1500
1172
+ },
1173
+ {
1174
+ "epoch": 1.0714540907466952,
1175
+ "eval_news_finetune_val_loss": 0.30121028423309326,
1176
+ "eval_news_finetune_val_runtime": 1002.7457,
1177
+ "eval_news_finetune_val_samples_per_second": 1.396,
1178
+ "eval_news_finetune_val_steps_per_second": 1.396,
1179
+ "step": 1500
1180
  }
1181
  ],
1182
  "logging_steps": 10,
 
1196
  "attributes": {}
1197
  }
1198
  },
1199
+ "total_flos": 4.14970883106816e+16,
1200
  "train_batch_size": 1,
1201
  "trial_name": null,
1202
  "trial_params": null