dada22231 commited on
Commit
81e6ab8
·
verified ·
1 Parent(s): e6cc41a

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e81e7554f6a4e8dc1d6637458ad111412135558d4fa3da36246a6ce3a7f3582
3
  size 166182480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:133647ff75619b2ebf4b08b1907148f54f64d50a6540006b8b0cfd3d268e67a7
3
  size 166182480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a24c4ba4fe6d4ef54df173ae57cdaa451bf814e7c0ccd1eee98ff9c9b08604d
3
  size 332574358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b780e0bdd236c892f8c7d0c7d4afe550217aba06b3560dacba4ce990ecece4b
3
  size 332574358
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb69463d7c0255de870de267315257b4ab055b07250b9bf3d5f50f6e91ab28b2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d6427d6aa68c158ad80f76946f6fc44629c698f67492b8fb6d643dbc5eef98
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3187bc61197df87f0c3ee611adcb22372b70450b00d5a0c3b9ca0c3c6aa112eb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2caf50116db0368fab40b4e34ae3b0c9f1a86e9da0bc7a8de5ff785b985711a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b833491f3e58bff523508fa17e20e242e52c888acfebbf28b5bd968b816788b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a955dabcc60ffe276aa50f92c60f21389f3a28e5589c5c081037f1d66876c9a3
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c19b9c696d677fbac8fe64d1e779bd08ec3d7393548934388271f17014c89f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1380d7d4f110826396b67714006bc18fde61d5c81581b3cc018f32e55772014
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b898900b04322cf6c4f019c4f4ba26a4fda854c76bcf2313072c064f0fd1f3cb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148dc60fce7a98d209219ab65863631c40408a69d4537618751b3440fe762b40
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 3.0905191579222446e-06,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-125",
4
- "epoch": 0.2761096155173604,
5
  "eval_steps": 25,
6
- "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -930,6 +930,189 @@
930
  "eval_samples_per_second": 23.81,
931
  "eval_steps_per_second": 6.191,
932
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933
  }
934
  ],
935
  "logging_steps": 1,
@@ -958,7 +1141,7 @@
958
  "attributes": {}
959
  }
960
  },
961
- "total_flos": 4.0620223627264e+17,
962
  "train_batch_size": 1,
963
  "trial_name": null,
964
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.842964704541373e-06,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 0.3313315386208325,
5
  "eval_steps": 25,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
930
  "eval_samples_per_second": 23.81,
931
  "eval_steps_per_second": 6.191,
932
  "step": 125
933
+ },
934
+ {
935
+ "epoch": 0.27831849244149925,
936
+ "grad_norm": 0.0002063347928924486,
937
+ "learning_rate": 3.9148747843544495e-05,
938
+ "loss": 0.0,
939
+ "step": 126
940
+ },
941
+ {
942
+ "epoch": 0.28052736936563816,
943
+ "grad_norm": 0.00025974729214794934,
944
+ "learning_rate": 3.846178285323835e-05,
945
+ "loss": 0.0,
946
+ "step": 127
947
+ },
948
+ {
949
+ "epoch": 0.28273624628977706,
950
+ "grad_norm": 0.00024177682644221932,
951
+ "learning_rate": 3.777924554357096e-05,
952
+ "loss": 0.0,
953
+ "step": 128
954
+ },
955
+ {
956
+ "epoch": 0.2849451232139159,
957
+ "grad_norm": 0.0002588094212114811,
958
+ "learning_rate": 3.710131864628451e-05,
959
+ "loss": 0.0,
960
+ "step": 129
961
+ },
962
+ {
963
+ "epoch": 0.2871540001380548,
964
+ "grad_norm": 0.0002662424521986395,
965
+ "learning_rate": 3.642818365880224e-05,
966
+ "loss": 0.0,
967
+ "step": 130
968
+ },
969
+ {
970
+ "epoch": 0.2893628770621937,
971
+ "grad_norm": 0.0004572535108309239,
972
+ "learning_rate": 3.576002079563732e-05,
973
+ "loss": 0.0,
974
+ "step": 131
975
+ },
976
+ {
977
+ "epoch": 0.29157175398633256,
978
+ "grad_norm": 0.00027820674586109817,
979
+ "learning_rate": 3.509700894014496e-05,
980
+ "loss": 0.0,
981
+ "step": 132
982
+ },
983
+ {
984
+ "epoch": 0.29378063091047146,
985
+ "grad_norm": 0.00010199982352787629,
986
+ "learning_rate": 3.443932559663107e-05,
987
+ "loss": 0.0,
988
+ "step": 133
989
+ },
990
+ {
991
+ "epoch": 0.29598950783461037,
992
+ "grad_norm": 0.00010178149386774749,
993
+ "learning_rate": 3.378714684283011e-05,
994
+ "loss": 0.0,
995
+ "step": 134
996
+ },
997
+ {
998
+ "epoch": 0.2981983847587492,
999
+ "grad_norm": 0.00010338863648939878,
1000
+ "learning_rate": 3.31406472827647e-05,
1001
+ "loss": 0.0,
1002
+ "step": 135
1003
+ },
1004
+ {
1005
+ "epoch": 0.3004072616828881,
1006
+ "grad_norm": 0.0001027277103275992,
1007
+ "learning_rate": 3.250000000000001e-05,
1008
+ "loss": 0.0,
1009
+ "step": 136
1010
+ },
1011
+ {
1012
+ "epoch": 0.30261613860702696,
1013
+ "grad_norm": 0.00010317438864149153,
1014
+ "learning_rate": 3.186537651130503e-05,
1015
+ "loss": 0.0,
1016
+ "step": 137
1017
+ },
1018
+ {
1019
+ "epoch": 0.30482501553116587,
1020
+ "grad_norm": 0.00016677897656336427,
1021
+ "learning_rate": 3.123694672073344e-05,
1022
+ "loss": 0.0,
1023
+ "step": 138
1024
+ },
1025
+ {
1026
+ "epoch": 0.30703389245530477,
1027
+ "grad_norm": 0.00022237653320189565,
1028
+ "learning_rate": 3.061487887413619e-05,
1029
+ "loss": 0.0,
1030
+ "step": 139
1031
+ },
1032
+ {
1033
+ "epoch": 0.3092427693794436,
1034
+ "grad_norm": 0.00023152329958975315,
1035
+ "learning_rate": 2.9999339514117912e-05,
1036
+ "loss": 0.0,
1037
+ "step": 140
1038
+ },
1039
+ {
1040
+ "epoch": 0.3114516463035825,
1041
+ "grad_norm": 0.00022528883710037917,
1042
+ "learning_rate": 2.9390493435449572e-05,
1043
+ "loss": 0.0,
1044
+ "step": 141
1045
+ },
1046
+ {
1047
+ "epoch": 0.3136605232277214,
1048
+ "grad_norm": 0.00024947745259851217,
1049
+ "learning_rate": 2.8788503640948912e-05,
1050
+ "loss": 0.0,
1051
+ "step": 142
1052
+ },
1053
+ {
1054
+ "epoch": 0.31586940015186027,
1055
+ "grad_norm": 0.00043007542262785137,
1056
+ "learning_rate": 2.8193531297840503e-05,
1057
+ "loss": 0.0,
1058
+ "step": 143
1059
+ },
1060
+ {
1061
+ "epoch": 0.31807827707599917,
1062
+ "grad_norm": 0.00035726267378777266,
1063
+ "learning_rate": 2.760573569460757e-05,
1064
+ "loss": 0.0,
1065
+ "step": 144
1066
+ },
1067
+ {
1068
+ "epoch": 0.3202871540001381,
1069
+ "grad_norm": 9.570374095346779e-05,
1070
+ "learning_rate": 2.702527419834653e-05,
1071
+ "loss": 0.0,
1072
+ "step": 145
1073
+ },
1074
+ {
1075
+ "epoch": 0.3224960309242769,
1076
+ "grad_norm": 9.726483403937891e-05,
1077
+ "learning_rate": 2.645230221263596e-05,
1078
+ "loss": 0.0,
1079
+ "step": 146
1080
+ },
1081
+ {
1082
+ "epoch": 0.3247049078484158,
1083
+ "grad_norm": 9.980611503124237e-05,
1084
+ "learning_rate": 2.5886973135931425e-05,
1085
+ "loss": 0.0,
1086
+ "step": 147
1087
+ },
1088
+ {
1089
+ "epoch": 0.3269137847725547,
1090
+ "grad_norm": 9.672047599451616e-05,
1091
+ "learning_rate": 2.53294383204969e-05,
1092
+ "loss": 0.0,
1093
+ "step": 148
1094
+ },
1095
+ {
1096
+ "epoch": 0.3291226616966936,
1097
+ "grad_norm": 9.55902723944746e-05,
1098
+ "learning_rate": 2.4779847031884175e-05,
1099
+ "loss": 0.0,
1100
+ "step": 149
1101
+ },
1102
+ {
1103
+ "epoch": 0.3313315386208325,
1104
+ "grad_norm": 9.5816605607979e-05,
1105
+ "learning_rate": 2.423834640897079e-05,
1106
+ "loss": 0.0,
1107
+ "step": 150
1108
+ },
1109
+ {
1110
+ "epoch": 0.3313315386208325,
1111
+ "eval_loss": 2.842964704541373e-06,
1112
+ "eval_runtime": 2.1034,
1113
+ "eval_samples_per_second": 23.771,
1114
+ "eval_steps_per_second": 6.18,
1115
+ "step": 150
1116
  }
1117
  ],
1118
  "logging_steps": 1,
 
1141
  "attributes": {}
1142
  }
1143
  },
1144
+ "total_flos": 4.87442683527168e+17,
1145
  "train_batch_size": 1,
1146
  "trial_name": null,
1147
  "trial_params": null