cilooor committed
Commit 111faab · verified · 1 parent: 63b1c34

Training in progress, step 169, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de527414683c8b0bed0a78c4f0285fc12cf70935b48b0d7f6ad89125b80d8cd2
+oid sha256:bb205bf96bc1e449692c3c3cf7a04e77e8774eef99517674be52d2b58dc00d3a
 size 156926880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5c623cdad42407d532f2cd2b0e539c53401f7a0e948bedb05230e9fb8a66875
+oid sha256:de18f2c6fd17d0915b212e95220c9e0ab2f31466db846c413f841ab1844abaec
 size 79968772
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a5e41997852137eff5e8bdd6b2b44e24799f8f0f4f9c34656630f501c7c37a1
+oid sha256:47b4bd283c9337c504f1be1381e1d6ff8b5e0796f19065ccf9b8185a17347f88
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e77d360cd203b2fed24862c9206a3e5f5157856fbd9bf2643f1d3eb87c6e566
+oid sha256:ce7d2e8219e1343da35c24f4c17225d06d859aa5c80ad461abea4a9219c97b31
 size 1064
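
The four checkpoint files above are tracked with Git LFS, so this commit only rewrites their small pointer files (spec version, sha256 oid, byte size); the actual blobs live in LFS storage. As a minimal illustration (not part of the repository), the Python sketch below checks a locally downloaded file against its pointer text; the pointer contents and path are copied from the adapter diff above.

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Check a downloaded file against its Git LFS pointer (oid sha256 + size)."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:
        return False

    # Hash the file in 1 MiB chunks and compare against the pointer's oid.
    sha = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Example using the new adapter pointer from this commit:
pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:bb205bf96bc1e449692c3c3cf7a04e77e8774eef99517674be52d2b58dc00d3a
size 156926880
"""
print(verify_lfs_pointer(pointer, "last-checkpoint/adapter_model.safetensors"))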
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 0.04182600975036621,
 "best_model_checkpoint": "miner_id_24/checkpoint-150",
- "epoch": 2.6696230598669626,
+ "epoch": 3.011086474501109,
 "eval_steps": 50,
- "global_step": 150,
+ "global_step": 169,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1089,6 +1089,139 @@
 "eval_samples_per_second": 28.094,
 "eval_steps_per_second": 7.098,
 "step": 150
+ },
+ {
+ "epoch": 2.6873614190687363,
+ "grad_norm": 0.46853962540626526,
+ "learning_rate": 2.85663688337436e-06,
+ "loss": 0.0156,
+ "step": 151
+ },
+ {
+ "epoch": 2.70509977827051,
+ "grad_norm": 0.45606735348701477,
+ "learning_rate": 2.551858691034086e-06,
+ "loss": 0.0086,
+ "step": 152
+ },
+ {
+ "epoch": 2.7228381374722836,
+ "grad_norm": 0.39163070917129517,
+ "learning_rate": 2.26365503861976e-06,
+ "loss": 0.0061,
+ "step": 153
+ },
+ {
+ "epoch": 2.740576496674058,
+ "grad_norm": 0.22401034832000732,
+ "learning_rate": 1.992173140817682e-06,
+ "loss": 0.0048,
+ "step": 154
+ },
+ {
+ "epoch": 2.7583148558758315,
+ "grad_norm": 0.3343549370765686,
+ "learning_rate": 1.737551670826774e-06,
+ "loss": 0.0111,
+ "step": 155
+ },
+ {
+ "epoch": 2.776053215077605,
+ "grad_norm": 0.33819764852523804,
+ "learning_rate": 1.49992068952417e-06,
+ "loss": 0.0101,
+ "step": 156
+ },
+ {
+ "epoch": 2.7937915742793793,
+ "grad_norm": 0.3976474404335022,
+ "learning_rate": 1.27940157903004e-06,
+ "loss": 0.0145,
+ "step": 157
+ },
+ {
+ "epoch": 2.811529933481153,
+ "grad_norm": 0.3741917014122009,
+ "learning_rate": 1.0761069807054472e-06,
+ "loss": 0.0116,
+ "step": 158
+ },
+ {
+ "epoch": 2.8292682926829267,
+ "grad_norm": 0.6680158972740173,
+ "learning_rate": 8.901407376150799e-07,
+ "loss": 0.0174,
+ "step": 159
+ },
+ {
+ "epoch": 2.847006651884701,
+ "grad_norm": 0.24637554585933685,
+ "learning_rate": 7.215978414840828e-07,
+ "loss": 0.0068,
+ "step": 160
+ },
+ {
+ "epoch": 2.8647450110864745,
+ "grad_norm": 0.5326627492904663,
+ "learning_rate": 5.705643841762314e-07,
+ "loss": 0.0114,
+ "step": 161
+ },
+ {
+ "epoch": 2.882483370288248,
+ "grad_norm": 0.42392855882644653,
+ "learning_rate": 4.371175137181088e-07,
+ "loss": 0.0097,
+ "step": 162
+ },
+ {
+ "epoch": 2.9002217294900223,
+ "grad_norm": 0.5340198874473572,
+ "learning_rate": 3.213253948918315e-07,
+ "loss": 0.016,
+ "step": 163
+ },
+ {
+ "epoch": 2.917960088691796,
+ "grad_norm": 1.3395206928253174,
+ "learning_rate": 2.232471744164116e-07,
+ "loss": 0.0197,
+ "step": 164
+ },
+ {
+ "epoch": 2.9356984478935697,
+ "grad_norm": 0.18678541481494904,
+ "learning_rate": 1.4293295073557144e-07,
+ "loss": 0.0046,
+ "step": 165
+ },
+ {
+ "epoch": 2.953436807095344,
+ "grad_norm": 0.3585246801376343,
+ "learning_rate": 8.042374842740341e-08,
+ "loss": 0.0088,
+ "step": 166
+ },
+ {
+ "epoch": 2.9711751662971175,
+ "grad_norm": 0.2283589392900467,
+ "learning_rate": 3.575149724897308e-08,
+ "loss": 0.0052,
+ "step": 167
+ },
+ {
+ "epoch": 2.988913525498891,
+ "grad_norm": 0.37096989154815674,
+ "learning_rate": 8.939015826586738e-09,
+ "loss": 0.007,
+ "step": 168
+ },
+ {
+ "epoch": 3.011086474501109,
+ "grad_norm": 1.0814924240112305,
+ "learning_rate": 0.0,
+ "loss": 0.0184,
+ "step": 169
 }
 ],
 "logging_steps": 1,
@@ -1112,12 +1245,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 5.96048917561344e+16,
+ "total_flos": 6.715484471191142e+16,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null