dada22231 commited on
Commit
2def4fa
·
verified ·
1 Parent(s): cd5ca8c

Training in progress, step 175, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:133647ff75619b2ebf4b08b1907148f54f64d50a6540006b8b0cfd3d268e67a7
3
  size 166182480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f943437f6682f7ee7de160a0fb0dd7f1a7f9b7c1e5cb4bc2beb7dea62371d6
3
  size 166182480
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b780e0bdd236c892f8c7d0c7d4afe550217aba06b3560dacba4ce990ecece4b
3
  size 332574358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:844a482769f0e2abd40ad13846907ca880baf715442ba45b6595fb1d2992d4c5
3
  size 332574358
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4d6427d6aa68c158ad80f76946f6fc44629c698f67492b8fb6d643dbc5eef98
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eafd79e713363198747e3075dba58fd79e2d3d1300105438732b2e0ee683c97
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2caf50116db0368fab40b4e34ae3b0c9f1a86e9da0bc7a8de5ff785b985711a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a1fd21f28f132126ba0a3c0e9b7867579d24daca77e2d8e14bc16542a7a830
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a955dabcc60ffe276aa50f92c60f21389f3a28e5589c5c081037f1d66876c9a3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98523765fbe4ee7d22ab0a5ea68e4b5406a21e1b54e1b37a5358bbc38419dcd
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1380d7d4f110826396b67714006bc18fde61d5c81581b3cc018f32e55772014
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1907ecded23c99f86058c3975d42bb30b996a62807a726ac4088c81af5fe16c9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:148dc60fce7a98d209219ab65863631c40408a69d4537618751b3440fe762b40
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9f689e3fd5df575eecb35b5e9b27f49e75a860ed67ef4150a0d2749c11e5d42
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.842964704541373e-06,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.3313315386208325,
5
  "eval_steps": 25,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1113,6 +1113,189 @@
1113
  "eval_samples_per_second": 23.771,
1114
  "eval_steps_per_second": 6.18,
1115
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1116
  }
1117
  ],
1118
  "logging_steps": 1,
@@ -1141,7 +1324,7 @@
1141
  "attributes": {}
1142
  }
1143
  },
1144
- "total_flos": 4.87442683527168e+17,
1145
  "train_batch_size": 1,
1146
  "trial_name": null,
1147
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.7154196686751675e-06,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-175",
4
+ "epoch": 0.38655346172430455,
5
  "eval_steps": 25,
6
+ "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1113
  "eval_samples_per_second": 23.771,
1114
  "eval_steps_per_second": 6.18,
1115
  "step": 150
1116
+ },
1117
+ {
1118
+ "epoch": 0.3335404155449714,
1119
+ "grad_norm": 0.000154680063133128,
1120
+ "learning_rate": 2.370508142456748e-05,
1121
+ "loss": 0.0,
1122
+ "step": 151
1123
+ },
1124
+ {
1125
+ "epoch": 0.3357492924691102,
1126
+ "grad_norm": 0.00023134062939789146,
1127
+ "learning_rate": 2.3180194846605367e-05,
1128
+ "loss": 0.0,
1129
+ "step": 152
1130
+ },
1131
+ {
1132
+ "epoch": 0.33795816939324913,
1133
+ "grad_norm": 0.00024014056543819606,
1134
+ "learning_rate": 2.2663827199913588e-05,
1135
+ "loss": 0.0,
1136
+ "step": 153
1137
+ },
1138
+ {
1139
+ "epoch": 0.340167046317388,
1140
+ "grad_norm": 0.00021546079369727522,
1141
+ "learning_rate": 2.215611672859741e-05,
1142
+ "loss": 0.0,
1143
+ "step": 154
1144
+ },
1145
+ {
1146
+ "epoch": 0.3423759232415269,
1147
+ "grad_norm": 0.00024820741964504123,
1148
+ "learning_rate": 2.165719935902685e-05,
1149
+ "loss": 0.0,
1150
+ "step": 155
1151
+ },
1152
+ {
1153
+ "epoch": 0.3445848001656658,
1154
+ "grad_norm": 0.00044995194184593856,
1155
+ "learning_rate": 2.1167208663446025e-05,
1156
+ "loss": 0.0,
1157
+ "step": 156
1158
+ },
1159
+ {
1160
+ "epoch": 0.34679367708980463,
1161
+ "grad_norm": 0.00015662639634683728,
1162
+ "learning_rate": 2.068627582421254e-05,
1163
+ "loss": 0.0,
1164
+ "step": 157
1165
+ },
1166
+ {
1167
+ "epoch": 0.34900255401394353,
1168
+ "grad_norm": 9.385013981955126e-05,
1169
+ "learning_rate": 2.0214529598676836e-05,
1170
+ "loss": 0.0,
1171
+ "step": 158
1172
+ },
1173
+ {
1174
+ "epoch": 0.35121143093808244,
1175
+ "grad_norm": 9.166620293399319e-05,
1176
+ "learning_rate": 1.9752096284710738e-05,
1177
+ "loss": 0.0,
1178
+ "step": 159
1179
+ },
1180
+ {
1181
+ "epoch": 0.3534203078622213,
1182
+ "grad_norm": 9.074221452465281e-05,
1183
+ "learning_rate": 1.9299099686894423e-05,
1184
+ "loss": 0.0,
1185
+ "step": 160
1186
+ },
1187
+ {
1188
+ "epoch": 0.3556291847863602,
1189
+ "grad_norm": 9.394592780154198e-05,
1190
+ "learning_rate": 1.8855661083370986e-05,
1191
+ "loss": 0.0,
1192
+ "step": 161
1193
+ },
1194
+ {
1195
+ "epoch": 0.3578380617104991,
1196
+ "grad_norm": 9.24318956094794e-05,
1197
+ "learning_rate": 1.842189919337732e-05,
1198
+ "loss": 0.0,
1199
+ "step": 162
1200
+ },
1201
+ {
1202
+ "epoch": 0.36004693863463794,
1203
+ "grad_norm": 0.00011218286090297624,
1204
+ "learning_rate": 1.7997930145460136e-05,
1205
+ "loss": 0.0,
1206
+ "step": 163
1207
+ },
1208
+ {
1209
+ "epoch": 0.36225581555877684,
1210
+ "grad_norm": 0.00019017797603737563,
1211
+ "learning_rate": 1.758386744638546e-05,
1212
+ "loss": 0.0,
1213
+ "step": 164
1214
+ },
1215
+ {
1216
+ "epoch": 0.36446469248291574,
1217
+ "grad_norm": 0.00019140807853545994,
1218
+ "learning_rate": 1.7179821950750284e-05,
1219
+ "loss": 0.0,
1220
+ "step": 165
1221
+ },
1222
+ {
1223
+ "epoch": 0.3666735694070546,
1224
+ "grad_norm": 0.00020481123647186905,
1225
+ "learning_rate": 1.6785901831303956e-05,
1226
+ "loss": 0.0,
1227
+ "step": 166
1228
+ },
1229
+ {
1230
+ "epoch": 0.3688824463311935,
1231
+ "grad_norm": 0.00022451799304690212,
1232
+ "learning_rate": 1.6402212549987762e-05,
1233
+ "loss": 0.0,
1234
+ "step": 167
1235
+ },
1236
+ {
1237
+ "epoch": 0.37109132325533234,
1238
+ "grad_norm": 0.00030974153196439147,
1239
+ "learning_rate": 1.602885682970026e-05,
1240
+ "loss": 0.0,
1241
+ "step": 168
1242
+ },
1243
+ {
1244
+ "epoch": 0.37330020017947124,
1245
+ "grad_norm": 0.00048727114335633814,
1246
+ "learning_rate": 1.566593462679586e-05,
1247
+ "loss": 0.0,
1248
+ "step": 169
1249
+ },
1250
+ {
1251
+ "epoch": 0.37550907710361014,
1252
+ "grad_norm": 9.068298095371574e-05,
1253
+ "learning_rate": 1.531354310432403e-05,
1254
+ "loss": 0.0,
1255
+ "step": 170
1256
+ },
1257
+ {
1258
+ "epoch": 0.377717954027749,
1259
+ "grad_norm": 9.067923383554444e-05,
1260
+ "learning_rate": 1.4971776606016482e-05,
1261
+ "loss": 0.0,
1262
+ "step": 171
1263
+ },
1264
+ {
1265
+ "epoch": 0.3799268309518879,
1266
+ "grad_norm": 9.05448105186224e-05,
1267
+ "learning_rate": 1.464072663102903e-05,
1268
+ "loss": 0.0,
1269
+ "step": 172
1270
+ },
1271
+ {
1272
+ "epoch": 0.3821357078760268,
1273
+ "grad_norm": 9.20113452593796e-05,
1274
+ "learning_rate": 1.4320481809445051e-05,
1275
+ "loss": 0.0,
1276
+ "step": 173
1277
+ },
1278
+ {
1279
+ "epoch": 0.38434458480016565,
1280
+ "grad_norm": 9.098761074710637e-05,
1281
+ "learning_rate": 1.4011127878547087e-05,
1282
+ "loss": 0.0,
1283
+ "step": 174
1284
+ },
1285
+ {
1286
+ "epoch": 0.38655346172430455,
1287
+ "grad_norm": 9.139224857790396e-05,
1288
+ "learning_rate": 1.3712747659862896e-05,
1289
+ "loss": 0.0,
1290
+ "step": 175
1291
+ },
1292
+ {
1293
+ "epoch": 0.38655346172430455,
1294
+ "eval_loss": 2.7154196686751675e-06,
1295
+ "eval_runtime": 2.0847,
1296
+ "eval_samples_per_second": 23.985,
1297
+ "eval_steps_per_second": 6.236,
1298
+ "step": 175
1299
  }
1300
  ],
1301
  "logging_steps": 1,
 
1324
  "attributes": {}
1325
  }
1326
  },
1327
+ "total_flos": 5.68683130781696e+17,
1328
  "train_batch_size": 1,
1329
  "trial_name": null,
1330
  "trial_params": null