wcyat commited on
Commit
96f9de0
·
verified ·
1 Parent(s): c3d7f24

Training in progress, step 1655, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95ceecdd0b1aa2dbf1ec2f23a4ddd928595c382511d085f0d1663c9722b61ee1
3
  size 1304192904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536b36c5409b325886b3735dcf0e4fb0c68ae9370a4055c3f8628339b2181e94
3
  size 1304192904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6a4cdb23fbf57119940af942e16735f7e6ed513337f644de9af3a2da6bc01e6
3
  size 2608620781
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:742c802cf0eb2db4f090ace56f50c7ab57588c2a890ff9badd5f758bcb319a77
3
  size 2608620781
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0dde6cfb71c791201a1b9da1c3a5b4ebadc80456fb340adc64cdc8144e3ec77
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f6e96927e70b3990c845f9f37f5cfa36bd0d8da4eaedfa781fe14d91872eb9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94619124ebef073ef434567921b91695bdc23ddab6d107310abf209634914efe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769c097ffaacb42806711118e99a46206586325f4d499f09baf2724f8a595f75
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.2986587882041931,
3
  "best_model_checkpoint": "./results/checkpoint-180",
4
- "epoch": 4.531722054380665,
5
  "eval_steps": 20,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,6 +1207,118 @@
1207
  "eval_samples_per_second": 9.87,
1208
  "eval_steps_per_second": 2.516,
1209
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 20,
@@ -1221,12 +1333,12 @@
1221
  "should_evaluate": false,
1222
  "should_log": false,
1223
  "should_save": true,
1224
- "should_training_stop": false
1225
  },
1226
  "attributes": {}
1227
  }
1228
  },
1229
- "total_flos": 4844949162060756.0,
1230
  "train_batch_size": 4,
1231
  "trial_name": null,
1232
  "trial_params": null
 
1
  {
2
  "best_metric": 0.2986587882041931,
3
  "best_model_checkpoint": "./results/checkpoint-180",
4
+ "epoch": 5.0,
5
  "eval_steps": 20,
6
+ "global_step": 1655,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "eval_samples_per_second": 9.87,
1208
  "eval_steps_per_second": 2.516,
1209
  "step": 1500
1210
+ },
1211
+ {
1212
+ "epoch": 4.59214501510574,
1213
+ "grad_norm": 0.01508911419659853,
1214
+ "learning_rate": 1.6314199395770395e-06,
1215
+ "loss": 0.2079,
1216
+ "step": 1520
1217
+ },
1218
+ {
1219
+ "epoch": 4.59214501510574,
1220
+ "eval_accuracy": 0.9150326797385621,
1221
+ "eval_loss": 0.4994471073150635,
1222
+ "eval_runtime": 16.27,
1223
+ "eval_samples_per_second": 9.404,
1224
+ "eval_steps_per_second": 2.397,
1225
+ "step": 1520
1226
+ },
1227
+ {
1228
+ "epoch": 4.652567975830816,
1229
+ "grad_norm": 0.037796132266521454,
1230
+ "learning_rate": 1.3897280966767373e-06,
1231
+ "loss": 0.1423,
1232
+ "step": 1540
1233
+ },
1234
+ {
1235
+ "epoch": 4.652567975830816,
1236
+ "eval_accuracy": 0.9150326797385621,
1237
+ "eval_loss": 0.4834767282009125,
1238
+ "eval_runtime": 15.2545,
1239
+ "eval_samples_per_second": 10.03,
1240
+ "eval_steps_per_second": 2.557,
1241
+ "step": 1540
1242
+ },
1243
+ {
1244
+ "epoch": 4.712990936555891,
1245
+ "grad_norm": 0.04188241437077522,
1246
+ "learning_rate": 1.1480362537764353e-06,
1247
+ "loss": 0.0009,
1248
+ "step": 1560
1249
+ },
1250
+ {
1251
+ "epoch": 4.712990936555891,
1252
+ "eval_accuracy": 0.9084967320261438,
1253
+ "eval_loss": 0.4825386703014374,
1254
+ "eval_runtime": 15.7256,
1255
+ "eval_samples_per_second": 9.729,
1256
+ "eval_steps_per_second": 2.48,
1257
+ "step": 1560
1258
+ },
1259
+ {
1260
+ "epoch": 4.7734138972809665,
1261
+ "grad_norm": 0.00891907513141632,
1262
+ "learning_rate": 9.063444108761329e-07,
1263
+ "loss": 0.0017,
1264
+ "step": 1580
1265
+ },
1266
+ {
1267
+ "epoch": 4.7734138972809665,
1268
+ "eval_accuracy": 0.9084967320261438,
1269
+ "eval_loss": 0.4918363392353058,
1270
+ "eval_runtime": 15.5035,
1271
+ "eval_samples_per_second": 9.869,
1272
+ "eval_steps_per_second": 2.516,
1273
+ "step": 1580
1274
+ },
1275
+ {
1276
+ "epoch": 4.833836858006042,
1277
+ "grad_norm": 0.02154299058020115,
1278
+ "learning_rate": 6.646525679758309e-07,
1279
+ "loss": 0.0648,
1280
+ "step": 1600
1281
+ },
1282
+ {
1283
+ "epoch": 4.833836858006042,
1284
+ "eval_accuracy": 0.9150326797385621,
1285
+ "eval_loss": 0.4916614294052124,
1286
+ "eval_runtime": 15.4748,
1287
+ "eval_samples_per_second": 9.887,
1288
+ "eval_steps_per_second": 2.52,
1289
+ "step": 1600
1290
+ },
1291
+ {
1292
+ "epoch": 4.8942598187311175,
1293
+ "grad_norm": 1.6405360698699951,
1294
+ "learning_rate": 4.2296072507552877e-07,
1295
+ "loss": 0.0531,
1296
+ "step": 1620
1297
+ },
1298
+ {
1299
+ "epoch": 4.8942598187311175,
1300
+ "eval_accuracy": 0.9084967320261438,
1301
+ "eval_loss": 0.49186328053474426,
1302
+ "eval_runtime": 15.5247,
1303
+ "eval_samples_per_second": 9.855,
1304
+ "eval_steps_per_second": 2.512,
1305
+ "step": 1620
1306
+ },
1307
+ {
1308
+ "epoch": 4.954682779456194,
1309
+ "grad_norm": 0.05020515248179436,
1310
+ "learning_rate": 1.812688821752266e-07,
1311
+ "loss": 0.0008,
1312
+ "step": 1640
1313
+ },
1314
+ {
1315
+ "epoch": 4.954682779456194,
1316
+ "eval_accuracy": 0.9084967320261438,
1317
+ "eval_loss": 0.49449607729911804,
1318
+ "eval_runtime": 15.5248,
1319
+ "eval_samples_per_second": 9.855,
1320
+ "eval_steps_per_second": 2.512,
1321
+ "step": 1640
1322
  }
1323
  ],
1324
  "logging_steps": 20,
 
1333
  "should_evaluate": false,
1334
  "should_log": false,
1335
  "should_save": true,
1336
+ "should_training_stop": true
1337
  },
1338
  "attributes": {}
1339
  }
1340
  },
1341
+ "total_flos": 5352059977451376.0,
1342
  "train_batch_size": 4,
1343
  "trial_name": null,
1344
  "trial_params": null