lesso11 committed on
Commit
1b30c1b
·
verified ·
1 Parent(s): 7603573

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dbdc76f4c9b984be0302eb41232d54260782b41955e9f1bbf9bb95f96a58b6f
3
  size 2373352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe4d248a55091015dd44003f7230da96ba40bfc0bfb7c6c63512095fc60193b
3
  size 2373352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a69884c223aa8a06685705af1b8ca9f2af295b6dc7cde4da9c00c153cdf848a1
3
  size 4899962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493107a5ed5fc4d8569b5f3bb1e8bf556a0daaf2ca3af4822164d14a5bcf28ac
3
  size 4899962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:090d2226d5becd4ac05cc146a931a60227af444a21ce770ca03c93e34c7b55cc
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95bd392ae4a56787ff5724cd96c68985512246001ca168d60d466f980146e191
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86795bab370d3f087ed17dee47b662a3f7e7afd3dbb186d7ada00e7f580ae03c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f0d9e5b08bcdb9ff94ecc831a9c75c033cad53fcf1082d39ad1ef8a7a6ad63
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.74162483215332,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
- "epoch": 2.5779058306164226,
5
  "eval_steps": 500,
6
- "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
1341
  "eval_samples_per_second": 67.14,
1342
  "eval_steps_per_second": 16.791,
1343
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1344
  }
1345
  ],
1346
  "logging_steps": 50,
@@ -1355,7 +1433,7 @@
1355
  "early_stopping_threshold": 0.0
1356
  },
1357
  "attributes": {
1358
- "early_stopping_patience_counter": 0
1359
  }
1360
  },
1361
  "TrainerControl": {
@@ -1364,12 +1442,12 @@
1364
  "should_evaluate": false,
1365
  "should_log": false,
1366
  "should_save": true,
1367
- "should_training_stop": false
1368
  },
1369
  "attributes": {}
1370
  }
1371
  },
1372
- "total_flos": 9372455976960000.0,
1373
  "train_batch_size": 4,
1374
  "trial_name": null,
1375
  "trial_params": null
 
1
  {
2
  "best_metric": 11.74162483215332,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
+ "epoch": 2.729547350064448,
5
  "eval_steps": 500,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1341
  "eval_samples_per_second": 67.14,
1342
  "eval_steps_per_second": 16.791,
1343
  "step": 8500
1344
+ },
1345
+ {
1346
+ "epoch": 2.593069982561225,
1347
+ "grad_norm": 0.088604636490345,
1348
+ "learning_rate": 1.3281706212767156e-06,
1349
+ "loss": 11.7569,
1350
+ "step": 8550
1351
+ },
1352
+ {
1353
+ "epoch": 2.6082341345060276,
1354
+ "grad_norm": 0.08613212406635284,
1355
+ "learning_rate": 1.049882140725071e-06,
1356
+ "loss": 11.7495,
1357
+ "step": 8600
1358
+ },
1359
+ {
1360
+ "epoch": 2.62339828645083,
1361
+ "grad_norm": 0.09652528911828995,
1362
+ "learning_rate": 8.041291538967689e-07,
1363
+ "loss": 11.7611,
1364
+ "step": 8650
1365
+ },
1366
+ {
1367
+ "epoch": 2.638562438395633,
1368
+ "grad_norm": 0.08829577267169952,
1369
+ "learning_rate": 5.909882111579044e-07,
1370
+ "loss": 11.7421,
1371
+ "step": 8700
1372
+ },
1373
+ {
1374
+ "epoch": 2.653726590340435,
1375
+ "grad_norm": 0.08825016021728516,
1376
+ "learning_rate": 4.1052570444699797e-07,
1377
+ "loss": 11.7531,
1378
+ "step": 8750
1379
+ },
1380
+ {
1381
+ "epoch": 2.668890742285238,
1382
+ "grad_norm": 0.09674602001905441,
1383
+ "learning_rate": 2.627978465944208e-07,
1384
+ "loss": 11.7567,
1385
+ "step": 8800
1386
+ },
1387
+ {
1388
+ "epoch": 2.68405489423004,
1389
+ "grad_norm": 0.09206220507621765,
1390
+ "learning_rate": 1.4785065381238165e-07,
1391
+ "loss": 11.7585,
1392
+ "step": 8850
1393
+ },
1394
+ {
1395
+ "epoch": 2.699219046174843,
1396
+ "grad_norm": 0.08380258083343506,
1397
+ "learning_rate": 6.571993136129994e-08,
1398
+ "loss": 11.7515,
1399
+ "step": 8900
1400
+ },
1401
+ {
1402
+ "epoch": 2.714383198119645,
1403
+ "grad_norm": 0.0812031477689743,
1404
+ "learning_rate": 1.6431262396651435e-08,
1405
+ "loss": 11.7497,
1406
+ "step": 8950
1407
+ },
1408
+ {
1409
+ "epoch": 2.729547350064448,
1410
+ "grad_norm": 0.08813946694135666,
1411
+ "learning_rate": 0.0,
1412
+ "loss": 11.7544,
1413
+ "step": 9000
1414
+ },
1415
+ {
1416
+ "epoch": 2.729547350064448,
1417
+ "eval_loss": 11.74162769317627,
1418
+ "eval_runtime": 88.5243,
1419
+ "eval_samples_per_second": 62.74,
1420
+ "eval_steps_per_second": 15.691,
1421
+ "step": 9000
1422
  }
1423
  ],
1424
  "logging_steps": 50,
 
1433
  "early_stopping_threshold": 0.0
1434
  },
1435
  "attributes": {
1436
+ "early_stopping_patience_counter": 1
1437
  }
1438
  },
1439
  "TrainerControl": {
 
1442
  "should_evaluate": false,
1443
  "should_log": false,
1444
  "should_save": true,
1445
+ "should_training_stop": true
1446
  },
1447
  "attributes": {}
1448
  }
1449
  },
1450
+ "total_flos": 9923764764672000.0,
1451
  "train_batch_size": 4,
1452
  "trial_name": null,
1453
  "trial_params": null