lesso06 committed on
Commit
ec09a10
·
verified ·
1 Parent(s): 327030b

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f28de0041a3996b92bdb106301466f87586414e5ae7e2fd505f291b51c856ec9
3
  size 2373352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d2fecf77d5e89bebc7e52b1e7ad4c3700095ba5ed6bf915b63994136730606c
3
  size 2373352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1c22ce2b4809c251cd311d01b0d15a3af6840e923cc4f5931c495ec63e7fcfe
3
  size 4899962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5fa1150d4dd02cbaddb889caf0142f62f01de73f9465e86ec66568c57020a43
3
  size 4899962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:270b6287a73e2067396ab6a3e2393f5969fb8398ad020b771c6503e2e2615a33
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6430fbc179aa58d1d62e103957529b88997bd94d6e982250b44b521c1bed6dbe
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30dcb9447a9b271d3d135b50e56f91526062051264deca7285bd511e97fe15b3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac049b3a3b9b8a4c04401dcee0ce067c5d564756ad040ddd31682eb4723f5c4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.740056037902832,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8000",
4
- "epoch": 2.5779058306164226,
5
  "eval_steps": 500,
6
- "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
1341
  "eval_samples_per_second": 56.958,
1342
  "eval_steps_per_second": 14.245,
1343
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1344
  }
1345
  ],
1346
  "logging_steps": 50,
@@ -1355,7 +1433,7 @@
1355
  "early_stopping_threshold": 0.0
1356
  },
1357
  "attributes": {
1358
- "early_stopping_patience_counter": 1
1359
  }
1360
  },
1361
  "TrainerControl": {
@@ -1364,12 +1442,12 @@
1364
  "should_evaluate": false,
1365
  "should_log": false,
1366
  "should_save": true,
1367
- "should_training_stop": false
1368
  },
1369
  "attributes": {}
1370
  }
1371
  },
1372
- "total_flos": 9372318253056000.0,
1373
  "train_batch_size": 4,
1374
  "trial_name": null,
1375
  "trial_params": null
 
1
  {
2
  "best_metric": 11.740056037902832,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8000",
4
+ "epoch": 2.729547350064448,
5
  "eval_steps": 500,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1341
  "eval_samples_per_second": 56.958,
1342
  "eval_steps_per_second": 14.245,
1343
  "step": 8500
1344
+ },
1345
+ {
1346
+ "epoch": 2.593069982561225,
1347
+ "grad_norm": 0.09178922325372696,
1348
+ "learning_rate": 1.2966973838057032e-06,
1349
+ "loss": 11.7532,
1350
+ "step": 8550
1351
+ },
1352
+ {
1353
+ "epoch": 2.6082341345060276,
1354
+ "grad_norm": 0.0969797670841217,
1355
+ "learning_rate": 1.0250034170112066e-06,
1356
+ "loss": 11.7431,
1357
+ "step": 8600
1358
+ },
1359
+ {
1360
+ "epoch": 2.62339828645083,
1361
+ "grad_norm": 0.09565451741218567,
1362
+ "learning_rate": 7.850739606764662e-07,
1363
+ "loss": 11.7516,
1364
+ "step": 8650
1365
+ },
1366
+ {
1367
+ "epoch": 2.638562438395633,
1368
+ "grad_norm": 0.09562574326992035,
1369
+ "learning_rate": 5.769837511778591e-07,
1370
+ "loss": 11.7592,
1371
+ "step": 8700
1372
+ },
1373
+ {
1374
+ "epoch": 2.653726590340435,
1375
+ "grad_norm": 0.10010024160146713,
1376
+ "learning_rate": 4.0079760718522074e-07,
1377
+ "loss": 11.7424,
1378
+ "step": 8750
1379
+ },
1380
+ {
1381
+ "epoch": 2.668890742285238,
1382
+ "grad_norm": 0.09608300030231476,
1383
+ "learning_rate": 2.5657040947133024e-07,
1384
+ "loss": 11.7489,
1385
+ "step": 8800
1386
+ },
1387
+ {
1388
+ "epoch": 2.68405489423004,
1389
+ "grad_norm": 0.0922103077173233,
1390
+ "learning_rate": 1.443470838168276e-07,
1391
+ "loss": 11.7536,
1392
+ "step": 8850
1393
+ },
1394
+ {
1395
+ "epoch": 2.699219046174843,
1396
+ "grad_norm": 0.08855465799570084,
1397
+ "learning_rate": 6.416258701624544e-08,
1398
+ "loss": 11.7358,
1399
+ "step": 8900
1400
+ },
1401
+ {
1402
+ "epoch": 2.714383198119645,
1403
+ "grad_norm": 0.09823824465274811,
1404
+ "learning_rate": 1.6041895989147846e-08,
1405
+ "loss": 11.7648,
1406
+ "step": 8950
1407
+ },
1408
+ {
1409
+ "epoch": 2.729547350064448,
1410
+ "grad_norm": 0.09125322103500366,
1411
+ "learning_rate": 0.0,
1412
+ "loss": 11.7279,
1413
+ "step": 9000
1414
+ },
1415
+ {
1416
+ "epoch": 2.729547350064448,
1417
+ "eval_loss": 11.740180015563965,
1418
+ "eval_runtime": 82.7885,
1419
+ "eval_samples_per_second": 67.087,
1420
+ "eval_steps_per_second": 16.778,
1421
+ "step": 9000
1422
  }
1423
  ],
1424
  "logging_steps": 50,
 
1433
  "early_stopping_threshold": 0.0
1434
  },
1435
  "attributes": {
1436
+ "early_stopping_patience_counter": 2
1437
  }
1438
  },
1439
  "TrainerControl": {
 
1442
  "should_evaluate": false,
1443
  "should_log": false,
1444
  "should_save": true,
1445
+ "should_training_stop": true
1446
  },
1447
  "attributes": {}
1448
  }
1449
  },
1450
+ "total_flos": 9923902488576000.0,
1451
  "train_batch_size": 4,
1452
  "trial_name": null,
1453
  "trial_params": null