lesso09 committed on
Commit
284e4e1
·
verified ·
1 Parent(s): ffe60a5

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9a09bf824d1bff7f721989176c12ec92b02b40ca78e44d7c8e4c738314072ac
3
  size 2373352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb70ce200be730fd5499defcbfcb51225d5644fc671500c2d1e76726ae7a7b69
3
  size 2373352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e83caef5497bb67bd04b39079e16ace1f78ec4498cbd170d4a7b16c79af73738
3
  size 4899962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880fe8b8b6ca7aa262d4bc8e11156fdeb25e208eed751939d85c62702edcc8a6
3
  size 4899962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8646e34fc890e21269375270a6e0ce2a32bcc2b1ad55fb196f6b1c226c2916f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a517b9ab4a35704cbd42e43ce8e92db5b3f21b720728caa748dca82d0f6c7c9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04dd85931f65b5c9f16df9d76eecddafd1b76e4a002da7e064432627ec5efeee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183dda8021864ec2543194942c7e9cf691d24dcbf84a0d87f80ed861dca5e467
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 11.543472290039062,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
- "epoch": 2.443406395975566,
5
  "eval_steps": 500,
6
- "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1341,6 +1341,84 @@
1341
  "eval_samples_per_second": 66.152,
1342
  "eval_steps_per_second": 16.541,
1343
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1344
  }
1345
  ],
1346
  "logging_steps": 50,
@@ -1355,7 +1433,7 @@
1355
  "early_stopping_threshold": 0.0
1356
  },
1357
  "attributes": {
1358
- "early_stopping_patience_counter": 0
1359
  }
1360
  },
1361
  "TrainerControl": {
@@ -1364,12 +1442,12 @@
1364
  "should_evaluate": false,
1365
  "should_log": false,
1366
  "should_save": true,
1367
- "should_training_stop": false
1368
  },
1369
  "attributes": {}
1370
  }
1371
  },
1372
- "total_flos": 9372593700864000.0,
1373
  "train_batch_size": 4,
1374
  "trial_name": null,
1375
  "trial_params": null
 
1
  {
2
  "best_metric": 11.543472290039062,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-8500",
4
+ "epoch": 2.5871361839741285,
5
  "eval_steps": 500,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1341
  "eval_samples_per_second": 66.152,
1342
  "eval_steps_per_second": 16.541,
1343
  "step": 8500
1344
+ },
1345
+ {
1346
+ "epoch": 2.457779374775422,
1347
+ "grad_norm": 0.19527162611484528,
1348
+ "learning_rate": 1.3155813262883107e-06,
1349
+ "loss": 11.552,
1350
+ "step": 8550
1351
+ },
1352
+ {
1353
+ "epoch": 2.4721523535752783,
1354
+ "grad_norm": 0.20165836811065674,
1355
+ "learning_rate": 1.0399306512395251e-06,
1356
+ "loss": 11.5652,
1357
+ "step": 8600
1358
+ },
1359
+ {
1360
+ "epoch": 2.486525332375135,
1361
+ "grad_norm": 0.20157189667224884,
1362
+ "learning_rate": 7.965070766086479e-07,
1363
+ "loss": 11.5603,
1364
+ "step": 8650
1365
+ },
1366
+ {
1367
+ "epoch": 2.500898311174991,
1368
+ "grad_norm": 0.2076597958803177,
1369
+ "learning_rate": 5.853864271658863e-07,
1370
+ "loss": 11.5602,
1371
+ "step": 8700
1372
+ },
1373
+ {
1374
+ "epoch": 2.515271289974847,
1375
+ "grad_norm": 0.2005164921283722,
1376
+ "learning_rate": 4.066344655422871e-07,
1377
+ "loss": 11.5624,
1378
+ "step": 8750
1379
+ },
1380
+ {
1381
+ "epoch": 2.5296442687747036,
1382
+ "grad_norm": 0.1953943967819214,
1383
+ "learning_rate": 2.603068717451846e-07,
1384
+ "loss": 11.5487,
1385
+ "step": 8800
1386
+ },
1387
+ {
1388
+ "epoch": 2.5440172475745597,
1389
+ "grad_norm": 0.22445988655090332,
1390
+ "learning_rate": 1.4644922581416003e-07,
1391
+ "loss": 11.5831,
1392
+ "step": 8850
1393
+ },
1394
+ {
1395
+ "epoch": 2.5583902263744163,
1396
+ "grad_norm": 0.2075551301240921,
1397
+ "learning_rate": 6.509699362327815e-08,
1398
+ "loss": 11.5445,
1399
+ "step": 8900
1400
+ },
1401
+ {
1402
+ "epoch": 2.5727632051742724,
1403
+ "grad_norm": 0.19453804194927216,
1404
+ "learning_rate": 1.6275515833650003e-08,
1405
+ "loss": 11.5572,
1406
+ "step": 8950
1407
+ },
1408
+ {
1409
+ "epoch": 2.5871361839741285,
1410
+ "grad_norm": 0.2139836996793747,
1411
+ "learning_rate": 0.0,
1412
+ "loss": 11.5535,
1413
+ "step": 9000
1414
+ },
1415
+ {
1416
+ "epoch": 2.5871361839741285,
1417
+ "eval_loss": 11.543502807617188,
1418
+ "eval_runtime": 128.3762,
1419
+ "eval_samples_per_second": 45.639,
1420
+ "eval_steps_per_second": 11.412,
1421
+ "step": 9000
1422
  }
1423
  ],
1424
  "logging_steps": 50,
 
1433
  "early_stopping_threshold": 0.0
1434
  },
1435
  "attributes": {
1436
+ "early_stopping_patience_counter": 1
1437
  }
1438
  },
1439
  "TrainerControl": {
 
1442
  "should_evaluate": false,
1443
  "should_log": false,
1444
  "should_save": true,
1445
+ "should_training_stop": true
1446
  },
1447
  "attributes": {}
1448
  }
1449
  },
1450
+ "total_flos": 9923902488576000.0,
1451
  "train_batch_size": 4,
1452
  "trial_name": null,
1453
  "trial_params": null