besimray committed · Commit 5d28a66 · verified · 1 Parent(s): dfeec4c

Training in progress, step 200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77a8c1d55ad0560d1fea317d62755fa3ac455956550bdc8beebf8fdb5cbad3e7
+oid sha256:1d787544b08ab0d4cade0305dad7f50350a2c24c6c0ee0b603d14c35c509da9f
 size 90207248
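The binary checkpoint files in this commit are stored through Git LFS, so each diff only touches the pointer file: the oid (the sha256 of the stored object) changes while the size stays the same. As a minimal sketch that is not part of this commit, a locally pulled copy could be checked against the new pointer like this; the path and expected hash come from the diff above, everything else is illustrative.

```python
# Sketch only: verify that a locally fetched LFS object matches the oid in its
# pointer file. Assumes the repo was cloned and `git lfs pull` has been run;
# the path below is illustrative.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints need not fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "1d787544b08ab0d4cade0305dad7f50350a2c24c6c0ee0b603d14c35c509da9f"
local = Path("last-checkpoint/adapter_model.safetensors")
actual = sha256_of(local)
print("match" if actual == expected else f"mismatch: {actual}")
```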
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c7035dcff0de3120190b5471e24169132d7aba9ca8b90a8256fe59a3a6c52f0
+oid sha256:b4ee7fc5f75ec00f16358de8c2685a30e8a2532aa952083a168fe80d6f9bcaeb
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e31a2c3d0797ac580a92fccc8f6f77fb2572c5039b438dfe92ee0b2018c8aaea
+oid sha256:99096d4e8555c6ef597909d68cc884f8aec314568300e99269d6d682793a4a68
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4598db665e6346ba448110213cdb9bf4e0fd12577c6b8ba0e9bdef835e311c10
+oid sha256:0a55eea102467b61b8b63052cdc819cee63bc2a6becf7451f13fc332baf0514f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7732155919075012,
   "best_model_checkpoint": "miner_id_24/checkpoint-170",
-  "epoch": 1.1326378539493294,
+  "epoch": 1.1922503725782414,
   "eval_steps": 10,
-  "global_step": 190,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1497,6 +1497,84 @@
       "eval_samples_per_second": 7.273,
       "eval_steps_per_second": 1.479,
       "step": 190
+    },
+    {
+      "epoch": 1.1385991058122205,
+      "grad_norm": 0.2825652062892914,
+      "learning_rate": 0.0001773311242813613,
+      "loss": 0.7241,
+      "step": 191
+    },
+    {
+      "epoch": 1.1445603576751118,
+      "grad_norm": 0.22261716425418854,
+      "learning_rate": 0.00017709000624184162,
+      "loss": 0.6043,
+      "step": 192
+    },
+    {
+      "epoch": 1.150521609538003,
+      "grad_norm": 0.2786444425582886,
+      "learning_rate": 0.00017684777842299205,
+      "loss": 0.8017,
+      "step": 193
+    },
+    {
+      "epoch": 1.1564828614008942,
+      "grad_norm": 0.28150370717048645,
+      "learning_rate": 0.0001766044443118978,
+      "loss": 0.6926,
+      "step": 194
+    },
+    {
+      "epoch": 1.1624441132637853,
+      "grad_norm": 0.24119311571121216,
+      "learning_rate": 0.0001763600074115703,
+      "loss": 0.5431,
+      "step": 195
+    },
+    {
+      "epoch": 1.1684053651266766,
+      "grad_norm": 0.25659507513046265,
+      "learning_rate": 0.00017611447124089649,
+      "loss": 0.7129,
+      "step": 196
+    },
+    {
+      "epoch": 1.174366616989568,
+      "grad_norm": 0.2532925605773926,
+      "learning_rate": 0.00017586783933458834,
+      "loss": 0.7385,
+      "step": 197
+    },
+    {
+      "epoch": 1.180327868852459,
+      "grad_norm": 0.2979128360748291,
+      "learning_rate": 0.00017562011524313185,
+      "loss": 0.8338,
+      "step": 198
+    },
+    {
+      "epoch": 1.1862891207153503,
+      "grad_norm": 0.27080920338630676,
+      "learning_rate": 0.00017537130253273613,
+      "loss": 0.7389,
+      "step": 199
+    },
+    {
+      "epoch": 1.1922503725782414,
+      "grad_norm": 0.29549166560173035,
+      "learning_rate": 0.0001751214047852818,
+      "loss": 0.6476,
+      "step": 200
+    },
+    {
+      "epoch": 1.1922503725782414,
+      "eval_loss": 0.7744897603988647,
+      "eval_runtime": 24.342,
+      "eval_samples_per_second": 7.271,
+      "eval_steps_per_second": 1.479,
+      "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1511,7 +1589,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -1520,12 +1598,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.288846023432602e+16,
+  "total_flos": 9.778247816026522e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null
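The trainer_state.json changes record the early-stopping bookkeeping for this run: the step-200 evaluation (eval_loss 0.7745) does not improve on the best metric of 0.7732 from checkpoint-170, the patience counter ticks from 2 to 3, and should_training_stop flips to true, making this the final checkpoint. The training script itself is not part of this commit; the following is a minimal sketch, assuming the Hugging Face transformers Trainer with an EarlyStoppingCallback and a patience of 3 (inferred from the counter, not stated anywhere in the repo), of settings consistent with the state shown above.

```python
# Sketch only: not the repository's actual training script. Values mirror the
# trainer_state.json above (eval every 10 steps, early_stopping_threshold 0.0,
# per-device batch size 5, logging every step); the patience of 3 is an
# assumption inferred from the counter reaching 3 at step 200.
from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="miner_id_24",            # matches the "best_model_checkpoint" prefix
    per_device_train_batch_size=5,       # "train_batch_size": 5
    evaluation_strategy="steps",         # newer transformers versions name this eval_strategy
    eval_steps=10,                       # "eval_steps": 10
    save_steps=10,                       # checkpoints at steps 170, 190, 200, ...
    logging_steps=1,                     # "logging_steps": 1
    load_best_model_at_end=True,         # required for EarlyStoppingCallback
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,           # assumed; counter reaches 3 in this commit
    early_stopping_threshold=0.0,        # matches the stored callback args
)

# Passed to Trainer(..., callbacks=[early_stopping]): once eval_loss fails to
# improve on 0.7732 (checkpoint-170) for three consecutive evaluations, the
# Trainer sets should_training_stop and the step-200 checkpoint is the last one.
```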