DuongTrongChi commited on
Commit
45b99fa
·
verified ·
1 Parent(s): 072e90a

Training in progress, step 245, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d7f289734593a3f7ee939562d24196078125028a858356847a6bb957f59a600
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f46b71e95a580f108269251cfe3defd72132358de06c0d928b83f8aefd50304d
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6942bdb9d086b28b0925199c7fe1fdaa7b25eebd757be776937103f7a5f230b8
3
  size 37430836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99106769548268574aadd158585d14f464e53c8ad6243c5158dc3c182ccfe6a5
3
  size 37430836
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db79331c8992e3a7af2306518ac9df31b4a39a542a11b37b6cba8afaa66b1eba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1164145c3cb1723f88e8b2678949166f87c794da442d33d7cc2eddf22f1da61
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.663923182441701,
5
  "eval_steps": 500,
6
- "global_step": 242,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1701,6 +1701,27 @@
1701
  "learning_rate": 9.242424242424242e-05,
1702
  "loss": 1.2732,
1703
  "step": 242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  }
1705
  ],
1706
  "logging_steps": 1,
@@ -1720,7 +1741,7 @@
1720
  "attributes": {}
1721
  }
1722
  },
1723
- "total_flos": 2.995266789026488e+17,
1724
  "train_batch_size": 4,
1725
  "trial_name": null,
1726
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6721536351165981,
5
  "eval_steps": 500,
6
+ "global_step": 245,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1701
  "learning_rate": 9.242424242424242e-05,
1702
  "loss": 1.2732,
1703
  "step": 242
1704
+ },
1705
+ {
1706
+ "epoch": 0.6666666666666666,
1707
+ "grad_norm": 0.16092891991138458,
1708
+ "learning_rate": 9.166666666666667e-05,
1709
+ "loss": 1.1607,
1710
+ "step": 243
1711
+ },
1712
+ {
1713
+ "epoch": 0.6694101508916324,
1714
+ "grad_norm": 0.1687157154083252,
1715
+ "learning_rate": 9.090909090909092e-05,
1716
+ "loss": 1.1149,
1717
+ "step": 244
1718
+ },
1719
+ {
1720
+ "epoch": 0.6721536351165981,
1721
+ "grad_norm": 0.18511663377285004,
1722
+ "learning_rate": 9.015151515151515e-05,
1723
+ "loss": 1.1934,
1724
+ "step": 245
1725
  }
1726
  ],
1727
  "logging_steps": 1,
 
1741
  "attributes": {}
1742
  }
1743
  },
1744
+ "total_flos": 3.031087305198797e+17,
1745
  "train_batch_size": 4,
1746
  "trial_name": null,
1747
  "trial_params": null