DuongTrongChi commited on
Commit
be76ce1
·
verified ·
1 Parent(s): cf0bbf6

Training in progress, step 253, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d670e3d79bfffb5e042b5a3ce721b3dac237b5e9f862d93b021230929bd440
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267e55ad7755dee82f46dba719e2576737b5ea75c1b5332dddec46d9480748b8
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c4b9a1038c3864c6aebfb5f80fb47be5cb30b63f2ac18316cd50f58f7c5813
3
  size 37430836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af61f8af3746a56d2a96b75db1276aa71c671ea7b6ce344d3463d7e1c88aad06
3
  size 37430836
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7657b3ddb2232aa8012a32fb00c3fb92277ec0be13fceb0c90bdaa595cabccac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e2b56357fd5416f4374499b12d61ad0d3c5063c6569083ab07fd9fe96b503d0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6886145404663924,
5
  "eval_steps": 500,
6
- "global_step": 251,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1764,6 +1764,20 @@
1764
  "learning_rate": 8.560606060606061e-05,
1765
  "loss": 1.1737,
1766
  "step": 251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1767
  }
1768
  ],
1769
  "logging_steps": 1,
@@ -1783,7 +1797,7 @@
1783
  "attributes": {}
1784
  }
1785
  },
1786
- "total_flos": 3.1050607333850726e+17,
1787
  "train_batch_size": 4,
1788
  "trial_name": null,
1789
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6941015089163237,
5
  "eval_steps": 500,
6
+ "global_step": 253,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1764
  "learning_rate": 8.560606060606061e-05,
1765
  "loss": 1.1737,
1766
  "step": 251
1767
+ },
1768
+ {
1769
+ "epoch": 0.691358024691358,
1770
+ "grad_norm": 0.18938620388507843,
1771
+ "learning_rate": 8.484848484848486e-05,
1772
+ "loss": 1.148,
1773
+ "step": 252
1774
+ },
1775
+ {
1776
+ "epoch": 0.6941015089163237,
1777
+ "grad_norm": 0.14747262001037598,
1778
+ "learning_rate": 8.40909090909091e-05,
1779
+ "loss": 1.1205,
1780
+ "step": 253
1781
  }
1782
  ],
1783
  "logging_steps": 1,
 
1797
  "attributes": {}
1798
  }
1799
  },
1800
+ "total_flos": 3.127961028837335e+17,
1801
  "train_batch_size": 4,
1802
  "trial_name": null,
1803
  "trial_params": null