DuongTrongChi committed on
Commit
91a7749
·
verified ·
1 Parent(s): 8140316

Training in progress, step 264, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7503bcb2c3f34a1858d513a2596512e90f6888a1c9572e7fe5512c62e91046c
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53a1f1c3e2a923e2982197cd900077574f24b219229aa019511f875042742237
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5faf222eddf43df3300085931995fa326f25cdcf01906bf0ee0b2737cdd72beb
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ce5385174b266947c6bbd0cded6839fb22865d7049d84dd957347235517437
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e6d08bfa0ecdc29e58b4a5362f7ced16937eb7fa35e931e744fd7df0b5c2675
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b539f01b7ad3345aa5a738855f4967eed8463acf8b5cd8b2265ca0e07cf0599
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7133058984910837,
5
  "eval_steps": 500,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1827,6 +1827,34 @@
1827
  "learning_rate": 7.878787878787879e-05,
1828
  "loss": 1.1713,
1829
  "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1830
  }
1831
  ],
1832
  "logging_steps": 1,
@@ -1846,7 +1874,7 @@
1846
  "attributes": {}
1847
  }
1848
  },
1849
- "total_flos": 3.2152157285707776e+17,
1850
  "train_batch_size": 4,
1851
  "trial_name": null,
1852
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7242798353909465,
5
  "eval_steps": 500,
6
+ "global_step": 264,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1827
  "learning_rate": 7.878787878787879e-05,
1828
  "loss": 1.1713,
1829
  "step": 260
1830
+ },
1831
+ {
1832
+ "epoch": 0.7160493827160493,
1833
+ "grad_norm": 0.16383200883865356,
1834
+ "learning_rate": 7.803030303030304e-05,
1835
+ "loss": 1.1593,
1836
+ "step": 261
1837
+ },
1838
+ {
1839
+ "epoch": 0.7187928669410151,
1840
+ "grad_norm": 0.16724437475204468,
1841
+ "learning_rate": 7.727272727272727e-05,
1842
+ "loss": 1.3,
1843
+ "step": 262
1844
+ },
1845
+ {
1846
+ "epoch": 0.7215363511659808,
1847
+ "grad_norm": 0.18244515359401703,
1848
+ "learning_rate": 7.651515151515152e-05,
1849
+ "loss": 1.264,
1850
+ "step": 263
1851
+ },
1852
+ {
1853
+ "epoch": 0.7242798353909465,
1854
+ "grad_norm": 0.15599456429481506,
1855
+ "learning_rate": 7.575757575757576e-05,
1856
+ "loss": 1.2125,
1857
+ "step": 264
1858
  }
1859
  ],
1860
  "logging_steps": 1,
 
1874
  "attributes": {}
1875
  }
1876
  },
1877
+ "total_flos": 3.264356883202867e+17,
1878
  "train_batch_size": 4,
1879
  "trial_name": null,
1880
  "trial_params": null