DuongTrongChi commited on
Commit
dde1665
·
verified ·
1 Parent(s): 60d7f55

Training in progress, step 274, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ad73acc7a44c1fcd562883c85272c2e33e24df2154f4b3d1668ef117da46401
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e419c6b9165e42320f1a7472a3846377e8c91119c563d4005aff51a69aa4685
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064b2a9e45b619870f65440ad9ebda6135d621bd81ea4fa5c9d2073e6847a8a5
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cdb21230b7824d55a4297c2ab3edbca84bc6dbaeedc644b7618d78672df3e0
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9d3048c859719401ed6523107918b9f1d6ac0f3668115efc907a5a4c443c081
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70901b94b4e1e6b99201989f00c77c6a02ae46cc48fbef26b5bd41545d57275c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7352537722908093,
5
  "eval_steps": 500,
6
- "global_step": 268,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1883,6 +1883,48 @@
1883
  "learning_rate": 7.272727272727273e-05,
1884
  "loss": 1.2259,
1885
  "step": 268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1886
  }
1887
  ],
1888
  "logging_steps": 1,
@@ -1902,7 +1944,7 @@
1902
  "attributes": {}
1903
  }
1904
  },
1905
- "total_flos": 3.3135793961210266e+17,
1906
  "train_batch_size": 4,
1907
  "trial_name": null,
1908
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7517146776406035,
5
  "eval_steps": 500,
6
+ "global_step": 274,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1883
  "learning_rate": 7.272727272727273e-05,
1884
  "loss": 1.2259,
1885
  "step": 268
1886
+ },
1887
+ {
1888
+ "epoch": 0.7379972565157751,
1889
+ "grad_norm": 0.15109026432037354,
1890
+ "learning_rate": 7.196969696969698e-05,
1891
+ "loss": 1.2103,
1892
+ "step": 269
1893
+ },
1894
+ {
1895
+ "epoch": 0.7407407407407407,
1896
+ "grad_norm": 0.15917524695396423,
1897
+ "learning_rate": 7.121212121212121e-05,
1898
+ "loss": 1.1641,
1899
+ "step": 270
1900
+ },
1901
+ {
1902
+ "epoch": 0.7434842249657064,
1903
+ "grad_norm": 0.16973435878753662,
1904
+ "learning_rate": 7.045454545454546e-05,
1905
+ "loss": 1.1128,
1906
+ "step": 271
1907
+ },
1908
+ {
1909
+ "epoch": 0.7462277091906722,
1910
+ "grad_norm": 0.16686728596687317,
1911
+ "learning_rate": 6.96969696969697e-05,
1912
+ "loss": 1.1665,
1913
+ "step": 272
1914
+ },
1915
+ {
1916
+ "epoch": 0.7489711934156379,
1917
+ "grad_norm": 0.16650721430778503,
1918
+ "learning_rate": 6.893939393939395e-05,
1919
+ "loss": 1.1626,
1920
+ "step": 273
1921
+ },
1922
+ {
1923
+ "epoch": 0.7517146776406035,
1924
+ "grad_norm": 0.1571473777294159,
1925
+ "learning_rate": 6.818181818181818e-05,
1926
+ "loss": 1.1167,
1927
+ "step": 274
1928
  }
1929
  ],
1930
  "logging_steps": 1,
 
1944
  "attributes": {}
1945
  }
1946
  },
1947
+ "total_flos": 3.387494336322847e+17,
1948
  "train_batch_size": 4,
1949
  "trial_name": null,
1950
  "trial_params": null