DuongTrongChi committed on
Commit
0c13098
·
verified ·
1 Parent(s): 2bbf76d

Training in progress, step 278, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e419c6b9165e42320f1a7472a3846377e8c91119c563d4005aff51a69aa4685
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cf8057910d23a062865985ba852f2938eae817046dfaf5681661132eaf58de
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28cdb21230b7824d55a4297c2ab3edbca84bc6dbaeedc644b7618d78672df3e0
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3538e3394b23b3adf8ac36f79431127cf553e2ff2786b3ddb23d8b296f28eeaa
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70901b94b4e1e6b99201989f00c77c6a02ae46cc48fbef26b5bd41545d57275c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0786adcac2c7889cfa58c3afdb23ea4f83c558cdd0755806988521c514ef08d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7517146776406035,
5
  "eval_steps": 500,
6
- "global_step": 274,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1925,6 +1925,34 @@
1925
  "learning_rate": 6.818181818181818e-05,
1926
  "loss": 1.1167,
1927
  "step": 274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1928
  }
1929
  ],
1930
  "logging_steps": 1,
@@ -1944,7 +1972,7 @@
1944
  "attributes": {}
1945
  }
1946
  },
1947
- "total_flos": 3.387494336322847e+17,
1948
  "train_batch_size": 4,
1949
  "trial_name": null,
1950
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7626886145404664,
5
  "eval_steps": 500,
6
+ "global_step": 278,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1925
  "learning_rate": 6.818181818181818e-05,
1926
  "loss": 1.1167,
1927
  "step": 274
1928
+ },
1929
+ {
1930
+ "epoch": 0.7544581618655692,
1931
+ "grad_norm": 0.15378378331661224,
1932
+ "learning_rate": 6.742424242424242e-05,
1933
+ "loss": 1.2406,
1934
+ "step": 275
1935
+ },
1936
+ {
1937
+ "epoch": 0.757201646090535,
1938
+ "grad_norm": 0.16972492635250092,
1939
+ "learning_rate": 6.666666666666667e-05,
1940
+ "loss": 1.2506,
1941
+ "step": 276
1942
+ },
1943
+ {
1944
+ "epoch": 0.7599451303155007,
1945
+ "grad_norm": 0.16112364828586578,
1946
+ "learning_rate": 6.59090909090909e-05,
1947
+ "loss": 1.1676,
1948
+ "step": 277
1949
+ },
1950
+ {
1951
+ "epoch": 0.7626886145404664,
1952
+ "grad_norm": 0.1625635176897049,
1953
+ "learning_rate": 6.515151515151516e-05,
1954
+ "loss": 1.1607,
1955
+ "step": 278
1956
  }
1957
  ],
1958
  "logging_steps": 1,
 
1972
  "attributes": {}
1973
  }
1974
  },
1975
+ "total_flos": 3.438408126791516e+17,
1976
  "train_batch_size": 4,
1977
  "trial_name": null,
1978
  "trial_params": null