DuongTrongChi commited on
Commit
74d0323
·
verified ·
1 Parent(s): 41e5e0e

Training in progress, step 284, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8cf8057910d23a062865985ba852f2938eae817046dfaf5681661132eaf58de
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426a166c05ce6a9c2a3792ab7d5ae7d338239f2ba38be047201caa54ba30a791
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3538e3394b23b3adf8ac36f79431127cf553e2ff2786b3ddb23d8b296f28eeaa
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f0ddfa6ca4416d999191fb8e260b82136a5552597c0f780e612c9c54f372809
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0786adcac2c7889cfa58c3afdb23ea4f83c558cdd0755806988521c514ef08d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f5d774857468bd6c5b471d495c3b4439bbde7fd03bb61dbdcc2120a4b2a557c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7626886145404664,
5
  "eval_steps": 500,
6
- "global_step": 278,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1953,6 +1953,48 @@
1953
  "learning_rate": 6.515151515151516e-05,
1954
  "loss": 1.1607,
1955
  "step": 278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1956
  }
1957
  ],
1958
  "logging_steps": 1,
@@ -1972,7 +2014,7 @@
1972
  "attributes": {}
1973
  }
1974
  },
1975
- "total_flos": 3.438408126791516e+17,
1976
  "train_batch_size": 4,
1977
  "trial_name": null,
1978
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7791495198902606,
5
  "eval_steps": 500,
6
+ "global_step": 284,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1953
  "learning_rate": 6.515151515151516e-05,
1954
  "loss": 1.1607,
1955
  "step": 278
1956
+ },
1957
+ {
1958
+ "epoch": 0.7654320987654321,
1959
+ "grad_norm": 0.164581760764122,
1960
+ "learning_rate": 6.439393939393939e-05,
1961
+ "loss": 1.2018,
1962
+ "step": 279
1963
+ },
1964
+ {
1965
+ "epoch": 0.7681755829903978,
1966
+ "grad_norm": 0.1688939481973648,
1967
+ "learning_rate": 6.363636363636364e-05,
1968
+ "loss": 1.1698,
1969
+ "step": 280
1970
+ },
1971
+ {
1972
+ "epoch": 0.7709190672153635,
1973
+ "grad_norm": 0.1699894666671753,
1974
+ "learning_rate": 6.287878787878788e-05,
1975
+ "loss": 1.2385,
1976
+ "step": 281
1977
+ },
1978
+ {
1979
+ "epoch": 0.7736625514403292,
1980
+ "grad_norm": 0.16270920634269714,
1981
+ "learning_rate": 6.212121212121213e-05,
1982
+ "loss": 1.2186,
1983
+ "step": 282
1984
+ },
1985
+ {
1986
+ "epoch": 0.7764060356652949,
1987
+ "grad_norm": 0.1695300042629242,
1988
+ "learning_rate": 6.136363636363636e-05,
1989
+ "loss": 1.1761,
1990
+ "step": 283
1991
+ },
1992
+ {
1993
+ "epoch": 0.7791495198902606,
1994
+ "grad_norm": 0.15952111780643463,
1995
+ "learning_rate": 6.060606060606061e-05,
1996
+ "loss": 1.1664,
1997
+ "step": 284
1998
  }
1999
  ],
2000
  "logging_steps": 1,
 
2014
  "attributes": {}
2015
  }
2016
  },
2017
+ "total_flos": 3.5131733923058074e+17,
2018
  "train_batch_size": 4,
2019
  "trial_name": null,
2020
  "trial_params": null