DuongTrongChi committed on
Commit
b6125fa
·
verified ·
1 Parent(s): 1d84634

Training in progress, step 293, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:121295b2b22ccacbb358f407723b1ecbe5fc47272371aae6c4ebe1121aa8e6c5
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5203def3dfaa585f2709854cb7081b5402cb1703c0ef445c0dc93d47a9fca86
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07b1cfc78181126f7ff856497c9c819f03a388066e7f612ef97f28acc940ea43
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5587744a36631b994d493b042e7e0940c3917efca8cab938c21c474e8c07d606
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a413486fbe876b5d13f54e47216896bd97b3e8e9188c7f5c0091ef8570249ad4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a3dae860874f4f9ec7b9723c4571a24fa5277582ebca2166c3d9dd7790aae41
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7928669410150891,
5
  "eval_steps": 500,
6
- "global_step": 289,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2030,6 +2030,34 @@
2030
  "learning_rate": 5.6818181818181825e-05,
2031
  "loss": 1.1278,
2032
  "step": 289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2033
  }
2034
  ],
2035
  "logging_steps": 1,
@@ -2049,7 +2077,7 @@
2049
  "attributes": {}
2050
  }
2051
  },
2052
- "total_flos": 3.575919377014456e+17,
2053
  "train_batch_size": 4,
2054
  "trial_name": null,
2055
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.803840877914952,
5
  "eval_steps": 500,
6
+ "global_step": 293,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2030
  "learning_rate": 5.6818181818181825e-05,
2031
  "loss": 1.1278,
2032
  "step": 289
2033
+ },
2034
+ {
2035
+ "epoch": 0.7956104252400549,
2036
+ "grad_norm": 0.16730915009975433,
2037
+ "learning_rate": 5.606060606060606e-05,
2038
+ "loss": 1.1749,
2039
+ "step": 290
2040
+ },
2041
+ {
2042
+ "epoch": 0.7983539094650206,
2043
+ "grad_norm": 0.1525382548570633,
2044
+ "learning_rate": 5.5303030303030304e-05,
2045
+ "loss": 1.2134,
2046
+ "step": 291
2047
+ },
2048
+ {
2049
+ "epoch": 0.8010973936899863,
2050
+ "grad_norm": 0.17000265419483185,
2051
+ "learning_rate": 5.4545454545454546e-05,
2052
+ "loss": 1.1661,
2053
+ "step": 292
2054
+ },
2055
+ {
2056
+ "epoch": 0.803840877914952,
2057
+ "grad_norm": 0.16208770871162415,
2058
+ "learning_rate": 5.378787878787879e-05,
2059
+ "loss": 1.1965,
2060
+ "step": 293
2061
  }
2062
  ],
2063
  "logging_steps": 1,
 
2077
  "attributes": {}
2078
  }
2079
  },
2080
+ "total_flos": 3.62592285449429e+17,
2081
  "train_batch_size": 4,
2082
  "trial_name": null,
2083
  "trial_params": null