DuongTrongChi commited on
Commit
056558a
·
verified ·
1 Parent(s): 2db82cd

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5203def3dfaa585f2709854cb7081b5402cb1703c0ef445c0dc93d47a9fca86
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e42426f9664501195569e96f5a7ee0fc64f3214acdb3a4a74cd24a1fb23b0a
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5587744a36631b994d493b042e7e0940c3917efca8cab938c21c474e8c07d606
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054f9aaa885b76d5260a4407dab68a0639efed60781407198b8607dce79f34e2
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a3dae860874f4f9ec7b9723c4571a24fa5277582ebca2166c3d9dd7790aae41
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2864730966c44a4d9e96d3cf646276472e29551ad2c5f921b0c4c643dd9b0a39
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.803840877914952,
5
  "eval_steps": 500,
6
- "global_step": 293,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2058,6 +2058,55 @@
2058
  "learning_rate": 5.378787878787879e-05,
2059
  "loss": 1.1965,
2060
  "step": 293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2061
  }
2062
  ],
2063
  "logging_steps": 1,
@@ -2077,7 +2126,7 @@
2077
  "attributes": {}
2078
  }
2079
  },
2080
- "total_flos": 3.62592285449429e+17,
2081
  "train_batch_size": 4,
2082
  "trial_name": null,
2083
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.823045267489712,
5
  "eval_steps": 500,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2058
  "learning_rate": 5.378787878787879e-05,
2059
  "loss": 1.1965,
2060
  "step": 293
2061
+ },
2062
+ {
2063
+ "epoch": 0.8065843621399177,
2064
+ "grad_norm": 0.15758417546749115,
2065
+ "learning_rate": 5.303030303030303e-05,
2066
+ "loss": 1.2708,
2067
+ "step": 294
2068
+ },
2069
+ {
2070
+ "epoch": 0.8093278463648834,
2071
+ "grad_norm": 0.16219446063041687,
2072
+ "learning_rate": 5.2272727272727274e-05,
2073
+ "loss": 1.175,
2074
+ "step": 295
2075
+ },
2076
+ {
2077
+ "epoch": 0.8120713305898491,
2078
+ "grad_norm": 0.181773379445076,
2079
+ "learning_rate": 5.151515151515152e-05,
2080
+ "loss": 1.0874,
2081
+ "step": 296
2082
+ },
2083
+ {
2084
+ "epoch": 0.8148148148148148,
2085
+ "grad_norm": 0.1878584921360016,
2086
+ "learning_rate": 5.075757575757576e-05,
2087
+ "loss": 1.2115,
2088
+ "step": 297
2089
+ },
2090
+ {
2091
+ "epoch": 0.8175582990397805,
2092
+ "grad_norm": 0.1562959998846054,
2093
+ "learning_rate": 5e-05,
2094
+ "loss": 1.1326,
2095
+ "step": 298
2096
+ },
2097
+ {
2098
+ "epoch": 0.8203017832647462,
2099
+ "grad_norm": 0.1604573130607605,
2100
+ "learning_rate": 4.9242424242424245e-05,
2101
+ "loss": 1.1464,
2102
+ "step": 299
2103
+ },
2104
+ {
2105
+ "epoch": 0.823045267489712,
2106
+ "grad_norm": 0.17849043011665344,
2107
+ "learning_rate": 4.848484848484849e-05,
2108
+ "loss": 1.0647,
2109
+ "step": 300
2110
  }
2111
  ],
2112
  "logging_steps": 1,
 
2126
  "attributes": {}
2127
  }
2128
  },
2129
+ "total_flos": 3.711016873109668e+17,
2130
  "train_batch_size": 4,
2131
  "trial_name": null,
2132
  "trial_params": null