DuongTrongChi committed on
Commit
a7a94a9
·
verified ·
1 Parent(s): e9f38c9

Training in progress, step 317, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7c3c17c79bfe154805c8425180fc8d90019d8728ec51988b6a862dbd598007
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6875a604710246338188ff29d5cbc563ba25baf48c6d1e3cc1d0b7e80ff70d7
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1ba5c923acaf2e1ed47acc58d6e9aa13d44a747e391f8730de77c1999d30e90
3
  size 37431220
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c5e4fb6735708560c95e693cdfd10bf7dce1637be1427c5932901b16d00b7e4
3
  size 37431220
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:325d5ceabf189137a8e26a47ad2db0bafe3423961947489f0a619958b73d9909
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94632ab6bd0f8c5abd180847d8d11ab76bee1258bb52de95785e40d5948b6c6c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8449931412894376,
5
  "eval_steps": 500,
6
- "global_step": 308,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2163,6 +2163,69 @@
2163
  "learning_rate": 4.242424242424243e-05,
2164
  "loss": 1.1626,
2165
  "step": 308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2166
  }
2167
  ],
2168
  "logging_steps": 1,
@@ -2182,7 +2245,7 @@
2182
  "attributes": {}
2183
  }
2184
  },
2185
- "total_flos": 3.813357723602903e+17,
2186
  "train_batch_size": 4,
2187
  "trial_name": null,
2188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.869684499314129,
5
  "eval_steps": 500,
6
+ "global_step": 317,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2163
  "learning_rate": 4.242424242424243e-05,
2164
  "loss": 1.1626,
2165
  "step": 308
2166
+ },
2167
+ {
2168
+ "epoch": 0.8477366255144033,
2169
+ "grad_norm": 0.1584944874048233,
2170
+ "learning_rate": 4.166666666666667e-05,
2171
+ "loss": 1.2979,
2172
+ "step": 309
2173
+ },
2174
+ {
2175
+ "epoch": 0.850480109739369,
2176
+ "grad_norm": 0.16384479403495789,
2177
+ "learning_rate": 4.0909090909090915e-05,
2178
+ "loss": 1.1728,
2179
+ "step": 310
2180
+ },
2181
+ {
2182
+ "epoch": 0.8532235939643347,
2183
+ "grad_norm": 0.16321486234664917,
2184
+ "learning_rate": 4.015151515151515e-05,
2185
+ "loss": 1.1622,
2186
+ "step": 311
2187
+ },
2188
+ {
2189
+ "epoch": 0.8559670781893004,
2190
+ "grad_norm": 0.1510697901248932,
2191
+ "learning_rate": 3.939393939393939e-05,
2192
+ "loss": 1.2212,
2193
+ "step": 312
2194
+ },
2195
+ {
2196
+ "epoch": 0.8587105624142661,
2197
+ "grad_norm": 0.15566690266132355,
2198
+ "learning_rate": 3.8636363636363636e-05,
2199
+ "loss": 1.2098,
2200
+ "step": 313
2201
+ },
2202
+ {
2203
+ "epoch": 0.8614540466392319,
2204
+ "grad_norm": 0.1588331162929535,
2205
+ "learning_rate": 3.787878787878788e-05,
2206
+ "loss": 1.2401,
2207
+ "step": 314
2208
+ },
2209
+ {
2210
+ "epoch": 0.8641975308641975,
2211
+ "grad_norm": 0.15694394707679749,
2212
+ "learning_rate": 3.712121212121212e-05,
2213
+ "loss": 1.2619,
2214
+ "step": 315
2215
+ },
2216
+ {
2217
+ "epoch": 0.8669410150891632,
2218
+ "grad_norm": 0.15612713992595673,
2219
+ "learning_rate": 3.6363636363636364e-05,
2220
+ "loss": 1.1761,
2221
+ "step": 316
2222
+ },
2223
+ {
2224
+ "epoch": 0.869684499314129,
2225
+ "grad_norm": 0.15668469667434692,
2226
+ "learning_rate": 3.560606060606061e-05,
2227
+ "loss": 1.1711,
2228
+ "step": 317
2229
  }
2230
  ],
2231
  "logging_steps": 1,
 
2245
  "attributes": {}
2246
  }
2247
  },
2248
+ "total_flos": 3.928463201780613e+17,
2249
  "train_batch_size": 4,
2250
  "trial_name": null,
2251
  "trial_params": null