somaia02 commited on
Commit
2d39cb9
·
1 Parent(s): f1ac00d

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ee5c23e36a220ddfb323d32d463c212f902254630a4ca1f579a0caca6ef359f
3
  size 2692157
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4051c707e9d67cfe29ad210c13fe89896f2c3fc7cf4353ff0fb5017475ff87c2
3
  size 2692157
last-checkpoint/global_step1839/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40824c42ea14ffeefd7fc75cd0767d78e088e442096118184cd860122e366dfb
3
+ size 1116913857
last-checkpoint/global_step1839/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f320f119d8b290b3be2c511810b2696c0144fca6384c664d24443c2feb1b97
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210158d4a933b97f035d06057819acb408cc38ea82d84bcbaa6243e46378d82a
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4050ab88638e3a1c731aeb5de94a73c52d97d464a389d00feab9db01d92065b
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:924a6351282a50ab0db8244df05142c7c75fb84fae16813815d76ad2ebe6b52a
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd4bbeedd997bdc8c69ce87b18768ce2ec7f18c9d29f66971da01ccc7c5f1a4
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5554373702a027b6b6219676101faa9edc6278f11a98aa439f715e4c85eeef11
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44f58b2871e135df7183956c33ee7549b753ca6637f8453873b8a7ca48213716
3
+ size 999614
last-checkpoint/global_step1839/zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b8244a27d8c81a5b345b19ad02a8279ae5bfa5e0adf4fa97bbde27336042b7f
3
+ size 999614
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1226
 
1
+ global_step1839
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31e33d70b3fe7f61d71a2b4a5353d94b31f406bedabb2083e35ac5bbc106208
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c5681d7b524818ae5579d838cfcbe25ce71cbb169b0e4fe25d42c99edbaa8f
3
  size 21687
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38fe384dd20308f07ec5585d9912cb965830bcc11808cbdc846275b932905119
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:271acd33082424567a4b8921da0385eeb19fdf6b335d06b40bad42f873f9cc25
3
  size 21687
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39ce95745d8ff40a4f8ee5a97f3689eaa9467213f9b27ce66ca594efc37198cf
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ace92afd9705e6c007ccf569351131cf4a5b6d8ed4d24ab9a103d9a45bff8bd
3
  size 21687
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c349f517e129599281e2e4a383da6c74c212dfb7e6bad5c542f121cde888a562
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18546c9b491ceea5ed238dedef178d53954a71de51aba303b160c7cce45eef1f
3
  size 21687
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df27a56b73699e712acf3ed2a1bec8b4dc1f7438a81b82196f0c2f34f00b7551
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b15878d0078501da1d9bbcc201adbc13f2502d53bc7ffcec18b7192e7f8278a
3
  size 21687
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34696be6d136ea03ea905aa2941fd96da980291d6b92db0de6b0a3b4d6447267
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c88208606cc56bfd290ffae1a66345a8d63667df9957ad9300eb23e099a4a13
3
  size 21687
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0e2cbda44be671f5b71ae2543f2cec67328a527bccee980bbad52fe0ebf7dd5
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8dcfd13c9d761e6230bb5a862ca244f59def6f1d3abc7ceb716b463cb00455
3
  size 21687
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4070b9e9beda56664b5815d344e6c0f296840034807befc40392beac83c285b1
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1ea431e317d84ef8ed752665a6947660d8675f81d803ed57c7af6c0c6638e06
3
  size 21687
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6314801573753357,
3
- "best_model_checkpoint": "outputs/checkpoint-1226",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 1226,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -167,13 +167,93 @@
167
  "eval_samples_per_second": 564.759,
168
  "eval_steps_per_second": 17.845,
169
  "step": 1226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  }
171
  ],
172
  "logging_steps": 50,
173
  "max_steps": 9195,
174
  "num_train_epochs": 15,
175
  "save_steps": 500,
176
- "total_flos": 2038462404362240.0,
177
  "trial_name": null,
178
  "trial_params": null
179
  }
 
1
  {
2
+ "best_metric": 0.5994039177894592,
3
+ "best_model_checkpoint": "outputs/checkpoint-1839",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 1839,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
167
  "eval_samples_per_second": 564.759,
168
  "eval_steps_per_second": 17.845,
169
  "step": 1226
170
+ },
171
+ {
172
+ "epoch": 2.04,
173
+ "learning_rate": 4.5692926969522716e-05,
174
+ "loss": 0.7375,
175
+ "step": 1250
176
+ },
177
+ {
178
+ "epoch": 2.12,
179
+ "learning_rate": 4.540540540540541e-05,
180
+ "loss": 0.7342,
181
+ "step": 1300
182
+ },
183
+ {
184
+ "epoch": 2.2,
185
+ "learning_rate": 4.51178838412881e-05,
186
+ "loss": 0.7308,
187
+ "step": 1350
188
+ },
189
+ {
190
+ "epoch": 2.28,
191
+ "learning_rate": 4.483036227717079e-05,
192
+ "loss": 0.7157,
193
+ "step": 1400
194
+ },
195
+ {
196
+ "epoch": 2.37,
197
+ "learning_rate": 4.454284071305348e-05,
198
+ "loss": 0.7032,
199
+ "step": 1450
200
+ },
201
+ {
202
+ "epoch": 2.45,
203
+ "learning_rate": 4.425531914893617e-05,
204
+ "loss": 0.7114,
205
+ "step": 1500
206
+ },
207
+ {
208
+ "epoch": 2.53,
209
+ "learning_rate": 4.396779758481886e-05,
210
+ "loss": 0.7192,
211
+ "step": 1550
212
+ },
213
+ {
214
+ "epoch": 2.61,
215
+ "learning_rate": 4.3680276020701557e-05,
216
+ "loss": 0.7132,
217
+ "step": 1600
218
+ },
219
+ {
220
+ "epoch": 2.69,
221
+ "learning_rate": 4.339275445658424e-05,
222
+ "loss": 0.7106,
223
+ "step": 1650
224
+ },
225
+ {
226
+ "epoch": 2.77,
227
+ "learning_rate": 4.310523289246694e-05,
228
+ "loss": 0.6874,
229
+ "step": 1700
230
+ },
231
+ {
232
+ "epoch": 2.85,
233
+ "learning_rate": 4.281771132834963e-05,
234
+ "loss": 0.6983,
235
+ "step": 1750
236
+ },
237
+ {
238
+ "epoch": 2.94,
239
+ "learning_rate": 4.253018976423232e-05,
240
+ "loss": 0.6989,
241
+ "step": 1800
242
+ },
243
+ {
244
+ "epoch": 3.0,
245
+ "eval_loss": 0.5994039177894592,
246
+ "eval_runtime": 2.0675,
247
+ "eval_samples_per_second": 566.385,
248
+ "eval_steps_per_second": 17.896,
249
+ "step": 1839
250
  }
251
  ],
252
  "logging_steps": 50,
253
  "max_steps": 9195,
254
  "num_train_epochs": 15,
255
  "save_steps": 500,
256
+ "total_flos": 3058064064249856.0,
257
  "trial_name": null,
258
  "trial_params": null
259
  }