Commit e21b5a1 (verified) · committed by Pranay17 · 1 Parent(s): 0c7e056

Training in progress, step 2500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d748f56c86d9b2ec091d44a85ccd94175df63c0bf630350ae13455b500f5873c
+oid sha256:a3f3c155fe4ab7a572e2a5fa77a282892e0e22cc6fc61a8c53fab449042d36d5
 size 42002584
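
The pointer above only records the content hash of the step-2500 adapter weights. A minimal sketch for confirming that a locally downloaded adapter_model.safetensors matches the new oid, assuming the file has been pulled into the same last-checkpoint/ layout:

import hashlib

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file through SHA-256 so large checkpoints need not fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid taken from the updated pointer above (step-2500 adapter).
expected = "a3f3c155fe4ab7a572e2a5fa77a282892e0e22cc6fc61a8c53fab449042d36d5"
local_path = "last-checkpoint/adapter_model.safetensors"  # assumed local clone layout
assert sha256_of_file(local_path) == expected, "downloaded adapter does not match the LFS pointer"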
last-checkpoint/global_step2500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7123c17202c44d1134a2cc67dc65c0b8e5a4ed55c834053daf4f258264a8d8e
+size 251710672
last-checkpoint/global_step2500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6804f47defbca7f6cdbaa7926c39523f91414e3d1d47ca0d149cd29f923df239
+size 47955328
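
The two files added under global_step2500/ follow the usual DeepSpeed ZeRO checkpoint layout: partitioned optimizer state plus the model states for model-parallel rank 0. A hedged sketch of consolidating such shards into a single fp32 state dict with DeepSpeed's zero_to_fp32 helper; the local paths are assumptions about a cloned checkout:

import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

checkpoint_dir = "last-checkpoint"  # assumed: directory holding "latest" and global_step2500/
# Reads the partitioned *_optim_states.pt / *_model_states.pt shards for the given tag
# and reassembles full-precision parameters.
state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag="global_step2500")
torch.save(state_dict, "fp32_model_step2500.pt")  # plain PyTorch weights, no ZeRO partitioning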
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step2000
+global_step2500
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12ff687702c9cb54cfdeb1509074bb19e28d6929ec859a93af35778558181b6e
+oid sha256:f01a4ea3836466236461a7e0a4041dbb7858ed9853ebc8a640dc7aeab392d3fa
 size 14244
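
Together, latest (now pointing at global_step2500), the refreshed rng_state.pth, and the shards above are what allow training to continue from step 2500. A small sketch, assuming a local clone with the same layout, that resolves the active tag and checks the expected shard files before resuming:

from pathlib import Path

ckpt_root = Path("last-checkpoint")                    # assumed local clone layout
tag = (ckpt_root / "latest").read_text().strip()       # "global_step2500" after this commit
shard_dir = ckpt_root / tag
expected = [
    "bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt",  # partitioned optimizer state
    "mp_rank_00_model_states.pt",                      # model states for rank 0
]
missing = [name for name in expected if not (shard_dir / name).exists()]
print(f"tag={tag}, missing shards: {missing or 'none'}")

# With the shards in place, the original training script would typically resume via
# transformers.Trainer (its construction is omitted here, it is not part of this commit):
#   trainer.train(resume_from_checkpoint=str(ckpt_root))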
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 11.396011396011396,
+  "epoch": 14.245014245014245,
   "eval_steps": 1000,
-  "global_step": 2000,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -294,6 +294,76 @@
       "learning_rate": 0.0001002002002002002,
       "loss": 0.0611,
       "step": 2000
+    },
+    {
+      "epoch": 11.68091168091168,
+      "grad_norm": 0.2988643944263458,
+      "learning_rate": 9.76976976976977e-05,
+      "loss": 0.0607,
+      "step": 2050
+    },
+    {
+      "epoch": 11.965811965811966,
+      "grad_norm": 0.8949713110923767,
+      "learning_rate": 9.51951951951952e-05,
+      "loss": 0.0632,
+      "step": 2100
+    },
+    {
+      "epoch": 12.250712250712251,
+      "grad_norm": 0.11667460948228836,
+      "learning_rate": 9.26926926926927e-05,
+      "loss": 0.0605,
+      "step": 2150
+    },
+    {
+      "epoch": 12.535612535612536,
+      "grad_norm": 0.1387569159269333,
+      "learning_rate": 9.019019019019019e-05,
+      "loss": 0.0605,
+      "step": 2200
+    },
+    {
+      "epoch": 12.820512820512821,
+      "grad_norm": 0.4826744794845581,
+      "learning_rate": 8.76876876876877e-05,
+      "loss": 0.0619,
+      "step": 2250
+    },
+    {
+      "epoch": 13.105413105413106,
+      "grad_norm": 0.09396378695964813,
+      "learning_rate": 8.518518518518518e-05,
+      "loss": 0.0588,
+      "step": 2300
+    },
+    {
+      "epoch": 13.39031339031339,
+      "grad_norm": 0.6452879905700684,
+      "learning_rate": 8.268268268268269e-05,
+      "loss": 0.0584,
+      "step": 2350
+    },
+    {
+      "epoch": 13.675213675213675,
+      "grad_norm": 0.5694031119346619,
+      "learning_rate": 8.018018018018019e-05,
+      "loss": 0.0582,
+      "step": 2400
+    },
+    {
+      "epoch": 13.96011396011396,
+      "grad_norm": 0.34324464201927185,
+      "learning_rate": 7.767767767767768e-05,
+      "loss": 0.0602,
+      "step": 2450
+    },
+    {
+      "epoch": 14.245014245014245,
+      "grad_norm": 0.07841510325670242,
+      "learning_rate": 7.517517517517519e-05,
+      "loss": 0.0593,
+      "step": 2500
     }
   ],
   "logging_steps": 50,
@@ -313,7 +383,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.442536950726656e+16,
+  "total_flos": 6.806026140306637e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null