Pranay17 committed on
Commit
68d2a4a
·
verified ·
1 Parent(s): eee21bd

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d68bcc11de96c82cc302aacd29d6f456247d63f2f4d39307769746456f2d36d
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c2daf94e67a402aba974312ff99a669e5dcbffb99717ae00d0646b838a479df
3
  size 42002584
last-checkpoint/global_step3500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a15607644f09940b16d9a686c6970d247e5ab5505d5df9f79e020811ff1d74
3
+ size 251710672
last-checkpoint/global_step3500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85302cb8c5b8b44f532f11cc6124f01954264ccc8cda5ac9805eeb1961a7132b
3
+ size 153747385
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3000
 
1
+ global_step3500
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bfa14c5410b710fcae3d8fcfac63a80363ede0d2705f50a4f85d135deef8000
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3e76396f70d39c4ab50031685ac2fc3f2b68f98254d9bc8fd8a09838383894
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.094017094017094,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -434,6 +434,76 @@
434
  "learning_rate": 5.015015015015015e-05,
435
  "loss": 0.0586,
436
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
  ],
439
  "logging_steps": 50,
@@ -453,7 +523,7 @@
453
  "attributes": {}
454
  }
455
  },
456
- "total_flos": 9.09931874049065e+16,
457
  "train_batch_size": 2,
458
  "trial_name": null,
459
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.943019943019944,
5
  "eval_steps": 1000,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
434
  "learning_rate": 5.015015015015015e-05,
435
  "loss": 0.0586,
436
  "step": 3000
437
+ },
438
+ {
439
+ "epoch": 17.37891737891738,
440
+ "grad_norm": 0.09752911329269409,
441
+ "learning_rate": 4.764764764764765e-05,
442
+ "loss": 0.058,
443
+ "step": 3050
444
+ },
445
+ {
446
+ "epoch": 17.663817663817664,
447
+ "grad_norm": 0.11784069985151291,
448
+ "learning_rate": 4.5145145145145146e-05,
449
+ "loss": 0.0595,
450
+ "step": 3100
451
+ },
452
+ {
453
+ "epoch": 17.94871794871795,
454
+ "grad_norm": 0.461434006690979,
455
+ "learning_rate": 4.264264264264264e-05,
456
+ "loss": 0.061,
457
+ "step": 3150
458
+ },
459
+ {
460
+ "epoch": 18.233618233618234,
461
+ "grad_norm": 0.13216863572597504,
462
+ "learning_rate": 4.014014014014014e-05,
463
+ "loss": 0.0602,
464
+ "step": 3200
465
+ },
466
+ {
467
+ "epoch": 18.51851851851852,
468
+ "grad_norm": 0.078556589782238,
469
+ "learning_rate": 3.763763763763764e-05,
470
+ "loss": 0.058,
471
+ "step": 3250
472
+ },
473
+ {
474
+ "epoch": 18.803418803418804,
475
+ "grad_norm": 0.09973672032356262,
476
+ "learning_rate": 3.513513513513514e-05,
477
+ "loss": 0.0578,
478
+ "step": 3300
479
+ },
480
+ {
481
+ "epoch": 19.08831908831909,
482
+ "grad_norm": 0.08268678188323975,
483
+ "learning_rate": 3.263263263263263e-05,
484
+ "loss": 0.0599,
485
+ "step": 3350
486
+ },
487
+ {
488
+ "epoch": 19.373219373219374,
489
+ "grad_norm": 0.12324529886245728,
490
+ "learning_rate": 3.013013013013013e-05,
491
+ "loss": 0.059,
492
+ "step": 3400
493
+ },
494
+ {
495
+ "epoch": 19.65811965811966,
496
+ "grad_norm": 0.11340611428022385,
497
+ "learning_rate": 2.762762762762763e-05,
498
+ "loss": 0.0572,
499
+ "step": 3450
500
+ },
501
+ {
502
+ "epoch": 19.943019943019944,
503
+ "grad_norm": 0.08004830777645111,
504
+ "learning_rate": 2.5125125125125126e-05,
505
+ "loss": 0.0597,
506
+ "step": 3500
507
  }
508
  ],
509
  "logging_steps": 50,
 
523
  "attributes": {}
524
  }
525
  },
526
+ "total_flos": 1.0604175892781466e+17,
527
  "train_batch_size": 2,
528
  "trial_name": null,
529
  "trial_params": null