Pranay17 committed on
Commit
5e1811a
·
verified ·
1 Parent(s): 754202f

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77e3105355c2df4b040acd4c2944a96c6e4176ce252181e7699f9ea948f127e2
3
  size 42002584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6487ab00d7819edaaf1f49a88363d57abccd183ee61af4353c764f4f6f23f5
3
  size 42002584
last-checkpoint/global_step3500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a260abc6e640dd8e3daa5a349ce4b73fe05ceb36c58ed9ea47b4c296d54fc85
3
+ size 251710672
last-checkpoint/global_step3500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1f4f50743d9108416e8549518fd567fec0053de9a0d77241fa590e4176271a
3
+ size 47955328
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3000
 
1
+ global_step3500
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b552361bcc18d8148b831b255d9e181d4beeb2d1ba3aee8779f232860eb665c7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c17378dafa1374ee684c365b3aeb8454074c8a8957207088418a6e24a4f102
3
  size 14244
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.094017094017094,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -434,6 +434,76 @@
434
  "learning_rate": 5.015015015015015e-05,
435
  "loss": 0.0534,
436
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
  }
438
  ],
439
  "logging_steps": 50,
@@ -453,7 +523,7 @@
453
  "attributes": {}
454
  }
455
  },
456
- "total_flos": 8.179327598749286e+16,
457
  "train_batch_size": 2,
458
  "trial_name": null,
459
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.943019943019944,
5
  "eval_steps": 1000,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
434
  "learning_rate": 5.015015015015015e-05,
435
  "loss": 0.0534,
436
  "step": 3000
437
+ },
438
+ {
439
+ "epoch": 17.37891737891738,
440
+ "grad_norm": 0.06696134060621262,
441
+ "learning_rate": 4.764764764764765e-05,
442
+ "loss": 0.0534,
443
+ "step": 3050
444
+ },
445
+ {
446
+ "epoch": 17.663817663817664,
447
+ "grad_norm": 0.07864591479301453,
448
+ "learning_rate": 4.5145145145145146e-05,
449
+ "loss": 0.0543,
450
+ "step": 3100
451
+ },
452
+ {
453
+ "epoch": 17.94871794871795,
454
+ "grad_norm": 0.04788205400109291,
455
+ "learning_rate": 4.264264264264264e-05,
456
+ "loss": 0.055,
457
+ "step": 3150
458
+ },
459
+ {
460
+ "epoch": 18.233618233618234,
461
+ "grad_norm": 0.08312636613845825,
462
+ "learning_rate": 4.014014014014014e-05,
463
+ "loss": 0.0556,
464
+ "step": 3200
465
+ },
466
+ {
467
+ "epoch": 18.51851851851852,
468
+ "grad_norm": 0.050425197929143906,
469
+ "learning_rate": 3.763763763763764e-05,
470
+ "loss": 0.0531,
471
+ "step": 3250
472
+ },
473
+ {
474
+ "epoch": 18.803418803418804,
475
+ "grad_norm": 0.06965469568967819,
476
+ "learning_rate": 3.513513513513514e-05,
477
+ "loss": 0.0533,
478
+ "step": 3300
479
+ },
480
+ {
481
+ "epoch": 19.08831908831909,
482
+ "grad_norm": 0.05819595977663994,
483
+ "learning_rate": 3.263263263263263e-05,
484
+ "loss": 0.0554,
485
+ "step": 3350
486
+ },
487
+ {
488
+ "epoch": 19.373219373219374,
489
+ "grad_norm": 0.06934893876314163,
490
+ "learning_rate": 3.013013013013013e-05,
491
+ "loss": 0.0545,
492
+ "step": 3400
493
+ },
494
+ {
495
+ "epoch": 19.65811965811966,
496
+ "grad_norm": 0.0720212310552597,
497
+ "learning_rate": 2.762762762762763e-05,
498
+ "loss": 0.0524,
499
+ "step": 3450
500
+ },
501
+ {
502
+ "epoch": 19.943019943019944,
503
+ "grad_norm": 0.04772196337580681,
504
+ "learning_rate": 2.5125125125125126e-05,
505
+ "loss": 0.0553,
506
+ "step": 3500
507
  }
508
  ],
509
  "logging_steps": 50,
 
523
  "attributes": {}
524
  }
525
  },
526
+ "total_flos": 9.530782880523878e+16,
527
  "train_batch_size": 2,
528
  "trial_name": null,
529
  "trial_params": null