somaia02 committed
Commit 5fc0168 · 1 Parent(s): 19dfcf7

Training in progress, step 5500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:065b0553ba7b625058bbc54e3b3e27c491e8ccc08fd588730cb0ecf41b13d909
+oid sha256:afdc7bd9d3bef6c5e894b53ed72a94b99726290b734e0bf561bc5081e613a3a5
 size 5323528
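The checkpoint binaries in this commit are stored through Git LFS, so the diff above only changes the pointer file: the sha256 oid moves to the new blob while the size stays at 5323528 bytes. Below is a minimal sketch of how such a pointer could be parsed and a downloaded blob checked against it; read_lfs_pointer and verify_against_pointer are illustrative helpers, not part of this repository.

```python
import hashlib
from pathlib import Path


def read_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file (version / oid / size lines, as diffed above)."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def verify_against_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the sha256 oid and size recorded in its pointer."""
    fields = read_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hash>" -> "<hash>"
    expected_size = int(fields["size"])
    blob = Path(blob_path).read_bytes()
    return len(blob) == expected_size and hashlib.sha256(blob).hexdigest() == expected_oid
```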
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:07c15daf481efa3729e5a025f1cd3f5b2786cecfaaa7fdaaaf1ae1c82d0daee5
+oid sha256:8b8dc6e6ab63f8f2ab0a2ef7abcaaafacf3195f20706e2cf750f59052bce8207
 size 10707706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c099333e451994d42ef6a1a4186a67f46e72967ddbe49de900a735186c809291
+oid sha256:ab1e40e99fab0633bd87e6ef55b536c62fe829ab788d235ef7008de5c52a1ab6
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd435f2d2df02dd69f00b4ffef1b8f08ed9d2c925d3492fd0d5a1484e32202ff
+oid sha256:30332e8503dd44f865572ef943e9da8b0c9c1c0a4084d30212d8b7e0b9a4d2d8
 size 1064
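The four binaries above (LoRA adapter weights, optimizer state, RNG state, LR scheduler state) form the resumable training checkpoint at step 5500. A rough sketch of loading the adapter for inference with peft follows; the base model is not recorded in this diff, so facebook/bart-base below is only an assumption suggested by the bart_lora_outputs directory name (the checkpoint's own adapter_config.json would name the real base).

```python
from peft import PeftModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Assumed base model -- the actual base is whatever adapter_config.json points at.
base = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base")
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")

# Attach the LoRA adapter saved in this commit's checkpoint directory.
model = PeftModel.from_pretrained(base, "last-checkpoint")
model.eval()
```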
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.4176868200302124,
-  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-5000",
-  "epoch": 8.156606851549755,
+  "best_metric": 0.417066365480423,
+  "best_model_checkpoint": "bart_lora_outputs\\checkpoint-5500",
+  "epoch": 8.97226753670473,
   "eval_steps": 100,
-  "global_step": 5000,
+  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3407,13 +3407,353 @@
       "eval_samples_per_second": 92.029,
       "eval_steps_per_second": 11.553,
       "step": 5000
+    },
+    {
+      "epoch": 8.17,
+      "learning_rate": 0.00019893428063943162,
+      "loss": 0.3893,
+      "step": 5010
+    },
+    {
+      "epoch": 8.19,
+      "learning_rate": 0.00019715808170515098,
+      "loss": 0.3948,
+      "step": 5020
+    },
+    {
+      "epoch": 8.21,
+      "learning_rate": 0.00019538188277087035,
+      "loss": 0.3831,
+      "step": 5030
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 0.0001936056838365897,
+      "loss": 0.3828,
+      "step": 5040
+    },
+    {
+      "epoch": 8.24,
+      "learning_rate": 0.00019182948490230906,
+      "loss": 0.3986,
+      "step": 5050
+    },
+    {
+      "epoch": 8.25,
+      "learning_rate": 0.00019005328596802842,
+      "loss": 0.3826,
+      "step": 5060
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 0.0001882770870337478,
+      "loss": 0.4053,
+      "step": 5070
+    },
+    {
+      "epoch": 8.29,
+      "learning_rate": 0.00018650088809946713,
+      "loss": 0.399,
+      "step": 5080
+    },
+    {
+      "epoch": 8.3,
+      "learning_rate": 0.0001847246891651865,
+      "loss": 0.4096,
+      "step": 5090
+    },
+    {
+      "epoch": 8.32,
+      "learning_rate": 0.00018294849023090586,
+      "loss": 0.3931,
+      "step": 5100
+    },
+    {
+      "epoch": 8.32,
+      "eval_loss": 0.4182140529155731,
+      "eval_runtime": 12.7337,
+      "eval_samples_per_second": 91.961,
+      "eval_steps_per_second": 11.544,
+      "step": 5100
+    },
+    {
+      "epoch": 8.34,
+      "learning_rate": 0.0001811722912966252,
+      "loss": 0.4117,
+      "step": 5110
+    },
+    {
+      "epoch": 8.35,
+      "learning_rate": 0.0001793960923623446,
+      "loss": 0.3754,
+      "step": 5120
+    },
+    {
+      "epoch": 8.37,
+      "learning_rate": 0.00017761989342806396,
+      "loss": 0.3688,
+      "step": 5130
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 0.00017584369449378333,
+      "loss": 0.4199,
+      "step": 5140
+    },
+    {
+      "epoch": 8.4,
+      "learning_rate": 0.00017406749555950267,
+      "loss": 0.4011,
+      "step": 5150
+    },
+    {
+      "epoch": 8.42,
+      "learning_rate": 0.00017229129662522203,
+      "loss": 0.4013,
+      "step": 5160
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 0.0001705150976909414,
+      "loss": 0.414,
+      "step": 5170
+    },
+    {
+      "epoch": 8.45,
+      "learning_rate": 0.00016873889875666074,
+      "loss": 0.3682,
+      "step": 5180
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 0.0001669626998223801,
+      "loss": 0.377,
+      "step": 5190
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 0.00016518650088809947,
+      "loss": 0.3904,
+      "step": 5200
+    },
+    {
+      "epoch": 8.48,
+      "eval_loss": 0.41562286019325256,
+      "eval_runtime": 12.9077,
+      "eval_samples_per_second": 90.721,
+      "eval_steps_per_second": 11.389,
+      "step": 5200
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 0.00016341030195381884,
+      "loss": 0.3955,
+      "step": 5210
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 0.00016163410301953818,
+      "loss": 0.386,
+      "step": 5220
+    },
+    {
+      "epoch": 8.53,
+      "learning_rate": 0.00015985790408525754,
+      "loss": 0.3862,
+      "step": 5230
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 0.0001580817051509769,
+      "loss": 0.3979,
+      "step": 5240
+    },
+    {
+      "epoch": 8.56,
+      "learning_rate": 0.00015630550621669628,
+      "loss": 0.381,
+      "step": 5250
+    },
+    {
+      "epoch": 8.58,
+      "learning_rate": 0.00015452930728241561,
+      "loss": 0.4016,
+      "step": 5260
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 0.000152753108348135,
+      "loss": 0.4191,
+      "step": 5270
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 0.00015097690941385437,
+      "loss": 0.3832,
+      "step": 5280
+    },
+    {
+      "epoch": 8.63,
+      "learning_rate": 0.00014920071047957371,
+      "loss": 0.3934,
+      "step": 5290
+    },
+    {
+      "epoch": 8.65,
+      "learning_rate": 0.00014742451154529308,
+      "loss": 0.3968,
+      "step": 5300
+    },
+    {
+      "epoch": 8.65,
+      "eval_loss": 0.41458660364151,
+      "eval_runtime": 12.7995,
+      "eval_samples_per_second": 91.488,
+      "eval_steps_per_second": 11.485,
+      "step": 5300
+    },
+    {
+      "epoch": 8.66,
+      "learning_rate": 0.00014564831261101245,
+      "loss": 0.409,
+      "step": 5310
+    },
+    {
+      "epoch": 8.68,
+      "learning_rate": 0.0001438721136767318,
+      "loss": 0.3797,
+      "step": 5320
+    },
+    {
+      "epoch": 8.69,
+      "learning_rate": 0.00014209591474245115,
+      "loss": 0.3895,
+      "step": 5330
+    },
+    {
+      "epoch": 8.71,
+      "learning_rate": 0.00014031971580817052,
+      "loss": 0.3722,
+      "step": 5340
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 0.00013854351687388988,
+      "loss": 0.4083,
+      "step": 5350
+    },
+    {
+      "epoch": 8.74,
+      "learning_rate": 0.00013676731793960922,
+      "loss": 0.4032,
+      "step": 5360
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 0.0001349911190053286,
+      "loss": 0.3985,
+      "step": 5370
+    },
+    {
+      "epoch": 8.78,
+      "learning_rate": 0.00013321492007104796,
+      "loss": 0.3894,
+      "step": 5380
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 0.00013143872113676732,
+      "loss": 0.3924,
+      "step": 5390
+    },
+    {
+      "epoch": 8.81,
+      "learning_rate": 0.00012966252220248666,
+      "loss": 0.3677,
+      "step": 5400
+    },
+    {
+      "epoch": 8.81,
+      "eval_loss": 0.41405031085014343,
+      "eval_runtime": 12.6828,
+      "eval_samples_per_second": 92.33,
+      "eval_steps_per_second": 11.591,
+      "step": 5400
+    },
+    {
+      "epoch": 8.83,
+      "learning_rate": 0.00012788632326820603,
+      "loss": 0.3836,
+      "step": 5410
+    },
+    {
+      "epoch": 8.84,
+      "learning_rate": 0.0001261101243339254,
+      "loss": 0.4018,
+      "step": 5420
+    },
+    {
+      "epoch": 8.86,
+      "learning_rate": 0.00012433392539964476,
+      "loss": 0.4,
+      "step": 5430
+    },
+    {
+      "epoch": 8.87,
+      "learning_rate": 0.00012255772646536413,
+      "loss": 0.3869,
+      "step": 5440
+    },
+    {
+      "epoch": 8.89,
+      "learning_rate": 0.00012078152753108348,
+      "loss": 0.4168,
+      "step": 5450
+    },
+    {
+      "epoch": 8.91,
+      "learning_rate": 0.00011900532859680283,
+      "loss": 0.3929,
+      "step": 5460
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 0.00011722912966252221,
+      "loss": 0.4153,
+      "step": 5470
+    },
+    {
+      "epoch": 8.94,
+      "learning_rate": 0.00011545293072824157,
+      "loss": 0.4017,
+      "step": 5480
+    },
+    {
+      "epoch": 8.96,
+      "learning_rate": 0.00011367673179396093,
+      "loss": 0.3844,
+      "step": 5490
+    },
+    {
+      "epoch": 8.97,
+      "learning_rate": 0.00011190053285968029,
+      "loss": 0.3823,
+      "step": 5500
+    },
+    {
+      "epoch": 8.97,
+      "eval_loss": 0.417066365480423,
+      "eval_runtime": 12.7641,
+      "eval_samples_per_second": 91.742,
+      "eval_steps_per_second": 11.517,
+      "step": 5500
     }
   ],
   "logging_steps": 10,
   "max_steps": 6130,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 9451155609649152.0,
+  "total_flos": 1.039214421393408e+16,
   "trial_name": null,
   "trial_params": null
 }
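trainer_state.json is the only human-readable file in the checkpoint. The hunk above appends the log entries for steps 5010 through 5500 and advances the best checkpoint: best eval_loss 0.417066365480423 at bart_lora_outputs\checkpoint-5500, with global_step 5500 of max_steps 6130. A small sketch for pulling those headline fields back out, assuming the layout shown above; summarize_trainer_state is a hypothetical helper, not part of the training code.

```python
import json


def summarize_trainer_state(path: str = "last-checkpoint/trainer_state.json") -> None:
    """Print the headline fields tracked in trainer_state.json."""
    with open(path) as f:
        state = json.load(f)

    print("global_step:          ", state["global_step"])
    print("epoch:                ", round(state["epoch"], 2))
    print("best_metric:          ", state["best_metric"])
    print("best_model_checkpoint:", state["best_model_checkpoint"])

    # log_history holds one dict per logging/eval event; eval entries carry "eval_loss".
    evals = [e for e in state.get("log_history", []) if "eval_loss" in e]
    if evals:
        last = evals[-1]
        print(f"last eval_loss at step {last['step']}: {last['eval_loss']}")


if __name__ == "__main__":
    summarize_trainer_state()
```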