error577 commited on
Commit
e19bf18
·
verified ·
1 Parent(s): 6607d1b

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4f480d1202a174c70b1202204e7b19ce30e680e9aa677d4a6aa9b51470f4816
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d702064a54a83cfe7011e94312c6740c68631ec38198bb112e73283b5b9325d
3
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bed13abc506cd54f99481dbc28a31b45e3993ee70918e050ddaa7b4666bf34a
3
  size 165484738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2354310a4c79f7892941438ca7a0bf30a918f8b24d806f087ecb08bdf61012e2
3
  size 165484738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de2a2fb86838d3020d5803839893bd1dcef4db60ee5326a49eb5f9bfb377bf78
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b5bf42e6137ca7b21462d382307354d023593dbf4c25759316f573752c36b8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4448a479fe2c3c13bb81ad3c5c2101e846d955cf940ee0558a49a098dd9051
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1caf05e3c7d3f6b37ac8a69117422ba2bf5b941e02ad9725bc89f907b4e6ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.372147798538208,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.8487163165711861,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3595,6 +3595,364 @@
3595
  "eval_samples_per_second": 2.925,
3596
  "eval_steps_per_second": 2.925,
3597
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3598
  }
3599
  ],
3600
  "logging_steps": 1,
@@ -3623,7 +3981,7 @@
3623
  "attributes": {}
3624
  }
3625
  },
3626
- "total_flos": 3.2402043214390886e+17,
3627
  "train_batch_size": 1,
3628
  "trial_name": null,
3629
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.37203362584114075,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 0.9335879482283047,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3595
  "eval_samples_per_second": 2.925,
3596
  "eval_steps_per_second": 2.925,
3597
  "step": 500
3598
+ },
3599
+ {
3600
+ "epoch": 0.8504137492043284,
3601
+ "grad_norm": 0.23917846381664276,
3602
+ "learning_rate": 0.00015190394441942843,
3603
+ "loss": 0.8684,
3604
+ "step": 501
3605
+ },
3606
+ {
3607
+ "epoch": 0.8521111818374708,
3608
+ "grad_norm": 0.27708154916763306,
3609
+ "learning_rate": 0.0001514279750909365,
3610
+ "loss": 1.0004,
3611
+ "step": 502
3612
+ },
3613
+ {
3614
+ "epoch": 0.8538086144706132,
3615
+ "grad_norm": 0.25657930970191956,
3616
+ "learning_rate": 0.00015095199138275128,
3617
+ "loss": 0.7568,
3618
+ "step": 503
3619
+ },
3620
+ {
3621
+ "epoch": 0.8555060471037556,
3622
+ "grad_norm": 0.2313452512025833,
3623
+ "learning_rate": 0.00015047599808802332,
3624
+ "loss": 0.8288,
3625
+ "step": 504
3626
+ },
3627
+ {
3628
+ "epoch": 0.8572034797368979,
3629
+ "grad_norm": 0.2528156042098999,
3630
+ "learning_rate": 0.00015,
3631
+ "loss": 1.011,
3632
+ "step": 505
3633
+ },
3634
+ {
3635
+ "epoch": 0.8589009123700403,
3636
+ "grad_norm": 0.1938907355070114,
3637
+ "learning_rate": 0.00014952400191197665,
3638
+ "loss": 0.5598,
3639
+ "step": 506
3640
+ },
3641
+ {
3642
+ "epoch": 0.8605983450031827,
3643
+ "grad_norm": 0.23120371997356415,
3644
+ "learning_rate": 0.00014904800861724872,
3645
+ "loss": 0.6959,
3646
+ "step": 507
3647
+ },
3648
+ {
3649
+ "epoch": 0.862295777636325,
3650
+ "grad_norm": 0.23072639107704163,
3651
+ "learning_rate": 0.00014857202490906347,
3652
+ "loss": 0.7868,
3653
+ "step": 508
3654
+ },
3655
+ {
3656
+ "epoch": 0.8639932102694674,
3657
+ "grad_norm": 0.21651454269886017,
3658
+ "learning_rate": 0.00014809605558057157,
3659
+ "loss": 0.6725,
3660
+ "step": 509
3661
+ },
3662
+ {
3663
+ "epoch": 0.8656906429026098,
3664
+ "grad_norm": 0.1905306875705719,
3665
+ "learning_rate": 0.0001476201054247788,
3666
+ "loss": 0.5756,
3667
+ "step": 510
3668
+ },
3669
+ {
3670
+ "epoch": 0.8673880755357521,
3671
+ "grad_norm": 0.35889434814453125,
3672
+ "learning_rate": 0.00014714417923449797,
3673
+ "loss": 0.6115,
3674
+ "step": 511
3675
+ },
3676
+ {
3677
+ "epoch": 0.8690855081688945,
3678
+ "grad_norm": 0.19174250960350037,
3679
+ "learning_rate": 0.00014666828180230057,
3680
+ "loss": 0.4859,
3681
+ "step": 512
3682
+ },
3683
+ {
3684
+ "epoch": 0.8707829408020369,
3685
+ "grad_norm": 0.2138870805501938,
3686
+ "learning_rate": 0.0001461924179204684,
3687
+ "loss": 0.6636,
3688
+ "step": 513
3689
+ },
3690
+ {
3691
+ "epoch": 0.8724803734351793,
3692
+ "grad_norm": 0.17262116074562073,
3693
+ "learning_rate": 0.00014571659238094556,
3694
+ "loss": 0.42,
3695
+ "step": 514
3696
+ },
3697
+ {
3698
+ "epoch": 0.8741778060683216,
3699
+ "grad_norm": 0.22339358925819397,
3700
+ "learning_rate": 0.00014524080997528987,
3701
+ "loss": 0.6612,
3702
+ "step": 515
3703
+ },
3704
+ {
3705
+ "epoch": 0.875875238701464,
3706
+ "grad_norm": 0.1979471892118454,
3707
+ "learning_rate": 0.0001447650754946249,
3708
+ "loss": 0.5441,
3709
+ "step": 516
3710
+ },
3711
+ {
3712
+ "epoch": 0.8775726713346064,
3713
+ "grad_norm": 0.20259279012680054,
3714
+ "learning_rate": 0.00014428939372959152,
3715
+ "loss": 0.5254,
3716
+ "step": 517
3717
+ },
3718
+ {
3719
+ "epoch": 0.8792701039677487,
3720
+ "grad_norm": 0.12251409888267517,
3721
+ "learning_rate": 0.0001438137694702999,
3722
+ "loss": 0.2171,
3723
+ "step": 518
3724
+ },
3725
+ {
3726
+ "epoch": 0.8809675366008911,
3727
+ "grad_norm": 0.16714578866958618,
3728
+ "learning_rate": 0.00014333820750628105,
3729
+ "loss": 0.311,
3730
+ "step": 519
3731
+ },
3732
+ {
3733
+ "epoch": 0.8826649692340335,
3734
+ "grad_norm": 0.24203087389469147,
3735
+ "learning_rate": 0.00014286271262643866,
3736
+ "loss": 0.6175,
3737
+ "step": 520
3738
+ },
3739
+ {
3740
+ "epoch": 0.8843624018671759,
3741
+ "grad_norm": 0.1858789026737213,
3742
+ "learning_rate": 0.00014238728961900088,
3743
+ "loss": 0.3565,
3744
+ "step": 521
3745
+ },
3746
+ {
3747
+ "epoch": 0.8860598345003182,
3748
+ "grad_norm": 0.09111540019512177,
3749
+ "learning_rate": 0.00014191194327147212,
3750
+ "loss": 0.1199,
3751
+ "step": 522
3752
+ },
3753
+ {
3754
+ "epoch": 0.8877572671334606,
3755
+ "grad_norm": 0.13533198833465576,
3756
+ "learning_rate": 0.00014143667837058477,
3757
+ "loss": 0.2471,
3758
+ "step": 523
3759
+ },
3760
+ {
3761
+ "epoch": 0.889454699766603,
3762
+ "grad_norm": 0.17338241636753082,
3763
+ "learning_rate": 0.00014096149970225122,
3764
+ "loss": 0.3255,
3765
+ "step": 524
3766
+ },
3767
+ {
3768
+ "epoch": 0.8911521323997453,
3769
+ "grad_norm": 0.05573137849569321,
3770
+ "learning_rate": 0.00014048641205151533,
3771
+ "loss": 0.0455,
3772
+ "step": 525
3773
+ },
3774
+ {
3775
+ "epoch": 0.8928495650328877,
3776
+ "grad_norm": 0.007357200141996145,
3777
+ "learning_rate": 0.0001400114202025044,
3778
+ "loss": 0.0004,
3779
+ "step": 526
3780
+ },
3781
+ {
3782
+ "epoch": 0.8945469976660301,
3783
+ "grad_norm": 0.00043303659185767174,
3784
+ "learning_rate": 0.00013953652893838119,
3785
+ "loss": 0.0,
3786
+ "step": 527
3787
+ },
3788
+ {
3789
+ "epoch": 0.8962444302991726,
3790
+ "grad_norm": 0.02541971206665039,
3791
+ "learning_rate": 0.0001390617430412954,
3792
+ "loss": 0.0028,
3793
+ "step": 528
3794
+ },
3795
+ {
3796
+ "epoch": 0.8979418629323149,
3797
+ "grad_norm": 0.010525004006922245,
3798
+ "learning_rate": 0.0001385870672923357,
3799
+ "loss": 0.0005,
3800
+ "step": 529
3801
+ },
3802
+ {
3803
+ "epoch": 0.8996392955654573,
3804
+ "grad_norm": 0.03903070092201233,
3805
+ "learning_rate": 0.0001381125064714817,
3806
+ "loss": 0.0003,
3807
+ "step": 530
3808
+ },
3809
+ {
3810
+ "epoch": 0.9013367281985997,
3811
+ "grad_norm": 0.010076366364955902,
3812
+ "learning_rate": 0.00013763806535755562,
3813
+ "loss": 0.0002,
3814
+ "step": 531
3815
+ },
3816
+ {
3817
+ "epoch": 0.903034160831742,
3818
+ "grad_norm": 0.0008758578333072364,
3819
+ "learning_rate": 0.00013716374872817407,
3820
+ "loss": 0.0,
3821
+ "step": 532
3822
+ },
3823
+ {
3824
+ "epoch": 0.9047315934648844,
3825
+ "grad_norm": 0.0009034467511810362,
3826
+ "learning_rate": 0.0001366895613597003,
3827
+ "loss": 0.0,
3828
+ "step": 533
3829
+ },
3830
+ {
3831
+ "epoch": 0.9064290260980268,
3832
+ "grad_norm": 0.0004988125874660909,
3833
+ "learning_rate": 0.00013621550802719588,
3834
+ "loss": 0.0,
3835
+ "step": 534
3836
+ },
3837
+ {
3838
+ "epoch": 0.9081264587311692,
3839
+ "grad_norm": 0.012061301618814468,
3840
+ "learning_rate": 0.00013574159350437261,
3841
+ "loss": 0.0006,
3842
+ "step": 535
3843
+ },
3844
+ {
3845
+ "epoch": 0.9098238913643115,
3846
+ "grad_norm": 0.0005069606122560799,
3847
+ "learning_rate": 0.0001352678225635444,
3848
+ "loss": 0.0,
3849
+ "step": 536
3850
+ },
3851
+ {
3852
+ "epoch": 0.9115213239974539,
3853
+ "grad_norm": 0.003097748151049018,
3854
+ "learning_rate": 0.00013479419997557948,
3855
+ "loss": 0.0001,
3856
+ "step": 537
3857
+ },
3858
+ {
3859
+ "epoch": 0.9132187566305963,
3860
+ "grad_norm": 0.010489325039088726,
3861
+ "learning_rate": 0.000134320730509852,
3862
+ "loss": 0.0002,
3863
+ "step": 538
3864
+ },
3865
+ {
3866
+ "epoch": 0.9149161892637386,
3867
+ "grad_norm": 0.00030282657826319337,
3868
+ "learning_rate": 0.00013384741893419415,
3869
+ "loss": 0.0,
3870
+ "step": 539
3871
+ },
3872
+ {
3873
+ "epoch": 0.916613621896881,
3874
+ "grad_norm": 0.0403389073908329,
3875
+ "learning_rate": 0.00013337427001484836,
3876
+ "loss": 0.0005,
3877
+ "step": 540
3878
+ },
3879
+ {
3880
+ "epoch": 0.9183110545300234,
3881
+ "grad_norm": 0.003200069535523653,
3882
+ "learning_rate": 0.0001329012885164189,
3883
+ "loss": 0.0001,
3884
+ "step": 541
3885
+ },
3886
+ {
3887
+ "epoch": 0.9200084871631657,
3888
+ "grad_norm": 0.007805091328918934,
3889
+ "learning_rate": 0.00013242847920182424,
3890
+ "loss": 0.0002,
3891
+ "step": 542
3892
+ },
3893
+ {
3894
+ "epoch": 0.9217059197963081,
3895
+ "grad_norm": 0.004255454055964947,
3896
+ "learning_rate": 0.000131955846832249,
3897
+ "loss": 0.0001,
3898
+ "step": 543
3899
+ },
3900
+ {
3901
+ "epoch": 0.9234033524294505,
3902
+ "grad_norm": 0.0008626742055639625,
3903
+ "learning_rate": 0.00013148339616709577,
3904
+ "loss": 0.0,
3905
+ "step": 544
3906
+ },
3907
+ {
3908
+ "epoch": 0.9251007850625929,
3909
+ "grad_norm": 0.005825830157846212,
3910
+ "learning_rate": 0.00013101113196393758,
3911
+ "loss": 0.0002,
3912
+ "step": 545
3913
+ },
3914
+ {
3915
+ "epoch": 0.9267982176957352,
3916
+ "grad_norm": 0.00038926751585677266,
3917
+ "learning_rate": 0.00013053905897846972,
3918
+ "loss": 0.0,
3919
+ "step": 546
3920
+ },
3921
+ {
3922
+ "epoch": 0.9284956503288776,
3923
+ "grad_norm": 0.025748664513230324,
3924
+ "learning_rate": 0.00013006718196446188,
3925
+ "loss": 0.0007,
3926
+ "step": 547
3927
+ },
3928
+ {
3929
+ "epoch": 0.93019308296202,
3930
+ "grad_norm": 0.000722411903552711,
3931
+ "learning_rate": 0.0001295955056737104,
3932
+ "loss": 0.0,
3933
+ "step": 548
3934
+ },
3935
+ {
3936
+ "epoch": 0.9318905155951623,
3937
+ "grad_norm": 0.013827555812895298,
3938
+ "learning_rate": 0.0001291240348559902,
3939
+ "loss": 0.0003,
3940
+ "step": 549
3941
+ },
3942
+ {
3943
+ "epoch": 0.9335879482283047,
3944
+ "grad_norm": 0.0006142717902548611,
3945
+ "learning_rate": 0.00012865277425900724,
3946
+ "loss": 0.0,
3947
+ "step": 550
3948
+ },
3949
+ {
3950
+ "epoch": 0.9335879482283047,
3951
+ "eval_loss": 0.37203362584114075,
3952
+ "eval_runtime": 65.9103,
3953
+ "eval_samples_per_second": 2.928,
3954
+ "eval_steps_per_second": 2.928,
3955
+ "step": 550
3956
  }
3957
  ],
3958
  "logging_steps": 1,
 
3981
  "attributes": {}
3982
  }
3983
  },
3984
+ "total_flos": 3.562049824924631e+17,
3985
  "train_batch_size": 1,
3986
  "trial_name": null,
3987
  "trial_params": null