romainnn commited on
Commit
89eed8f
·
verified ·
1 Parent(s): 59b6cd7

Training in progress, step 3700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25667b8b40bceb551d651eda441384e7d4c360afba846c324182c13f74489016
3
  size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee66a361296b6ddade4373f90ebf70dfec210f4c2cc8a673761b669a0a19a29
3
  size 35237104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5378e1331e1884f4cb3cc4ad37a630956509cbb51a098d4df96e0875aebdcdd7
3
  size 18810356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b69ade3d8143fa53df3c44e13e0e86f751c9d91f48e8ba8422b235fd7a4ef95c
3
  size 18810356
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db293ecacfa0c706e71b97c67145783b75c1d2229e10f0604ff699399c8208fc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525e871b8bfa9d0c55029d3a5724dab788324d84396021519791e25b39fc6797
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842fa94d5f2db38489ef77d690d25cef5f836e9e0bda0fd5e5503ed4a3ee9dd0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110477147f82823f2afe8f6b04f642e31b0df79e35f16b64a881cf01711c33d2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.8474695682525635,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3600",
4
- "epoch": 1.0019132431210214,
5
  "eval_steps": 100,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -25503,6 +25503,714 @@
25503
  "eval_samples_per_second": 59.111,
25504
  "eval_steps_per_second": 14.778,
25505
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25506
  }
25507
  ],
25508
  "logging_steps": 1,
@@ -25517,7 +26225,7 @@
25517
  "early_stopping_threshold": 0.0
25518
  },
25519
  "attributes": {
25520
- "early_stopping_patience_counter": 0
25521
  }
25522
  },
25523
  "TrainerControl": {
@@ -25531,7 +26239,7 @@
25531
  "attributes": {}
25532
  }
25533
  },
25534
- "total_flos": 2.59543548297216e+17,
25535
  "train_batch_size": 4,
25536
  "trial_name": null,
25537
  "trial_params": null
 
1
  {
2
  "best_metric": 2.8474695682525635,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-3600",
4
+ "epoch": 1.0297422339722406,
5
  "eval_steps": 100,
6
+ "global_step": 3700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
25503
  "eval_samples_per_second": 59.111,
25504
  "eval_steps_per_second": 14.778,
25505
  "step": 3600
25506
+ },
25507
+ {
25508
+ "epoch": 1.0021915330295335,
25509
+ "grad_norm": 2.4727015495300293,
25510
+ "learning_rate": 9.989056736705608e-05,
25511
+ "loss": 2.7356,
25512
+ "step": 3601
25513
+ },
25514
+ {
25515
+ "epoch": 1.0024698229380458,
25516
+ "grad_norm": 2.1826629638671875,
25517
+ "learning_rate": 9.984679434323399e-05,
25518
+ "loss": 2.4974,
25519
+ "step": 3602
25520
+ },
25521
+ {
25522
+ "epoch": 1.0027481128465578,
25523
+ "grad_norm": 2.0469789505004883,
25524
+ "learning_rate": 9.98030213487673e-05,
25525
+ "loss": 2.3743,
25526
+ "step": 3603
25527
+ },
25528
+ {
25529
+ "epoch": 1.0030264027550702,
25530
+ "grad_norm": 2.1204702854156494,
25531
+ "learning_rate": 9.975924839204333e-05,
25532
+ "loss": 2.3914,
25533
+ "step": 3604
25534
+ },
25535
+ {
25536
+ "epoch": 1.0033046926635822,
25537
+ "grad_norm": 2.30515456199646,
25538
+ "learning_rate": 9.971547548144934e-05,
25539
+ "loss": 2.7669,
25540
+ "step": 3605
25541
+ },
25542
+ {
25543
+ "epoch": 1.0035829825720946,
25544
+ "grad_norm": 2.2537689208984375,
25545
+ "learning_rate": 9.96717026253725e-05,
25546
+ "loss": 2.2713,
25547
+ "step": 3606
25548
+ },
25549
+ {
25550
+ "epoch": 1.0038612724806066,
25551
+ "grad_norm": 2.1024792194366455,
25552
+ "learning_rate": 9.962792983220014e-05,
25553
+ "loss": 2.6992,
25554
+ "step": 3607
25555
+ },
25556
+ {
25557
+ "epoch": 1.004139562389119,
25558
+ "grad_norm": 2.6211812496185303,
25559
+ "learning_rate": 9.958415711031944e-05,
25560
+ "loss": 2.553,
25561
+ "step": 3608
25562
+ },
25563
+ {
25564
+ "epoch": 1.004417852297631,
25565
+ "grad_norm": 2.4872612953186035,
25566
+ "learning_rate": 9.954038446811755e-05,
25567
+ "loss": 2.6366,
25568
+ "step": 3609
25569
+ },
25570
+ {
25571
+ "epoch": 1.0046961422061433,
25572
+ "grad_norm": 2.327535629272461,
25573
+ "learning_rate": 9.949661191398175e-05,
25574
+ "loss": 2.7231,
25575
+ "step": 3610
25576
+ },
25577
+ {
25578
+ "epoch": 1.0049744321146554,
25579
+ "grad_norm": 2.1828579902648926,
25580
+ "learning_rate": 9.94528394562992e-05,
25581
+ "loss": 2.4099,
25582
+ "step": 3611
25583
+ },
25584
+ {
25585
+ "epoch": 1.0052527220231677,
25586
+ "grad_norm": 2.1962826251983643,
25587
+ "learning_rate": 9.940906710345698e-05,
25588
+ "loss": 2.4376,
25589
+ "step": 3612
25590
+ },
25591
+ {
25592
+ "epoch": 1.0055310119316798,
25593
+ "grad_norm": 2.220290422439575,
25594
+ "learning_rate": 9.936529486384234e-05,
25595
+ "loss": 2.3303,
25596
+ "step": 3613
25597
+ },
25598
+ {
25599
+ "epoch": 1.0058093018401921,
25600
+ "grad_norm": 2.429093599319458,
25601
+ "learning_rate": 9.932152274584232e-05,
25602
+ "loss": 2.5576,
25603
+ "step": 3614
25604
+ },
25605
+ {
25606
+ "epoch": 1.0060875917487042,
25607
+ "grad_norm": 2.545867681503296,
25608
+ "learning_rate": 9.927775075784403e-05,
25609
+ "loss": 2.5247,
25610
+ "step": 3615
25611
+ },
25612
+ {
25613
+ "epoch": 1.0063658816572163,
25614
+ "grad_norm": 2.177238702774048,
25615
+ "learning_rate": 9.923397890823453e-05,
25616
+ "loss": 2.2422,
25617
+ "step": 3616
25618
+ },
25619
+ {
25620
+ "epoch": 1.0066441715657286,
25621
+ "grad_norm": 2.516214370727539,
25622
+ "learning_rate": 9.91902072054009e-05,
25623
+ "loss": 2.6979,
25624
+ "step": 3617
25625
+ },
25626
+ {
25627
+ "epoch": 1.0069224614742407,
25628
+ "grad_norm": 2.583171844482422,
25629
+ "learning_rate": 9.914643565773008e-05,
25630
+ "loss": 2.4948,
25631
+ "step": 3618
25632
+ },
25633
+ {
25634
+ "epoch": 1.007200751382753,
25635
+ "grad_norm": 2.1928822994232178,
25636
+ "learning_rate": 9.910266427360913e-05,
25637
+ "loss": 2.4619,
25638
+ "step": 3619
25639
+ },
25640
+ {
25641
+ "epoch": 1.007479041291265,
25642
+ "grad_norm": 2.3109724521636963,
25643
+ "learning_rate": 9.905889306142497e-05,
25644
+ "loss": 2.4164,
25645
+ "step": 3620
25646
+ },
25647
+ {
25648
+ "epoch": 1.0077573311997774,
25649
+ "grad_norm": 2.3388402462005615,
25650
+ "learning_rate": 9.901512202956447e-05,
25651
+ "loss": 2.5941,
25652
+ "step": 3621
25653
+ },
25654
+ {
25655
+ "epoch": 1.0080356211082895,
25656
+ "grad_norm": 2.36480450630188,
25657
+ "learning_rate": 9.89713511864146e-05,
25658
+ "loss": 2.5486,
25659
+ "step": 3622
25660
+ },
25661
+ {
25662
+ "epoch": 1.0083139110168018,
25663
+ "grad_norm": 2.3684072494506836,
25664
+ "learning_rate": 9.892758054036216e-05,
25665
+ "loss": 2.6478,
25666
+ "step": 3623
25667
+ },
25668
+ {
25669
+ "epoch": 1.0085922009253139,
25670
+ "grad_norm": 2.2883782386779785,
25671
+ "learning_rate": 9.888381009979394e-05,
25672
+ "loss": 2.3874,
25673
+ "step": 3624
25674
+ },
25675
+ {
25676
+ "epoch": 1.0088704908338262,
25677
+ "grad_norm": 2.400454521179199,
25678
+ "learning_rate": 9.884003987309676e-05,
25679
+ "loss": 2.6688,
25680
+ "step": 3625
25681
+ },
25682
+ {
25683
+ "epoch": 1.0091487807423383,
25684
+ "grad_norm": 2.411891222000122,
25685
+ "learning_rate": 9.879626986865735e-05,
25686
+ "loss": 2.8327,
25687
+ "step": 3626
25688
+ },
25689
+ {
25690
+ "epoch": 1.0094270706508506,
25691
+ "grad_norm": 2.746245861053467,
25692
+ "learning_rate": 9.875250009486232e-05,
25693
+ "loss": 2.7147,
25694
+ "step": 3627
25695
+ },
25696
+ {
25697
+ "epoch": 1.0097053605593627,
25698
+ "grad_norm": 2.195439577102661,
25699
+ "learning_rate": 9.870873056009841e-05,
25700
+ "loss": 2.2958,
25701
+ "step": 3628
25702
+ },
25703
+ {
25704
+ "epoch": 1.009983650467875,
25705
+ "grad_norm": 2.4186742305755615,
25706
+ "learning_rate": 9.866496127275216e-05,
25707
+ "loss": 2.3944,
25708
+ "step": 3629
25709
+ },
25710
+ {
25711
+ "epoch": 1.010261940376387,
25712
+ "grad_norm": 2.4659276008605957,
25713
+ "learning_rate": 9.862119224121011e-05,
25714
+ "loss": 2.8833,
25715
+ "step": 3630
25716
+ },
25717
+ {
25718
+ "epoch": 1.0105402302848994,
25719
+ "grad_norm": 3.256303071975708,
25720
+ "learning_rate": 9.85774234738588e-05,
25721
+ "loss": 2.5439,
25722
+ "step": 3631
25723
+ },
25724
+ {
25725
+ "epoch": 1.0108185201934115,
25726
+ "grad_norm": 2.459998369216919,
25727
+ "learning_rate": 9.85336549790847e-05,
25728
+ "loss": 2.4565,
25729
+ "step": 3632
25730
+ },
25731
+ {
25732
+ "epoch": 1.0110968101019238,
25733
+ "grad_norm": 2.466240167617798,
25734
+ "learning_rate": 9.848988676527411e-05,
25735
+ "loss": 2.4005,
25736
+ "step": 3633
25737
+ },
25738
+ {
25739
+ "epoch": 1.0113751000104358,
25740
+ "grad_norm": 2.684922218322754,
25741
+ "learning_rate": 9.844611884081348e-05,
25742
+ "loss": 2.4699,
25743
+ "step": 3634
25744
+ },
25745
+ {
25746
+ "epoch": 1.0116533899189482,
25747
+ "grad_norm": 2.9135866165161133,
25748
+ "learning_rate": 9.840235121408902e-05,
25749
+ "loss": 3.0818,
25750
+ "step": 3635
25751
+ },
25752
+ {
25753
+ "epoch": 1.0119316798274602,
25754
+ "grad_norm": 2.5571932792663574,
25755
+ "learning_rate": 9.835858389348701e-05,
25756
+ "loss": 2.6709,
25757
+ "step": 3636
25758
+ },
25759
+ {
25760
+ "epoch": 1.0122099697359725,
25761
+ "grad_norm": 2.159505605697632,
25762
+ "learning_rate": 9.831481688739362e-05,
25763
+ "loss": 2.3602,
25764
+ "step": 3637
25765
+ },
25766
+ {
25767
+ "epoch": 1.0124882596444846,
25768
+ "grad_norm": 2.178410291671753,
25769
+ "learning_rate": 9.827105020419494e-05,
25770
+ "loss": 2.3776,
25771
+ "step": 3638
25772
+ },
25773
+ {
25774
+ "epoch": 1.012766549552997,
25775
+ "grad_norm": 2.4218437671661377,
25776
+ "learning_rate": 9.8227283852277e-05,
25777
+ "loss": 2.1694,
25778
+ "step": 3639
25779
+ },
25780
+ {
25781
+ "epoch": 1.013044839461509,
25782
+ "grad_norm": 3.1513845920562744,
25783
+ "learning_rate": 9.818351784002586e-05,
25784
+ "loss": 2.9845,
25785
+ "step": 3640
25786
+ },
25787
+ {
25788
+ "epoch": 1.0133231293700211,
25789
+ "grad_norm": 2.8680331707000732,
25790
+ "learning_rate": 9.813975217582739e-05,
25791
+ "loss": 2.5105,
25792
+ "step": 3641
25793
+ },
25794
+ {
25795
+ "epoch": 1.0136014192785334,
25796
+ "grad_norm": 2.3797199726104736,
25797
+ "learning_rate": 9.809598686806746e-05,
25798
+ "loss": 2.4647,
25799
+ "step": 3642
25800
+ },
25801
+ {
25802
+ "epoch": 1.0138797091870455,
25803
+ "grad_norm": 2.403562068939209,
25804
+ "learning_rate": 9.805222192513184e-05,
25805
+ "loss": 2.6245,
25806
+ "step": 3643
25807
+ },
25808
+ {
25809
+ "epoch": 1.0141579990955578,
25810
+ "grad_norm": 2.2409002780914307,
25811
+ "learning_rate": 9.800845735540627e-05,
25812
+ "loss": 2.393,
25813
+ "step": 3644
25814
+ },
25815
+ {
25816
+ "epoch": 1.01443628900407,
25817
+ "grad_norm": 2.2305660247802734,
25818
+ "learning_rate": 9.796469316727641e-05,
25819
+ "loss": 2.5389,
25820
+ "step": 3645
25821
+ },
25822
+ {
25823
+ "epoch": 1.0147145789125822,
25824
+ "grad_norm": 2.433889389038086,
25825
+ "learning_rate": 9.792092936912777e-05,
25826
+ "loss": 2.7513,
25827
+ "step": 3646
25828
+ },
25829
+ {
25830
+ "epoch": 1.0149928688210943,
25831
+ "grad_norm": 2.397838830947876,
25832
+ "learning_rate": 9.78771659693459e-05,
25833
+ "loss": 2.7513,
25834
+ "step": 3647
25835
+ },
25836
+ {
25837
+ "epoch": 1.0152711587296066,
25838
+ "grad_norm": 2.5803353786468506,
25839
+ "learning_rate": 9.783340297631623e-05,
25840
+ "loss": 2.5487,
25841
+ "step": 3648
25842
+ },
25843
+ {
25844
+ "epoch": 1.0155494486381187,
25845
+ "grad_norm": 2.5709424018859863,
25846
+ "learning_rate": 9.778964039842404e-05,
25847
+ "loss": 2.4367,
25848
+ "step": 3649
25849
+ },
25850
+ {
25851
+ "epoch": 1.015827738546631,
25852
+ "grad_norm": 2.5421626567840576,
25853
+ "learning_rate": 9.774587824405466e-05,
25854
+ "loss": 3.0246,
25855
+ "step": 3650
25856
+ },
25857
+ {
25858
+ "epoch": 1.016106028455143,
25859
+ "grad_norm": 2.4680778980255127,
25860
+ "learning_rate": 9.770211652159327e-05,
25861
+ "loss": 2.4571,
25862
+ "step": 3651
25863
+ },
25864
+ {
25865
+ "epoch": 1.0163843183636554,
25866
+ "grad_norm": 2.4402754306793213,
25867
+ "learning_rate": 9.76583552394249e-05,
25868
+ "loss": 2.3657,
25869
+ "step": 3652
25870
+ },
25871
+ {
25872
+ "epoch": 1.0166626082721675,
25873
+ "grad_norm": 2.582365036010742,
25874
+ "learning_rate": 9.761459440593466e-05,
25875
+ "loss": 2.6948,
25876
+ "step": 3653
25877
+ },
25878
+ {
25879
+ "epoch": 1.0169408981806798,
25880
+ "grad_norm": 2.3168914318084717,
25881
+ "learning_rate": 9.757083402950742e-05,
25882
+ "loss": 2.3346,
25883
+ "step": 3654
25884
+ },
25885
+ {
25886
+ "epoch": 1.0172191880891919,
25887
+ "grad_norm": 2.390580654144287,
25888
+ "learning_rate": 9.752707411852802e-05,
25889
+ "loss": 2.7386,
25890
+ "step": 3655
25891
+ },
25892
+ {
25893
+ "epoch": 1.0174974779977042,
25894
+ "grad_norm": 2.624013900756836,
25895
+ "learning_rate": 9.748331468138124e-05,
25896
+ "loss": 2.4795,
25897
+ "step": 3656
25898
+ },
25899
+ {
25900
+ "epoch": 1.0177757679062163,
25901
+ "grad_norm": 2.4292471408843994,
25902
+ "learning_rate": 9.743955572645174e-05,
25903
+ "loss": 2.7798,
25904
+ "step": 3657
25905
+ },
25906
+ {
25907
+ "epoch": 1.0180540578147286,
25908
+ "grad_norm": 2.503296136856079,
25909
+ "learning_rate": 9.739579726212406e-05,
25910
+ "loss": 2.6261,
25911
+ "step": 3658
25912
+ },
25913
+ {
25914
+ "epoch": 1.0183323477232407,
25915
+ "grad_norm": 2.2126271724700928,
25916
+ "learning_rate": 9.735203929678272e-05,
25917
+ "loss": 2.2868,
25918
+ "step": 3659
25919
+ },
25920
+ {
25921
+ "epoch": 1.018610637631753,
25922
+ "grad_norm": 2.1973748207092285,
25923
+ "learning_rate": 9.730828183881208e-05,
25924
+ "loss": 2.4527,
25925
+ "step": 3660
25926
+ },
25927
+ {
25928
+ "epoch": 1.018888927540265,
25929
+ "grad_norm": 2.435417652130127,
25930
+ "learning_rate": 9.726452489659638e-05,
25931
+ "loss": 2.5638,
25932
+ "step": 3661
25933
+ },
25934
+ {
25935
+ "epoch": 1.0191672174487774,
25936
+ "grad_norm": 2.363333225250244,
25937
+ "learning_rate": 9.722076847851988e-05,
25938
+ "loss": 2.4653,
25939
+ "step": 3662
25940
+ },
25941
+ {
25942
+ "epoch": 1.0194455073572894,
25943
+ "grad_norm": 2.7161386013031006,
25944
+ "learning_rate": 9.717701259296665e-05,
25945
+ "loss": 2.4748,
25946
+ "step": 3663
25947
+ },
25948
+ {
25949
+ "epoch": 1.0197237972658018,
25950
+ "grad_norm": 2.3049778938293457,
25951
+ "learning_rate": 9.713325724832059e-05,
25952
+ "loss": 2.5271,
25953
+ "step": 3664
25954
+ },
25955
+ {
25956
+ "epoch": 1.0200020871743138,
25957
+ "grad_norm": 2.264054775238037,
25958
+ "learning_rate": 9.708950245296569e-05,
25959
+ "loss": 2.4146,
25960
+ "step": 3665
25961
+ },
25962
+ {
25963
+ "epoch": 1.020280377082826,
25964
+ "grad_norm": 2.786287784576416,
25965
+ "learning_rate": 9.704574821528566e-05,
25966
+ "loss": 2.6383,
25967
+ "step": 3666
25968
+ },
25969
+ {
25970
+ "epoch": 1.0205586669913382,
25971
+ "grad_norm": 2.3080055713653564,
25972
+ "learning_rate": 9.700199454366415e-05,
25973
+ "loss": 2.5227,
25974
+ "step": 3667
25975
+ },
25976
+ {
25977
+ "epoch": 1.0208369568998503,
25978
+ "grad_norm": 2.4426419734954834,
25979
+ "learning_rate": 9.695824144648478e-05,
25980
+ "loss": 2.4908,
25981
+ "step": 3668
25982
+ },
25983
+ {
25984
+ "epoch": 1.0211152468083626,
25985
+ "grad_norm": 2.478846788406372,
25986
+ "learning_rate": 9.691448893213095e-05,
25987
+ "loss": 2.5778,
25988
+ "step": 3669
25989
+ },
25990
+ {
25991
+ "epoch": 1.0213935367168747,
25992
+ "grad_norm": 2.3766379356384277,
25993
+ "learning_rate": 9.687073700898598e-05,
25994
+ "loss": 2.5546,
25995
+ "step": 3670
25996
+ },
25997
+ {
25998
+ "epoch": 1.021671826625387,
25999
+ "grad_norm": 2.3187801837921143,
26000
+ "learning_rate": 9.682698568543317e-05,
26001
+ "loss": 2.5168,
26002
+ "step": 3671
26003
+ },
26004
+ {
26005
+ "epoch": 1.0219501165338991,
26006
+ "grad_norm": 2.4878695011138916,
26007
+ "learning_rate": 9.678323496985557e-05,
26008
+ "loss": 2.7055,
26009
+ "step": 3672
26010
+ },
26011
+ {
26012
+ "epoch": 1.0222284064424114,
26013
+ "grad_norm": 2.6090481281280518,
26014
+ "learning_rate": 9.673948487063614e-05,
26015
+ "loss": 2.6359,
26016
+ "step": 3673
26017
+ },
26018
+ {
26019
+ "epoch": 1.0225066963509235,
26020
+ "grad_norm": 2.3619978427886963,
26021
+ "learning_rate": 9.669573539615782e-05,
26022
+ "loss": 2.4851,
26023
+ "step": 3674
26024
+ },
26025
+ {
26026
+ "epoch": 1.0227849862594358,
26027
+ "grad_norm": 2.271355390548706,
26028
+ "learning_rate": 9.665198655480334e-05,
26029
+ "loss": 2.7666,
26030
+ "step": 3675
26031
+ },
26032
+ {
26033
+ "epoch": 1.023063276167948,
26034
+ "grad_norm": 2.36423659324646,
26035
+ "learning_rate": 9.660823835495531e-05,
26036
+ "loss": 2.3988,
26037
+ "step": 3676
26038
+ },
26039
+ {
26040
+ "epoch": 1.0233415660764602,
26041
+ "grad_norm": 2.4783449172973633,
26042
+ "learning_rate": 9.656449080499627e-05,
26043
+ "loss": 2.4864,
26044
+ "step": 3677
26045
+ },
26046
+ {
26047
+ "epoch": 1.0236198559849723,
26048
+ "grad_norm": 2.3957090377807617,
26049
+ "learning_rate": 9.65207439133086e-05,
26050
+ "loss": 2.3701,
26051
+ "step": 3678
26052
+ },
26053
+ {
26054
+ "epoch": 1.0238981458934846,
26055
+ "grad_norm": 2.900683879852295,
26056
+ "learning_rate": 9.64769976882745e-05,
26057
+ "loss": 2.5155,
26058
+ "step": 3679
26059
+ },
26060
+ {
26061
+ "epoch": 1.0241764358019967,
26062
+ "grad_norm": 2.5048580169677734,
26063
+ "learning_rate": 9.643325213827619e-05,
26064
+ "loss": 2.5991,
26065
+ "step": 3680
26066
+ },
26067
+ {
26068
+ "epoch": 1.024454725710509,
26069
+ "grad_norm": 2.4622344970703125,
26070
+ "learning_rate": 9.638950727169564e-05,
26071
+ "loss": 2.5138,
26072
+ "step": 3681
26073
+ },
26074
+ {
26075
+ "epoch": 1.024733015619021,
26076
+ "grad_norm": 2.480517625808716,
26077
+ "learning_rate": 9.63457630969147e-05,
26078
+ "loss": 2.6391,
26079
+ "step": 3682
26080
+ },
26081
+ {
26082
+ "epoch": 1.0250113055275334,
26083
+ "grad_norm": 2.688997268676758,
26084
+ "learning_rate": 9.630201962231511e-05,
26085
+ "loss": 2.3735,
26086
+ "step": 3683
26087
+ },
26088
+ {
26089
+ "epoch": 1.0252895954360455,
26090
+ "grad_norm": 2.426302909851074,
26091
+ "learning_rate": 9.62582768562785e-05,
26092
+ "loss": 2.2221,
26093
+ "step": 3684
26094
+ },
26095
+ {
26096
+ "epoch": 1.0255678853445578,
26097
+ "grad_norm": 2.492079496383667,
26098
+ "learning_rate": 9.621453480718634e-05,
26099
+ "loss": 2.9364,
26100
+ "step": 3685
26101
+ },
26102
+ {
26103
+ "epoch": 1.0258461752530699,
26104
+ "grad_norm": 2.756873846054077,
26105
+ "learning_rate": 9.61707934834199e-05,
26106
+ "loss": 2.6593,
26107
+ "step": 3686
26108
+ },
26109
+ {
26110
+ "epoch": 1.0261244651615822,
26111
+ "grad_norm": 2.2915287017822266,
26112
+ "learning_rate": 9.612705289336045e-05,
26113
+ "loss": 2.3577,
26114
+ "step": 3687
26115
+ },
26116
+ {
26117
+ "epoch": 1.0264027550700943,
26118
+ "grad_norm": 2.240419387817383,
26119
+ "learning_rate": 9.6083313045389e-05,
26120
+ "loss": 2.4756,
26121
+ "step": 3688
26122
+ },
26123
+ {
26124
+ "epoch": 1.0266810449786066,
26125
+ "grad_norm": 2.4698326587677,
26126
+ "learning_rate": 9.603957394788644e-05,
26127
+ "loss": 2.5557,
26128
+ "step": 3689
26129
+ },
26130
+ {
26131
+ "epoch": 1.0269593348871187,
26132
+ "grad_norm": 2.5737481117248535,
26133
+ "learning_rate": 9.599583560923359e-05,
26134
+ "loss": 2.6166,
26135
+ "step": 3690
26136
+ },
26137
+ {
26138
+ "epoch": 1.0272376247956307,
26139
+ "grad_norm": 2.226649761199951,
26140
+ "learning_rate": 9.595209803781102e-05,
26141
+ "loss": 2.4705,
26142
+ "step": 3691
26143
+ },
26144
+ {
26145
+ "epoch": 1.027515914704143,
26146
+ "grad_norm": 2.4698619842529297,
26147
+ "learning_rate": 9.590836124199921e-05,
26148
+ "loss": 2.4206,
26149
+ "step": 3692
26150
+ },
26151
+ {
26152
+ "epoch": 1.0277942046126551,
26153
+ "grad_norm": 2.4154977798461914,
26154
+ "learning_rate": 9.586462523017852e-05,
26155
+ "loss": 2.6235,
26156
+ "step": 3693
26157
+ },
26158
+ {
26159
+ "epoch": 1.0280724945211674,
26160
+ "grad_norm": 2.2395994663238525,
26161
+ "learning_rate": 9.58208900107291e-05,
26162
+ "loss": 2.5879,
26163
+ "step": 3694
26164
+ },
26165
+ {
26166
+ "epoch": 1.0283507844296795,
26167
+ "grad_norm": 2.263139486312866,
26168
+ "learning_rate": 9.577715559203095e-05,
26169
+ "loss": 2.794,
26170
+ "step": 3695
26171
+ },
26172
+ {
26173
+ "epoch": 1.0286290743381918,
26174
+ "grad_norm": 2.4009816646575928,
26175
+ "learning_rate": 9.573342198246397e-05,
26176
+ "loss": 2.8207,
26177
+ "step": 3696
26178
+ },
26179
+ {
26180
+ "epoch": 1.028907364246704,
26181
+ "grad_norm": 2.4740641117095947,
26182
+ "learning_rate": 9.568968919040787e-05,
26183
+ "loss": 2.7345,
26184
+ "step": 3697
26185
+ },
26186
+ {
26187
+ "epoch": 1.0291856541552162,
26188
+ "grad_norm": 2.556521415710449,
26189
+ "learning_rate": 9.564595722424217e-05,
26190
+ "loss": 2.6085,
26191
+ "step": 3698
26192
+ },
26193
+ {
26194
+ "epoch": 1.0294639440637283,
26195
+ "grad_norm": 2.379560708999634,
26196
+ "learning_rate": 9.560222609234633e-05,
26197
+ "loss": 2.4874,
26198
+ "step": 3699
26199
+ },
26200
+ {
26201
+ "epoch": 1.0297422339722406,
26202
+ "grad_norm": 2.2581701278686523,
26203
+ "learning_rate": 9.555849580309954e-05,
26204
+ "loss": 2.4456,
26205
+ "step": 3700
26206
+ },
26207
+ {
26208
+ "epoch": 1.0297422339722406,
26209
+ "eval_loss": 2.8660240173339844,
26210
+ "eval_runtime": 84.4219,
26211
+ "eval_samples_per_second": 59.226,
26212
+ "eval_steps_per_second": 14.807,
26213
+ "step": 3700
26214
  }
26215
  ],
26216
  "logging_steps": 1,
 
26225
  "early_stopping_threshold": 0.0
26226
  },
26227
  "attributes": {
26228
+ "early_stopping_patience_counter": 1
26229
  }
26230
  },
26231
  "TrainerControl": {
 
26239
  "attributes": {}
26240
  }
26241
  },
26242
+ "total_flos": 2.66753091305472e+17,
26243
  "train_batch_size": 4,
26244
  "trial_name": null,
26245
  "trial_params": null