CocoRoF committed · Commit ea56879 · verified · 1 Parent(s): 4ca780d

Training in progress, step 55305, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55cb1880d40c3d134639f9c78780fa699879268ce641a9683262789b7d6c1264
+oid sha256:17f126e47a46daca975d4610fafaec402d025df406bc1ef1091fb3bb633e95a0
 size 306619286
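
The diffs in this commit touch only Git LFS pointer files: each pointer records just a sha256 oid and a byte size, so a changed oid means the binary payload behind it changed while the size stayed the same. A minimal sketch (not part of the commit; file paths are illustrative) of how such a pointer can be checked against a downloaded blob:

```python
# Sketch: verify that a local blob matches a Git LFS pointer like the ones diffed above.
# The pointer format ("version", "oid sha256:<hex>", "size <bytes>") is what the diff shows.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer into a dict."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(pointer_path: Path, blob_path: Path) -> bool:
    """Return True if the blob's size and sha256 digest match the pointer."""
    pointer = parse_lfs_pointer(pointer_path.read_text())
    data = blob_path.read_bytes()
    return len(data) == pointer["size"] and hashlib.sha256(data).hexdigest() == pointer["oid"]

# Hypothetical usage with local copies of the pointer and the real file:
# verify_blob(Path("model.safetensors.pointer"), Path("model.safetensors"))
```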
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75f02914cf090b930189b526baa66e8d886e3a2e64ce91f1b47ac5c4ff6fffa7
+oid sha256:cb3ad1610c33c264ac4cc15bacd96d9f3fa7c6e6119230a90ef857002b3788af
 size 919972410
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:967c116cfad1c7605a72e42ef0cb63974f19c71b7038332b297d81e82a22bbe7
+oid sha256:c4482438756e43b359aec133e7b920aff9ca62a0599618ff5293c529cfac8e76
 size 1000
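
Together with the trainer_state.json diff below, the three files above (model weights, optimizer state, scheduler state) form a standard transformers Trainer checkpoint under last-checkpoint/. A minimal, hedged sketch of resuming from such a directory; none of this comes from the repo's actual training script, and the argument values only mirror what trainer_state.json below reports ("train_batch_size": 8, "logging_steps": 10), with num_train_epochs=1 as an assumption consistent with training stopping near epoch 1.0:

```python
# Sketch (assumptions noted above): resume training from a Trainer checkpoint directory.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="outputs",            # Trainer writes its checkpoint-*/ directories here
    per_device_train_batch_size=8,   # matches "train_batch_size": 8 in trainer_state.json
    num_train_epochs=1,              # assumption, consistent with stopping at epoch ~1.0
    logging_steps=10,                # matches "logging_steps": 10
)

# With a model and dataset in hand (not shown here), resuming restores the weights,
# optimizer.pt, scheduler.pt, and trainer_state.json from the checkpoint directory:
# trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
# trainer.train(resume_from_checkpoint="last-checkpoint")
```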
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9944828802032362,
+  "epoch": 0.9999977398116359,
   "eval_steps": 5000,
-  "global_step": 55000,
+  "global_step": 55305,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -38595,6 +38595,216 @@
       "eval_samples_per_second": 3138.58,
       "eval_steps_per_second": 49.042,
       "step": 55000
+    },
+    {
+      "epoch": 0.994663695272364,
+      "grad_norm": 40.125,
+      "learning_rate": 1.5541611456888346e-06,
+      "loss": 17.4001,
+      "step": 55010
+    },
+    {
+      "epoch": 0.9948445103414919,
+      "grad_norm": 40.21875,
+      "learning_rate": 1.5544436690747077e-06,
+      "loss": 17.6567,
+      "step": 55020
+    },
+    {
+      "epoch": 0.9950253254106197,
+      "grad_norm": 38.875,
+      "learning_rate": 1.554726192460581e-06,
+      "loss": 17.646,
+      "step": 55030
+    },
+    {
+      "epoch": 0.9952061404797475,
+      "grad_norm": 42.15625,
+      "learning_rate": 1.5550087158464543e-06,
+      "loss": 17.3117,
+      "step": 55040
+    },
+    {
+      "epoch": 0.9953869555488755,
+      "grad_norm": 35.9375,
+      "learning_rate": 1.5552912392323276e-06,
+      "loss": 17.9208,
+      "step": 55050
+    },
+    {
+      "epoch": 0.9955677706180033,
+      "grad_norm": 37.78125,
+      "learning_rate": 1.5555737626182007e-06,
+      "loss": 17.497,
+      "step": 55060
+    },
+    {
+      "epoch": 0.9957485856871312,
+      "grad_norm": 36.46875,
+      "learning_rate": 1.555856286004074e-06,
+      "loss": 17.4683,
+      "step": 55070
+    },
+    {
+      "epoch": 0.995929400756259,
+      "grad_norm": 38.5625,
+      "learning_rate": 1.5561388093899474e-06,
+      "loss": 17.5277,
+      "step": 55080
+    },
+    {
+      "epoch": 0.9961102158253868,
+      "grad_norm": 37.9375,
+      "learning_rate": 1.5564213327758207e-06,
+      "loss": 17.5301,
+      "step": 55090
+    },
+    {
+      "epoch": 0.9962910308945148,
+      "grad_norm": 39.3125,
+      "learning_rate": 1.556703856161694e-06,
+      "loss": 17.7091,
+      "step": 55100
+    },
+    {
+      "epoch": 0.9964718459636426,
+      "grad_norm": 42.59375,
+      "learning_rate": 1.5569863795475671e-06,
+      "loss": 17.6605,
+      "step": 55110
+    },
+    {
+      "epoch": 0.9966526610327705,
+      "grad_norm": 40.71875,
+      "learning_rate": 1.5572689029334404e-06,
+      "loss": 17.718,
+      "step": 55120
+    },
+    {
+      "epoch": 0.9968334761018983,
+      "grad_norm": 39.21875,
+      "learning_rate": 1.5575514263193137e-06,
+      "loss": 17.2612,
+      "step": 55130
+    },
+    {
+      "epoch": 0.9970142911710262,
+      "grad_norm": 38.0625,
+      "learning_rate": 1.557833949705187e-06,
+      "loss": 17.609,
+      "step": 55140
+    },
+    {
+      "epoch": 0.9971951062401541,
+      "grad_norm": 39.375,
+      "learning_rate": 1.5581164730910604e-06,
+      "loss": 18.0846,
+      "step": 55150
+    },
+    {
+      "epoch": 0.9973759213092819,
+      "grad_norm": 36.09375,
+      "learning_rate": 1.5583989964769337e-06,
+      "loss": 17.3367,
+      "step": 55160
+    },
+    {
+      "epoch": 0.9975567363784098,
+      "grad_norm": 40.6875,
+      "learning_rate": 1.5586815198628066e-06,
+      "loss": 17.6999,
+      "step": 55170
+    },
+    {
+      "epoch": 0.9977375514475376,
+      "grad_norm": 38.96875,
+      "learning_rate": 1.55896404324868e-06,
+      "loss": 17.5,
+      "step": 55180
+    },
+    {
+      "epoch": 0.9979183665166655,
+      "grad_norm": 39.375,
+      "learning_rate": 1.5592465666345532e-06,
+      "loss": 17.5552,
+      "step": 55190
+    },
+    {
+      "epoch": 0.9980991815857934,
+      "grad_norm": 41.75,
+      "learning_rate": 1.5595290900204265e-06,
+      "loss": 17.587,
+      "step": 55200
+    },
+    {
+      "epoch": 0.9982799966549212,
+      "grad_norm": 38.71875,
+      "learning_rate": 1.5598116134062999e-06,
+      "loss": 17.1768,
+      "step": 55210
+    },
+    {
+      "epoch": 0.9984608117240491,
+      "grad_norm": 38.4375,
+      "learning_rate": 1.5600941367921732e-06,
+      "loss": 17.9381,
+      "step": 55220
+    },
+    {
+      "epoch": 0.9986416267931769,
+      "grad_norm": 36.65625,
+      "learning_rate": 1.5603766601780463e-06,
+      "loss": 17.5928,
+      "step": 55230
+    },
+    {
+      "epoch": 0.9988224418623048,
+      "grad_norm": 40.8125,
+      "learning_rate": 1.5606591835639196e-06,
+      "loss": 17.5531,
+      "step": 55240
+    },
+    {
+      "epoch": 0.9990032569314327,
+      "grad_norm": 37.8125,
+      "learning_rate": 1.560941706949793e-06,
+      "loss": 17.3558,
+      "step": 55250
+    },
+    {
+      "epoch": 0.9991840720005605,
+      "grad_norm": 38.90625,
+      "learning_rate": 1.5612242303356662e-06,
+      "loss": 18.1386,
+      "step": 55260
+    },
+    {
+      "epoch": 0.9993648870696884,
+      "grad_norm": 38.28125,
+      "learning_rate": 1.5615067537215395e-06,
+      "loss": 17.7706,
+      "step": 55270
+    },
+    {
+      "epoch": 0.9995457021388162,
+      "grad_norm": 41.40625,
+      "learning_rate": 1.5617892771074129e-06,
+      "loss": 17.5942,
+      "step": 55280
+    },
+    {
+      "epoch": 0.9997265172079441,
+      "grad_norm": 39.09375,
+      "learning_rate": 1.5620718004932858e-06,
+      "loss": 17.6764,
+      "step": 55290
+    },
+    {
+      "epoch": 0.999907332277072,
+      "grad_norm": 38.65625,
+      "learning_rate": 1.562354323879159e-06,
+      "loss": 17.3797,
+      "step": 55300
     }
   ],
   "logging_steps": 10,
@@ -38609,12 +38819,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.600375174725632e+18,
+  "total_flos": 9.653613618876383e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null