Stuti103 commited on
Commit
6b15311
·
verified ·
1 Parent(s): 48c2c5e

Training in progress, step 17700, checkpoint

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  checkpoint-17700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  checkpoint-17700/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5c0bed417727b47dfe87f4a2dbc478f72a9c6b96d025389d880eed0a3a0bbe0
3
  size 3541119728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f673fbf3d293bdfb7083302f95a085b1c3809362d73ae1feb5db6ae0ec6a3e7e
3
  size 3541119728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6388103df9e21c5d7a3129f6c1bffb0f061a5bcc8ed8465bd5cf24ea58134137
3
  size 778374186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a085a3464ee030b3211908f350dc686f7d0f79f38846fc7e1d9c5bc49537c610
3
  size 778374186
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a179e154338cc3e961746fd3accae1a74ef0e23861aa2e20120545743c22ada
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390ed221cdfff0759c038f9f9dc672e97ebfcc32b38cca4add3a81bbe314fc8b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8303606580846825,
6
  "eval_steps": 500,
7
- "global_step": 17400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -15668,6 +15668,276 @@
15668
  "mean_token_accuracy": 0.8778254583477973,
15669
  "num_tokens": 28906983.0,
15670
  "step": 17400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15671
  }
15672
  ],
15673
  "logging_steps": 10,
@@ -15687,7 +15957,7 @@
15687
  "attributes": {}
15688
  }
15689
  },
15690
- "total_flos": 6.509385037989028e+17,
15691
  "train_batch_size": 2,
15692
  "trial_name": null,
15693
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.844677221155108,
6
  "eval_steps": 500,
7
+ "global_step": 17700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
15668
  "mean_token_accuracy": 0.8778254583477973,
15669
  "num_tokens": 28906983.0,
15670
  "step": 17400
15671
+ },
15672
+ {
15673
+ "epoch": 0.8308378768536967,
15674
+ "grad_norm": 0.4296000599861145,
15675
+ "learning_rate": 1.1692197566213315e-05,
15676
+ "loss": 0.7167,
15677
+ "mean_token_accuracy": 0.8584795027971268,
15678
+ "num_tokens": 28924631.0,
15679
+ "step": 17410
15680
+ },
15681
+ {
15682
+ "epoch": 0.8313150956227109,
15683
+ "grad_norm": 0.35790425539016724,
15684
+ "learning_rate": 1.1687425435456932e-05,
15685
+ "loss": 0.6852,
15686
+ "mean_token_accuracy": 0.8608928889036178,
15687
+ "num_tokens": 28943848.0,
15688
+ "step": 17420
15689
+ },
15690
+ {
15691
+ "epoch": 0.8317923143917251,
15692
+ "grad_norm": 0.35815557837486267,
15693
+ "learning_rate": 1.168265330470055e-05,
15694
+ "loss": 0.5988,
15695
+ "mean_token_accuracy": 0.8806958973407746,
15696
+ "num_tokens": 28960033.0,
15697
+ "step": 17430
15698
+ },
15699
+ {
15700
+ "epoch": 0.8322695331607393,
15701
+ "grad_norm": 0.41242972016334534,
15702
+ "learning_rate": 1.1677881173944167e-05,
15703
+ "loss": 0.7573,
15704
+ "mean_token_accuracy": 0.8531021654605866,
15705
+ "num_tokens": 28978291.0,
15706
+ "step": 17440
15707
+ },
15708
+ {
15709
+ "epoch": 0.8327467519297534,
15710
+ "grad_norm": 0.3900233805179596,
15711
+ "learning_rate": 1.1673109043187785e-05,
15712
+ "loss": 0.6684,
15713
+ "mean_token_accuracy": 0.8653559580445289,
15714
+ "num_tokens": 28995166.0,
15715
+ "step": 17450
15716
+ },
15717
+ {
15718
+ "epoch": 0.8332239706987676,
15719
+ "grad_norm": 0.3421511948108673,
15720
+ "learning_rate": 1.1668336912431402e-05,
15721
+ "loss": 0.5933,
15722
+ "mean_token_accuracy": 0.8817808702588081,
15723
+ "num_tokens": 29011537.0,
15724
+ "step": 17460
15725
+ },
15726
+ {
15727
+ "epoch": 0.8337011894677818,
15728
+ "grad_norm": 0.3471275269985199,
15729
+ "learning_rate": 1.1663564781675019e-05,
15730
+ "loss": 0.6444,
15731
+ "mean_token_accuracy": 0.8728347107768059,
15732
+ "num_tokens": 29029323.0,
15733
+ "step": 17470
15734
+ },
15735
+ {
15736
+ "epoch": 0.8341784082367959,
15737
+ "grad_norm": 0.4366353750228882,
15738
+ "learning_rate": 1.1658792650918635e-05,
15739
+ "loss": 0.6836,
15740
+ "mean_token_accuracy": 0.8620174437761307,
15741
+ "num_tokens": 29048157.0,
15742
+ "step": 17480
15743
+ },
15744
+ {
15745
+ "epoch": 0.8346556270058101,
15746
+ "grad_norm": 0.3484688997268677,
15747
+ "learning_rate": 1.1654020520162252e-05,
15748
+ "loss": 0.6891,
15749
+ "mean_token_accuracy": 0.8628757908940315,
15750
+ "num_tokens": 29064710.0,
15751
+ "step": 17490
15752
+ },
15753
+ {
15754
+ "epoch": 0.8351328457748243,
15755
+ "grad_norm": 0.3586483597755432,
15756
+ "learning_rate": 1.1649248389405872e-05,
15757
+ "loss": 0.6487,
15758
+ "mean_token_accuracy": 0.8712560385465622,
15759
+ "num_tokens": 29081864.0,
15760
+ "step": 17500
15761
+ },
15762
+ {
15763
+ "epoch": 0.8356100645438385,
15764
+ "grad_norm": 0.35695043206214905,
15765
+ "learning_rate": 1.1644476258649489e-05,
15766
+ "loss": 0.652,
15767
+ "mean_token_accuracy": 0.8665311306715011,
15768
+ "num_tokens": 29098008.0,
15769
+ "step": 17510
15770
+ },
15771
+ {
15772
+ "epoch": 0.8360872833128526,
15773
+ "grad_norm": 0.35384443402290344,
15774
+ "learning_rate": 1.1639704127893106e-05,
15775
+ "loss": 0.6137,
15776
+ "mean_token_accuracy": 0.8707799568772316,
15777
+ "num_tokens": 29114025.0,
15778
+ "step": 17520
15779
+ },
15780
+ {
15781
+ "epoch": 0.8365645020818668,
15782
+ "grad_norm": 0.4258424639701843,
15783
+ "learning_rate": 1.1634931997136722e-05,
15784
+ "loss": 0.446,
15785
+ "mean_token_accuracy": 0.9021103799343109,
15786
+ "num_tokens": 29127980.0,
15787
+ "step": 17530
15788
+ },
15789
+ {
15790
+ "epoch": 0.837041720850881,
15791
+ "grad_norm": 0.4185291826725006,
15792
+ "learning_rate": 1.1630159866380339e-05,
15793
+ "loss": 0.7187,
15794
+ "mean_token_accuracy": 0.8666620507836342,
15795
+ "num_tokens": 29145781.0,
15796
+ "step": 17540
15797
+ },
15798
+ {
15799
+ "epoch": 0.8375189396198952,
15800
+ "grad_norm": 0.3698226511478424,
15801
+ "learning_rate": 1.1625387735623956e-05,
15802
+ "loss": 0.8038,
15803
+ "mean_token_accuracy": 0.83167435079813,
15804
+ "num_tokens": 29164539.0,
15805
+ "step": 17550
15806
+ },
15807
+ {
15808
+ "epoch": 0.8379961583889094,
15809
+ "grad_norm": 0.5082905888557434,
15810
+ "learning_rate": 1.1620615604867574e-05,
15811
+ "loss": 0.6794,
15812
+ "mean_token_accuracy": 0.8646394088864326,
15813
+ "num_tokens": 29181392.0,
15814
+ "step": 17560
15815
+ },
15816
+ {
15817
+ "epoch": 0.8384733771579236,
15818
+ "grad_norm": 0.419879287481308,
15819
+ "learning_rate": 1.1615843474111192e-05,
15820
+ "loss": 0.748,
15821
+ "mean_token_accuracy": 0.8569721296429634,
15822
+ "num_tokens": 29198880.0,
15823
+ "step": 17570
15824
+ },
15825
+ {
15826
+ "epoch": 0.8389505959269378,
15827
+ "grad_norm": 0.3323199450969696,
15828
+ "learning_rate": 1.1611071343354809e-05,
15829
+ "loss": 0.6364,
15830
+ "mean_token_accuracy": 0.8711786776781082,
15831
+ "num_tokens": 29215341.0,
15832
+ "step": 17580
15833
+ },
15834
+ {
15835
+ "epoch": 0.839427814695952,
15836
+ "grad_norm": 0.35589149594306946,
15837
+ "learning_rate": 1.1606299212598426e-05,
15838
+ "loss": 0.5241,
15839
+ "mean_token_accuracy": 0.886940547823906,
15840
+ "num_tokens": 29230599.0,
15841
+ "step": 17590
15842
+ },
15843
+ {
15844
+ "epoch": 0.8399050334649661,
15845
+ "grad_norm": 0.3645700216293335,
15846
+ "learning_rate": 1.1601527081842044e-05,
15847
+ "loss": 0.6459,
15848
+ "mean_token_accuracy": 0.8628365308046341,
15849
+ "num_tokens": 29247686.0,
15850
+ "step": 17600
15851
+ },
15852
+ {
15853
+ "epoch": 0.8403822522339803,
15854
+ "grad_norm": 0.4367329180240631,
15855
+ "learning_rate": 1.159675495108566e-05,
15856
+ "loss": 0.5824,
15857
+ "mean_token_accuracy": 0.8813767299056053,
15858
+ "num_tokens": 29263208.0,
15859
+ "step": 17610
15860
+ },
15861
+ {
15862
+ "epoch": 0.8408594710029945,
15863
+ "grad_norm": 0.4404272139072418,
15864
+ "learning_rate": 1.1591982820329277e-05,
15865
+ "loss": 0.7341,
15866
+ "mean_token_accuracy": 0.861807630956173,
15867
+ "num_tokens": 29282037.0,
15868
+ "step": 17620
15869
+ },
15870
+ {
15871
+ "epoch": 0.8413366897720087,
15872
+ "grad_norm": 0.32958847284317017,
15873
+ "learning_rate": 1.1587210689572894e-05,
15874
+ "loss": 0.5831,
15875
+ "mean_token_accuracy": 0.8764279022812843,
15876
+ "num_tokens": 29297330.0,
15877
+ "step": 17630
15878
+ },
15879
+ {
15880
+ "epoch": 0.8418139085410229,
15881
+ "grad_norm": 0.3422182500362396,
15882
+ "learning_rate": 1.1582438558816514e-05,
15883
+ "loss": 0.7288,
15884
+ "mean_token_accuracy": 0.8610541269183158,
15885
+ "num_tokens": 29314275.0,
15886
+ "step": 17640
15887
+ },
15888
+ {
15889
+ "epoch": 0.8422911273100371,
15890
+ "grad_norm": 0.3618062138557434,
15891
+ "learning_rate": 1.157766642806013e-05,
15892
+ "loss": 0.8217,
15893
+ "mean_token_accuracy": 0.8382393896579743,
15894
+ "num_tokens": 29334784.0,
15895
+ "step": 17650
15896
+ },
15897
+ {
15898
+ "epoch": 0.8427683460790513,
15899
+ "grad_norm": 0.3208582103252411,
15900
+ "learning_rate": 1.1572894297303747e-05,
15901
+ "loss": 0.6157,
15902
+ "mean_token_accuracy": 0.8792721211910248,
15903
+ "num_tokens": 29351815.0,
15904
+ "step": 17660
15905
+ },
15906
+ {
15907
+ "epoch": 0.8432455648480655,
15908
+ "grad_norm": 0.38699203729629517,
15909
+ "learning_rate": 1.1568122166547364e-05,
15910
+ "loss": 0.5618,
15911
+ "mean_token_accuracy": 0.8911767050623893,
15912
+ "num_tokens": 29366991.0,
15913
+ "step": 17670
15914
+ },
15915
+ {
15916
+ "epoch": 0.8437227836170796,
15917
+ "grad_norm": 0.36298489570617676,
15918
+ "learning_rate": 1.156335003579098e-05,
15919
+ "loss": 0.7076,
15920
+ "mean_token_accuracy": 0.8556164249777793,
15921
+ "num_tokens": 29384444.0,
15922
+ "step": 17680
15923
+ },
15924
+ {
15925
+ "epoch": 0.8442000023860938,
15926
+ "grad_norm": 0.32522013783454895,
15927
+ "learning_rate": 1.1558577905034597e-05,
15928
+ "loss": 0.6395,
15929
+ "mean_token_accuracy": 0.87257649153471,
15930
+ "num_tokens": 29401037.0,
15931
+ "step": 17690
15932
+ },
15933
+ {
15934
+ "epoch": 0.844677221155108,
15935
+ "grad_norm": 0.36789947748184204,
15936
+ "learning_rate": 1.1553805774278218e-05,
15937
+ "loss": 0.6614,
15938
+ "mean_token_accuracy": 0.8719203874468804,
15939
+ "num_tokens": 29418205.0,
15940
+ "step": 17700
15941
  }
15942
  ],
15943
  "logging_steps": 10,
 
15957
  "attributes": {}
15958
  }
15959
  },
15960
+ "total_flos": 6.62507884939518e+17,
15961
  "train_batch_size": 2,
15962
  "trial_name": null,
15963
  "trial_params": null