Stuti103 committed
Commit 3f8603a · verified · 1 parent: 16ff1ac

Training in progress, step 15600, checkpoint

.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
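The only substantive change to .gitattributes is the new rule routing last-checkpoint/tokenizer.json through Git LFS (filter=lfs diff=lfs merge=lfs -text), mirroring the rule that already covered the top-level tokenizer.json. As a rough illustration only (Git's real .gitattributes matcher has more rules than fnmatch-style globbing, and the helper below is hypothetical), the patterns can be approximated like this:

    # Hypothetical helper: approximate which of this repo's LFS rules would match
    # a given path. This is NOT Git's matcher; bare patterns in .gitattributes
    # also match basenames, which is only loosely imitated here.
    from fnmatch import fnmatch

    LFS_PATTERNS = [
        "*.zst",
        "*tfevents*",
        "tokenizer.json",
        "last-checkpoint/tokenizer.json",  # rule added by this commit
    ]

    def matches_lfs(path: str) -> bool:
        basename = path.rsplit("/", 1)[-1]
        return any(fnmatch(path, pat) or fnmatch(basename, pat) for pat in LFS_PATTERNS)

    print(matches_lfs("last-checkpoint/tokenizer.json"))  # True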
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dce9c5324820ff451aba14ba419aeec1add1a412f3edb32c6c7c0cf2adea8138
+ oid sha256:70537360c9daddf04205b6fbd293c0d4965ec40c67ef261daf546af624afd98f
  size 3541119728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:42d8ed1310dd3a3e758a6e193c344d11e872913dc2ecb72c8250191cb9dd1811
+ oid sha256:7ca05e267a448d87fef33633929234240f69ebde46a8d89d8a7bbe11cbc11f6c
  size 778374186
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88acc8857c9b71c170e2e7e131e921953f71be549574de8a4567e54277800a43
+ oid sha256:8955cd9d24ecd092d5a24dfa8ee9d34839e14159c86f280833a6a8e4cb640de6
  size 1064
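The three binary checkpoint files above (adapter_model.safetensors, optimizer.pt, scheduler.pt) are tracked with Git LFS, so each diff only swaps the pointer's oid sha256 while the size stays unchanged. A minimal sketch for reading such a pointer, assuming the repository was cloned with GIT_LFS_SKIP_SMUDGE=1 so the files on disk are still pointer text rather than the downloaded binaries:

    # Minimal sketch: parse a Git LFS pointer file of the form shown in the
    # diffs above ("version ...", "oid sha256:<hex>", "size <bytes>").
    def read_lfs_pointer(path: str) -> dict:
        fields = {}
        with open(path, "r", encoding="utf-8") as fh:
            for line in fh:
                key, _, value = line.strip().partition(" ")
                if key:
                    fields[key] = value
        return fields

    ptr = read_lfs_pointer("last-checkpoint/adapter_model.safetensors")
    print(ptr["oid"], ptr["size"])  # sha256:7053... 3541119728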
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7301447165917035,
+ "epoch": 0.7444612796621292,
  "eval_steps": 500,
- "global_step": 15300,
+ "global_step": 15600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -13778,6 +13778,276 @@
  "mean_token_accuracy": 0.8891868680715561,
  "num_tokens": 25427005.0,
  "step": 15300
+ },
+ {
+ "epoch": 0.7306219353607177,
+ "grad_norm": 0.3419613242149353,
+ "learning_rate": 1.2694345025053687e-05,
+ "loss": 0.6515,
+ "mean_token_accuracy": 0.8683709055185318,
+ "num_tokens": 25442614.0,
+ "step": 15310
+ },
+ {
+ "epoch": 0.7310991541297319,
+ "grad_norm": 0.35376232862472534,
+ "learning_rate": 1.2689572894297304e-05,
+ "loss": 0.6847,
+ "mean_token_accuracy": 0.8579560115933418,
+ "num_tokens": 25460244.0,
+ "step": 15320
+ },
+ {
+ "epoch": 0.7315763728987461,
+ "grad_norm": 0.4027968943119049,
+ "learning_rate": 1.2684800763540922e-05,
+ "loss": 0.5904,
+ "mean_token_accuracy": 0.8813899368047714,
+ "num_tokens": 25477478.0,
+ "step": 15330
+ },
+ {
+ "epoch": 0.7320535916677603,
+ "grad_norm": 0.33690837025642395,
+ "learning_rate": 1.268002863278454e-05,
+ "loss": 0.6076,
+ "mean_token_accuracy": 0.8772727206349373,
+ "num_tokens": 25494023.0,
+ "step": 15340
+ },
+ {
+ "epoch": 0.7325308104367745,
+ "grad_norm": 0.3748989999294281,
+ "learning_rate": 1.2675256502028157e-05,
+ "loss": 0.6861,
+ "mean_token_accuracy": 0.8666884452104568,
+ "num_tokens": 25510507.0,
+ "step": 15350
+ },
+ {
+ "epoch": 0.7330080292057887,
+ "grad_norm": 0.3961426317691803,
+ "learning_rate": 1.2670484371271774e-05,
+ "loss": 0.5477,
+ "mean_token_accuracy": 0.8997573867440224,
+ "num_tokens": 25525876.0,
+ "step": 15360
+ },
+ {
+ "epoch": 0.7334852479748029,
+ "grad_norm": 0.32060977816581726,
+ "learning_rate": 1.266571224051539e-05,
+ "loss": 0.6086,
+ "mean_token_accuracy": 0.8730768218636513,
+ "num_tokens": 25542626.0,
+ "step": 15370
+ },
+ {
+ "epoch": 0.733962466743817,
+ "grad_norm": 0.4424884617328644,
+ "learning_rate": 1.2660940109759007e-05,
+ "loss": 0.6637,
+ "mean_token_accuracy": 0.8715329870581627,
+ "num_tokens": 25559552.0,
+ "step": 15380
+ },
+ {
+ "epoch": 0.7344396855128312,
+ "grad_norm": 0.2700168192386627,
+ "learning_rate": 1.2656167979002624e-05,
+ "loss": 0.6507,
+ "mean_token_accuracy": 0.8775774970650673,
+ "num_tokens": 25575310.0,
+ "step": 15390
+ },
+ {
+ "epoch": 0.7349169042818454,
+ "grad_norm": 0.34019699692726135,
+ "learning_rate": 1.2651395848246244e-05,
+ "loss": 0.6687,
+ "mean_token_accuracy": 0.8798381179571152,
+ "num_tokens": 25590414.0,
+ "step": 15400
+ },
+ {
+ "epoch": 0.7353941230508596,
+ "grad_norm": 0.41453129053115845,
+ "learning_rate": 1.2646623717489861e-05,
+ "loss": 0.692,
+ "mean_token_accuracy": 0.8671859934926033,
+ "num_tokens": 25608759.0,
+ "step": 15410
+ },
+ {
+ "epoch": 0.7358713418198738,
+ "grad_norm": 0.37873607873916626,
+ "learning_rate": 1.2641851586733478e-05,
+ "loss": 0.7208,
+ "mean_token_accuracy": 0.8635074034333229,
+ "num_tokens": 25626985.0,
+ "step": 15420
+ },
+ {
+ "epoch": 0.736348560588888,
+ "grad_norm": 0.3016092777252197,
+ "learning_rate": 1.2637079455977094e-05,
+ "loss": 0.6058,
+ "mean_token_accuracy": 0.8779332295060158,
+ "num_tokens": 25642693.0,
+ "step": 15430
+ },
+ {
+ "epoch": 0.7368257793579022,
+ "grad_norm": 0.3086267411708832,
+ "learning_rate": 1.2632307325220713e-05,
+ "loss": 0.6249,
+ "mean_token_accuracy": 0.8778651550412178,
+ "num_tokens": 25659131.0,
+ "step": 15440
+ },
+ {
+ "epoch": 0.7373029981269164,
+ "grad_norm": 0.3954660892486572,
+ "learning_rate": 1.262753519446433e-05,
+ "loss": 0.6073,
+ "mean_token_accuracy": 0.8788123086094857,
+ "num_tokens": 25675282.0,
+ "step": 15450
+ },
+ {
+ "epoch": 0.7377802168959305,
+ "grad_norm": 0.3375210165977478,
+ "learning_rate": 1.2622763063707946e-05,
+ "loss": 0.5759,
+ "mean_token_accuracy": 0.8832358941435814,
+ "num_tokens": 25690284.0,
+ "step": 15460
+ },
+ {
+ "epoch": 0.7382574356649447,
+ "grad_norm": 0.429108202457428,
+ "learning_rate": 1.2617990932951564e-05,
+ "loss": 0.5676,
+ "mean_token_accuracy": 0.8806645110249519,
+ "num_tokens": 25705824.0,
+ "step": 15470
+ },
+ {
+ "epoch": 0.7387346544339589,
+ "grad_norm": 0.3869950771331787,
+ "learning_rate": 1.2613218802195181e-05,
+ "loss": 0.56,
+ "mean_token_accuracy": 0.8864919826388359,
+ "num_tokens": 25721448.0,
+ "step": 15480
+ },
+ {
+ "epoch": 0.7392118732029731,
+ "grad_norm": 0.2914048731327057,
+ "learning_rate": 1.26084466714388e-05,
+ "loss": 0.6239,
+ "mean_token_accuracy": 0.880633682012558,
+ "num_tokens": 25736900.0,
+ "step": 15490
+ },
+ {
+ "epoch": 0.7396890919719873,
+ "grad_norm": 0.3728204667568207,
+ "learning_rate": 1.2603674540682416e-05,
+ "loss": 0.6358,
+ "mean_token_accuracy": 0.8782069548964501,
+ "num_tokens": 25753432.0,
+ "step": 15500
+ },
+ {
+ "epoch": 0.7401663107410015,
+ "grad_norm": 0.3584674596786499,
+ "learning_rate": 1.2598902409926033e-05,
+ "loss": 0.5266,
+ "mean_token_accuracy": 0.887596707046032,
+ "num_tokens": 25769120.0,
+ "step": 15510
+ },
+ {
+ "epoch": 0.7406435295100157,
+ "grad_norm": 0.4318288564682007,
+ "learning_rate": 1.259413027916965e-05,
+ "loss": 0.5954,
+ "mean_token_accuracy": 0.8761422768235206,
+ "num_tokens": 25785477.0,
+ "step": 15520
+ },
+ {
+ "epoch": 0.7411207482790299,
+ "grad_norm": 0.3693118989467621,
+ "learning_rate": 1.2589358148413266e-05,
+ "loss": 0.6766,
+ "mean_token_accuracy": 0.8750508233904839,
+ "num_tokens": 25803338.0,
+ "step": 15530
+ },
+ {
+ "epoch": 0.741597967048044,
+ "grad_norm": 0.30119234323501587,
+ "learning_rate": 1.2584586017656886e-05,
+ "loss": 0.6606,
+ "mean_token_accuracy": 0.8698265522718429,
+ "num_tokens": 25819899.0,
+ "step": 15540
+ },
+ {
+ "epoch": 0.7420751858170582,
+ "grad_norm": 0.702343761920929,
+ "learning_rate": 1.2579813886900503e-05,
+ "loss": 0.7339,
+ "mean_token_accuracy": 0.869141760468483,
+ "num_tokens": 25837531.0,
+ "step": 15550
+ },
+ {
+ "epoch": 0.7425524045860724,
+ "grad_norm": 0.35476893186569214,
+ "learning_rate": 1.257504175614412e-05,
+ "loss": 0.6443,
+ "mean_token_accuracy": 0.8738871991634369,
+ "num_tokens": 25854586.0,
+ "step": 15560
+ },
+ {
+ "epoch": 0.7430296233550866,
+ "grad_norm": 0.4192853569984436,
+ "learning_rate": 1.2570269625387736e-05,
+ "loss": 0.6971,
+ "mean_token_accuracy": 0.8595242589712143,
+ "num_tokens": 25871022.0,
+ "step": 15570
+ },
+ {
+ "epoch": 0.7435068421241008,
+ "grad_norm": 0.3494696319103241,
+ "learning_rate": 1.2565497494631353e-05,
+ "loss": 0.6859,
+ "mean_token_accuracy": 0.853353051841259,
+ "num_tokens": 25888407.0,
+ "step": 15580
+ },
+ {
+ "epoch": 0.743984060893115,
+ "grad_norm": 0.3698543906211853,
+ "learning_rate": 1.2560725363874971e-05,
+ "loss": 0.7607,
+ "mean_token_accuracy": 0.8620315045118332,
+ "num_tokens": 25906131.0,
+ "step": 15590
+ },
+ {
+ "epoch": 0.7444612796621292,
+ "grad_norm": 0.3582072854042053,
+ "learning_rate": 1.2555953233118588e-05,
+ "loss": 0.5744,
+ "mean_token_accuracy": 0.8893922328948974,
+ "num_tokens": 25920582.0,
+ "step": 15600
  }
  ],
  "logging_steps": 10,
 
@@ -13797,7 +14067,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.726368297825444e+17,
+ "total_flos": 5.837477802884506e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null