Mr-Vicky-01 committed on
Commit
826d30e
·
verified ·
1 Parent(s): 7027ace

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +101 -7
  2. tokenizer_config.json +101 -1
special_tokens_map.json CHANGED
@@ -1,12 +1,106 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ],
11
  "eos_token": {
12
  "content": "</s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<",
4
+ "<extra_id_0>",
5
+ "<extra_id_1>",
6
+ "<extra_id_2>",
7
+ "<extra_id_3>",
8
+ "<extra_id_4>",
9
+ "<extra_id_5>",
10
+ "<extra_id_6>",
11
+ "<extra_id_7>",
12
+ "<extra_id_8>",
13
+ "<extra_id_9>",
14
+ "<extra_id_10>",
15
+ "<extra_id_11>",
16
+ "<extra_id_12>",
17
+ "<extra_id_13>",
18
+ "<extra_id_14>",
19
+ "<extra_id_15>",
20
+ "<extra_id_16>",
21
+ "<extra_id_17>",
22
+ "<extra_id_18>",
23
+ "<extra_id_19>",
24
+ "<extra_id_20>",
25
+ "<extra_id_21>",
26
+ "<extra_id_22>",
27
+ "<extra_id_23>",
28
+ "<extra_id_24>",
29
+ "<extra_id_25>",
30
+ "<extra_id_26>",
31
+ "<extra_id_27>",
32
+ "<extra_id_28>",
33
+ "<extra_id_29>",
34
+ "<extra_id_30>",
35
+ "<extra_id_31>",
36
+ "<extra_id_32>",
37
+ "<extra_id_33>",
38
+ "<extra_id_34>",
39
+ "<extra_id_35>",
40
+ "<extra_id_36>",
41
+ "<extra_id_37>",
42
+ "<extra_id_38>",
43
+ "<extra_id_39>",
44
+ "<extra_id_40>",
45
+ "<extra_id_41>",
46
+ "<extra_id_42>",
47
+ "<extra_id_43>",
48
+ "<extra_id_44>",
49
+ "<extra_id_45>",
50
+ "<extra_id_46>",
51
+ "<extra_id_47>",
52
+ "<extra_id_48>",
53
+ "<extra_id_49>",
54
+ "<extra_id_50>",
55
+ "<extra_id_51>",
56
+ "<extra_id_52>",
57
+ "<extra_id_53>",
58
+ "<extra_id_54>",
59
+ "<extra_id_55>",
60
+ "<extra_id_56>",
61
+ "<extra_id_57>",
62
+ "<extra_id_58>",
63
+ "<extra_id_59>",
64
+ "<extra_id_60>",
65
+ "<extra_id_61>",
66
+ "<extra_id_62>",
67
+ "<extra_id_63>",
68
+ "<extra_id_64>",
69
+ "<extra_id_65>",
70
+ "<extra_id_66>",
71
+ "<extra_id_67>",
72
+ "<extra_id_68>",
73
+ "<extra_id_69>",
74
+ "<extra_id_70>",
75
+ "<extra_id_71>",
76
+ "<extra_id_72>",
77
+ "<extra_id_73>",
78
+ "<extra_id_74>",
79
+ "<extra_id_75>",
80
+ "<extra_id_76>",
81
+ "<extra_id_77>",
82
+ "<extra_id_78>",
83
+ "<extra_id_79>",
84
+ "<extra_id_80>",
85
+ "<extra_id_81>",
86
+ "<extra_id_82>",
87
+ "<extra_id_83>",
88
+ "<extra_id_84>",
89
+ "<extra_id_85>",
90
+ "<extra_id_86>",
91
+ "<extra_id_87>",
92
+ "<extra_id_88>",
93
+ "<extra_id_89>",
94
+ "<extra_id_90>",
95
+ "<extra_id_91>",
96
+ "<extra_id_92>",
97
+ "<extra_id_93>",
98
+ "<extra_id_94>",
99
+ "<extra_id_95>",
100
+ "<extra_id_96>",
101
+ "<extra_id_97>",
102
+ "<extra_id_98>",
103
+ "<extra_id_99>"
104
  ],
105
  "eos_token": {
106
  "content": "</s>",
tokenizer_config.json CHANGED
@@ -834,7 +834,107 @@
834
  }
835
  },
836
  "additional_special_tokens": [
837
- "<"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
838
  ],
839
  "clean_up_tokenization_spaces": false,
840
  "eos_token": "</s>",
 
834
  }
835
  },
836
  "additional_special_tokens": [
837
+ "<",
838
+ "<extra_id_0>",
839
+ "<extra_id_1>",
840
+ "<extra_id_2>",
841
+ "<extra_id_3>",
842
+ "<extra_id_4>",
843
+ "<extra_id_5>",
844
+ "<extra_id_6>",
845
+ "<extra_id_7>",
846
+ "<extra_id_8>",
847
+ "<extra_id_9>",
848
+ "<extra_id_10>",
849
+ "<extra_id_11>",
850
+ "<extra_id_12>",
851
+ "<extra_id_13>",
852
+ "<extra_id_14>",
853
+ "<extra_id_15>",
854
+ "<extra_id_16>",
855
+ "<extra_id_17>",
856
+ "<extra_id_18>",
857
+ "<extra_id_19>",
858
+ "<extra_id_20>",
859
+ "<extra_id_21>",
860
+ "<extra_id_22>",
861
+ "<extra_id_23>",
862
+ "<extra_id_24>",
863
+ "<extra_id_25>",
864
+ "<extra_id_26>",
865
+ "<extra_id_27>",
866
+ "<extra_id_28>",
867
+ "<extra_id_29>",
868
+ "<extra_id_30>",
869
+ "<extra_id_31>",
870
+ "<extra_id_32>",
871
+ "<extra_id_33>",
872
+ "<extra_id_34>",
873
+ "<extra_id_35>",
874
+ "<extra_id_36>",
875
+ "<extra_id_37>",
876
+ "<extra_id_38>",
877
+ "<extra_id_39>",
878
+ "<extra_id_40>",
879
+ "<extra_id_41>",
880
+ "<extra_id_42>",
881
+ "<extra_id_43>",
882
+ "<extra_id_44>",
883
+ "<extra_id_45>",
884
+ "<extra_id_46>",
885
+ "<extra_id_47>",
886
+ "<extra_id_48>",
887
+ "<extra_id_49>",
888
+ "<extra_id_50>",
889
+ "<extra_id_51>",
890
+ "<extra_id_52>",
891
+ "<extra_id_53>",
892
+ "<extra_id_54>",
893
+ "<extra_id_55>",
894
+ "<extra_id_56>",
895
+ "<extra_id_57>",
896
+ "<extra_id_58>",
897
+ "<extra_id_59>",
898
+ "<extra_id_60>",
899
+ "<extra_id_61>",
900
+ "<extra_id_62>",
901
+ "<extra_id_63>",
902
+ "<extra_id_64>",
903
+ "<extra_id_65>",
904
+ "<extra_id_66>",
905
+ "<extra_id_67>",
906
+ "<extra_id_68>",
907
+ "<extra_id_69>",
908
+ "<extra_id_70>",
909
+ "<extra_id_71>",
910
+ "<extra_id_72>",
911
+ "<extra_id_73>",
912
+ "<extra_id_74>",
913
+ "<extra_id_75>",
914
+ "<extra_id_76>",
915
+ "<extra_id_77>",
916
+ "<extra_id_78>",
917
+ "<extra_id_79>",
918
+ "<extra_id_80>",
919
+ "<extra_id_81>",
920
+ "<extra_id_82>",
921
+ "<extra_id_83>",
922
+ "<extra_id_84>",
923
+ "<extra_id_85>",
924
+ "<extra_id_86>",
925
+ "<extra_id_87>",
926
+ "<extra_id_88>",
927
+ "<extra_id_89>",
928
+ "<extra_id_90>",
929
+ "<extra_id_91>",
930
+ "<extra_id_92>",
931
+ "<extra_id_93>",
932
+ "<extra_id_94>",
933
+ "<extra_id_95>",
934
+ "<extra_id_96>",
935
+ "<extra_id_97>",
936
+ "<extra_id_98>",
937
+ "<extra_id_99>"
938
  ],
939
  "clean_up_tokenization_spaces": false,
940
  "eos_token": "</s>",