File size: 6,376 Bytes
d499dce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
{
    "version": "1.0",
    "truncation": null,
    "padding": null,
    "added_tokens": [
        {
            "id": 177,
            "content": "<unk>",
            "single_word": false,
            "lstrip": false,
            "rstrip": false,
            "normalized": false,
            "special": true
        }
    ],
    "normalizer": {
        "type": "Sequence",
        "normalizers": [
            {
                "type": "Lowercase"
            },
            {
                "type": "Replace",
                "pattern": {
                    "Regex": "[^_;:,.!?\u00a1\u00bf\u2014\u2026\"\u00ab\u00bb\u201c\u201d ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u0251\u0250\u0252\u00e6\u0253\u0299\u03b2\u0254\u0255\u00e7\u0257\u0256\u00f0\u02a4\u0259\u0258\u025a\u025b\u025c\u025d\u025e\u025f\u0284\u0261\u0260\u0262\u029b\u0266\u0267\u0127\u0265\u029c\u0268\u026a\u029d\u026d\u026c\u026b\u026e\u029f\u0271\u026f\u0270\u014b\u0273\u0272\u0274\u00f8\u0275\u0278\u03b8\u0153\u0276\u0298\u0279\u027a\u027e\u027b\u0280\u0281\u027d\u0282\u0283\u0288\u02a7\u0289\u028a\u028b\u2c71\u028c\u0263\u0264\u028d\u03c7\u028e\u028f\u0291\u0290\u0292\u0294\u02a1\u0295\u02a2\u01c0\u01c1\u01c2\u01c3\u02c8\u02cc\u02d0\u02d1\u02bc\u02b4\u02b0\u02b1\u02b2\u02b7\u02e0\u02e4\u02de\u2193\u2191\u2192\u2197\u2198\u0329']"
                },
                "content": ""
            },
            {
                "type": "Strip",
                "strip_left": true,
                "strip_right": true
            },
            {
                "type": "Replace",
                "pattern": {
                    "Regex": "(?=.)|(?<!^)$"
                },
                "content": "_"
            }
        ]
    },
    "pre_tokenizer": {
        "type": "Split",
        "pattern": {
            "Regex": ""
        },
        "behavior": "Isolated",
        "invert": false
    },
    "post_processor": null,
    "decoder": null,
    "model": {
        "vocab": {
            "_": 0,
            ";": 1,
            ":": 2,
            ",": 3,
            ".": 4,
            "!": 5,
            "?": 6,
            "\u00a1": 7,
            "\u00bf": 8,
            "\u2014": 9,
            "\u2026": 10,
            "\"": 11,
            "\u00ab": 12,
            "\u00bb": 13,
            "\u201c": 14,
            "\u201d": 15,
            " ": 16,
            "A": 17,
            "B": 18,
            "C": 19,
            "D": 20,
            "E": 21,
            "F": 22,
            "G": 23,
            "H": 24,
            "I": 25,
            "J": 26,
            "K": 27,
            "L": 28,
            "M": 29,
            "N": 30,
            "O": 31,
            "P": 32,
            "Q": 33,
            "R": 34,
            "S": 35,
            "T": 36,
            "U": 37,
            "V": 38,
            "W": 39,
            "X": 40,
            "Y": 41,
            "Z": 42,
            "a": 43,
            "b": 44,
            "c": 45,
            "d": 46,
            "e": 47,
            "f": 48,
            "g": 49,
            "h": 50,
            "i": 51,
            "j": 52,
            "k": 53,
            "l": 54,
            "m": 55,
            "n": 56,
            "o": 57,
            "p": 58,
            "q": 59,
            "r": 60,
            "s": 61,
            "t": 62,
            "u": 63,
            "v": 64,
            "w": 65,
            "x": 66,
            "y": 67,
            "z": 68,
            "\u0251": 69,
            "\u0250": 70,
            "\u0252": 71,
            "\u00e6": 72,
            "\u0253": 73,
            "\u0299": 74,
            "\u03b2": 75,
            "\u0254": 76,
            "\u0255": 77,
            "\u00e7": 78,
            "\u0257": 79,
            "\u0256": 80,
            "\u00f0": 81,
            "\u02a4": 82,
            "\u0259": 83,
            "\u0258": 84,
            "\u025a": 85,
            "\u025b": 86,
            "\u025c": 87,
            "\u025d": 88,
            "\u025e": 89,
            "\u025f": 90,
            "\u0284": 91,
            "\u0261": 92,
            "\u0260": 93,
            "\u0262": 94,
            "\u029b": 95,
            "\u0266": 96,
            "\u0267": 97,
            "\u0127": 98,
            "\u0265": 99,
            "\u029c": 100,
            "\u0268": 101,
            "\u026a": 102,
            "\u029d": 103,
            "\u026d": 104,
            "\u026c": 105,
            "\u026b": 106,
            "\u026e": 107,
            "\u029f": 108,
            "\u0271": 109,
            "\u026f": 110,
            "\u0270": 111,
            "\u014b": 112,
            "\u0273": 113,
            "\u0272": 114,
            "\u0274": 115,
            "\u00f8": 116,
            "\u0275": 117,
            "\u0278": 118,
            "\u03b8": 119,
            "\u0153": 120,
            "\u0276": 121,
            "\u0298": 122,
            "\u0279": 123,
            "\u027a": 124,
            "\u027e": 125,
            "\u027b": 126,
            "\u0280": 127,
            "\u0281": 128,
            "\u027d": 129,
            "\u0282": 130,
            "\u0283": 131,
            "\u0288": 132,
            "\u02a7": 133,
            "\u0289": 134,
            "\u028a": 135,
            "\u028b": 136,
            "\u2c71": 137,
            "\u028c": 138,
            "\u0263": 139,
            "\u0264": 140,
            "\u028d": 141,
            "\u03c7": 142,
            "\u028e": 143,
            "\u028f": 144,
            "\u0291": 145,
            "\u0290": 146,
            "\u0292": 147,
            "\u0294": 148,
            "\u02a1": 149,
            "\u0295": 150,
            "\u02a2": 151,
            "\u01c0": 152,
            "\u01c1": 153,
            "\u01c2": 154,
            "\u01c3": 155,
            "\u02c8": 156,
            "\u02cc": 157,
            "\u02d0": 158,
            "\u02d1": 159,
            "\u02bc": 160,
            "\u02b4": 161,
            "\u02b0": 162,
            "\u02b1": 163,
            "\u02b2": 164,
            "\u02b7": 165,
            "\u02e0": 166,
            "\u02e4": 167,
            "\u02de": 168,
            "\u2193": 169,
            "\u2191": 170,
            "\u2192": 171,
            "\u2197": 172,
            "\u2198": 173,
            "null": 174,
            "\u0329": 175,
            "'": 176,
            "<unk>": 177
        }
    }
}