speedcell4 committed on
Commit
28bd6f4
·
verified ·
1 Parent(s): 1f93fb9

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,43 @@
1
  {
2
- "<model>": 64001,
3
- "<vocab>": 64002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  }
 
1
  {
2
+ "<af>": 64001,
3
+ "<am>": 64002,
4
+ "<ar>": 64003,
5
+ "<ast>": 64004,
6
+ "<be>": 64005,
7
+ "<bg>": 64006,
8
+ "<bn>": 64007,
9
+ "<bs>": 64008,
10
+ "<ca>": 64009,
11
+ "<cs>": 64010,
12
+ "<da>": 64011,
13
+ "<de>": 64012,
14
+ "<en>": 64013,
15
+ "<es>": 64014,
16
+ "<fr>": 64015,
17
+ "<gu>": 64016,
18
+ "<ha>": 64017,
19
+ "<he>": 64018,
20
+ "<hi>": 64019,
21
+ "<is>": 64020,
22
+ "<it>": 64021,
23
+ "<kab>": 64022,
24
+ "<kn>": 64023,
25
+ "<lb>": 64024,
26
+ "<mr>": 64025,
27
+ "<mt>": 64026,
28
+ "<ne>": 64027,
29
+ "<nl>": 64028,
30
+ "<no>": 64029,
31
+ "<oc>": 64030,
32
+ "<pl>": 64031,
33
+ "<pt>": 64032,
34
+ "<ro>": 64033,
35
+ "<ru>": 64034,
36
+ "<sd>": 64035,
37
+ "<so>": 64036,
38
+ "<sr>": 64037,
39
+ "<sv>": 64038,
40
+ "<ti>": 64039,
41
+ "<uk>": 64040,
42
+ "<ur>": 64041
43
  }
special_tokens_map.json CHANGED
@@ -1,7 +1,46 @@
1
  {
2
  "additional_special_tokens": [
3
- "<model>",
4
- "<vocab>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<af>",
4
+ "<am>",
5
+ "<ar>",
6
+ "<ast>",
7
+ "<be>",
8
+ "<bg>",
9
+ "<bn>",
10
+ "<bs>",
11
+ "<ca>",
12
+ "<cs>",
13
+ "<da>",
14
+ "<de>",
15
+ "<en>",
16
+ "<es>",
17
+ "<fr>",
18
+ "<gu>",
19
+ "<ha>",
20
+ "<he>",
21
+ "<hi>",
22
+ "<is>",
23
+ "<it>",
24
+ "<kab>",
25
+ "<kn>",
26
+ "<lb>",
27
+ "<mr>",
28
+ "<mt>",
29
+ "<ne>",
30
+ "<nl>",
31
+ "<no>",
32
+ "<oc>",
33
+ "<pl>",
34
+ "<pt>",
35
+ "<ro>",
36
+ "<ru>",
37
+ "<sd>",
38
+ "<so>",
39
+ "<sr>",
40
+ "<sv>",
41
+ "<ti>",
42
+ "<uk>",
43
+ "<ur>"
44
  ],
45
  "bos_token": {
46
  "content": "<s>",
tokenizer.json CHANGED
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 64001,
44
- "content": "<model>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
@@ -50,7 +50,358 @@
50
  },
51
  {
52
  "id": 64002,
53
- "content": "<vocab>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
@@ -90,7 +441,7 @@
90
  "single": [
91
  {
92
  "SpecialToken": {
93
- "id": "<unk>",
94
  "type_id": 0
95
  }
96
  },
@@ -110,7 +461,7 @@
110
  "pair": [
111
  {
112
  "SpecialToken": {
113
- "id": "<unk>",
114
  "type_id": 0
115
  }
116
  },
@@ -143,13 +494,13 @@
143
  "</s>"
144
  ]
145
  },
146
- "<unk>": {
147
- "id": "<unk>",
148
  "ids": [
149
- 3
150
  ],
151
  "tokens": [
152
- "<unk>"
153
  ]
154
  }
155
  }
 
41
  },
42
  {
43
  "id": 64001,
44
+ "content": "<af>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
50
  },
51
  {
52
  "id": 64002,
53
+ "content": "<am>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 64003,
62
+ "content": "<ar>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ },
69
+ {
70
+ "id": 64004,
71
+ "content": "<ast>",
72
+ "single_word": false,
73
+ "lstrip": false,
74
+ "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
+ },
78
+ {
79
+ "id": 64005,
80
+ "content": "<be>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 64006,
89
+ "content": "<bg>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 64007,
98
+ "content": "<bn>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 64008,
107
+ "content": "<bs>",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
+ },
114
+ {
115
+ "id": 64009,
116
+ "content": "<ca>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
+ },
123
+ {
124
+ "id": 64010,
125
+ "content": "<cs>",
126
+ "single_word": false,
127
+ "lstrip": false,
128
+ "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
+ },
132
+ {
133
+ "id": 64011,
134
+ "content": "<da>",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
+ },
141
+ {
142
+ "id": 64012,
143
+ "content": "<de>",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
+ },
150
+ {
151
+ "id": 64013,
152
+ "content": "<en>",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
+ },
159
+ {
160
+ "id": 64014,
161
+ "content": "<es>",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 64015,
170
+ "content": "<fr>",
171
+ "single_word": false,
172
+ "lstrip": false,
173
+ "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
+ },
177
+ {
178
+ "id": 64016,
179
+ "content": "<gu>",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
+ },
186
+ {
187
+ "id": 64017,
188
+ "content": "<ha>",
189
+ "single_word": false,
190
+ "lstrip": false,
191
+ "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
+ },
195
+ {
196
+ "id": 64018,
197
+ "content": "<he>",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
+ },
204
+ {
205
+ "id": 64019,
206
+ "content": "<hi>",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 64020,
215
+ "content": "<is>",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 64021,
224
+ "content": "<it>",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 64022,
233
+ "content": "<kab>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 64023,
242
+ "content": "<kn>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 64024,
251
+ "content": "<lb>",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 64025,
260
+ "content": "<mr>",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 64026,
269
+ "content": "<mt>",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 64027,
278
+ "content": "<ne>",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 64028,
287
+ "content": "<nl>",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 64029,
296
+ "content": "<no>",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 64030,
305
+ "content": "<oc>",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
+ },
312
+ {
313
+ "id": 64031,
314
+ "content": "<pl>",
315
+ "single_word": false,
316
+ "lstrip": false,
317
+ "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
+ },
321
+ {
322
+ "id": 64032,
323
+ "content": "<pt>",
324
+ "single_word": false,
325
+ "lstrip": false,
326
+ "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
+ },
330
+ {
331
+ "id": 64033,
332
+ "content": "<ro>",
333
+ "single_word": false,
334
+ "lstrip": false,
335
+ "rstrip": false,
336
+ "normalized": false,
337
+ "special": true
338
+ },
339
+ {
340
+ "id": 64034,
341
+ "content": "<ru>",
342
+ "single_word": false,
343
+ "lstrip": false,
344
+ "rstrip": false,
345
+ "normalized": false,
346
+ "special": true
347
+ },
348
+ {
349
+ "id": 64035,
350
+ "content": "<sd>",
351
+ "single_word": false,
352
+ "lstrip": false,
353
+ "rstrip": false,
354
+ "normalized": false,
355
+ "special": true
356
+ },
357
+ {
358
+ "id": 64036,
359
+ "content": "<so>",
360
+ "single_word": false,
361
+ "lstrip": false,
362
+ "rstrip": false,
363
+ "normalized": false,
364
+ "special": true
365
+ },
366
+ {
367
+ "id": 64037,
368
+ "content": "<sr>",
369
+ "single_word": false,
370
+ "lstrip": false,
371
+ "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
+ },
375
+ {
376
+ "id": 64038,
377
+ "content": "<sv>",
378
+ "single_word": false,
379
+ "lstrip": false,
380
+ "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
+ },
384
+ {
385
+ "id": 64039,
386
+ "content": "<ti>",
387
+ "single_word": false,
388
+ "lstrip": false,
389
+ "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
+ },
393
+ {
394
+ "id": 64040,
395
+ "content": "<uk>",
396
+ "single_word": false,
397
+ "lstrip": false,
398
+ "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
+ },
402
+ {
403
+ "id": 64041,
404
+ "content": "<ur>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
 
441
  "single": [
442
  {
443
  "SpecialToken": {
444
+ "id": "<en>",
445
  "type_id": 0
446
  }
447
  },
 
461
  "pair": [
462
  {
463
  "SpecialToken": {
464
+ "id": "<en>",
465
  "type_id": 0
466
  }
467
  },
 
494
  "</s>"
495
  ]
496
  },
497
+ "<en>": {
498
+ "id": "<en>",
499
  "ids": [
500
+ 64013
501
  ],
502
  "tokens": [
503
+ "<en>"
504
  ]
505
  }
506
  }
tokenizer_config.json CHANGED
@@ -34,7 +34,7 @@
34
  "special": true
35
  },
36
  "64001": {
37
- "content": "<model>",
38
  "lstrip": false,
39
  "normalized": false,
40
  "rstrip": false,
@@ -42,7 +42,319 @@
42
  "special": true
43
  },
44
  "64002": {
45
- "content": "<vocab>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
@@ -51,8 +363,47 @@
51
  }
52
  },
53
  "additional_special_tokens": [
54
- "<model>",
55
- "<vocab>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  ],
57
  "bos_token": "<s>",
58
  "clean_up_tokenization_spaces": true,
 
34
  "special": true
35
  },
36
  "64001": {
37
+ "content": "<af>",
38
  "lstrip": false,
39
  "normalized": false,
40
  "rstrip": false,
 
42
  "special": true
43
  },
44
  "64002": {
45
+ "content": "<am>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "64003": {
53
+ "content": "<ar>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "64004": {
61
+ "content": "<ast>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "64005": {
69
+ "content": "<be>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "64006": {
77
+ "content": "<bg>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "64007": {
85
+ "content": "<bn>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "64008": {
93
+ "content": "<bs>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "64009": {
101
+ "content": "<ca>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "64010": {
109
+ "content": "<cs>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "64011": {
117
+ "content": "<da>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "64012": {
125
+ "content": "<de>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "64013": {
133
+ "content": "<en>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "64014": {
141
+ "content": "<es>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "64015": {
149
+ "content": "<fr>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "64016": {
157
+ "content": "<gu>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "64017": {
165
+ "content": "<ha>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "64018": {
173
+ "content": "<he>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "64019": {
181
+ "content": "<hi>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "64020": {
189
+ "content": "<is>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "64021": {
197
+ "content": "<it>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "64022": {
205
+ "content": "<kab>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "64023": {
213
+ "content": "<kn>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "64024": {
221
+ "content": "<lb>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "64025": {
229
+ "content": "<mr>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "64026": {
237
+ "content": "<mt>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "64027": {
245
+ "content": "<ne>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "64028": {
253
+ "content": "<nl>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "64029": {
261
+ "content": "<no>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "64030": {
269
+ "content": "<oc>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "64031": {
277
+ "content": "<pl>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "64032": {
285
+ "content": "<pt>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "64033": {
293
+ "content": "<ro>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "64034": {
301
+ "content": "<ru>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "64035": {
309
+ "content": "<sd>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "64036": {
317
+ "content": "<so>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "64037": {
325
+ "content": "<sr>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "64038": {
333
+ "content": "<sv>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "64039": {
341
+ "content": "<ti>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "64040": {
349
+ "content": "<uk>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "64041": {
357
+ "content": "<ur>",
358
  "lstrip": false,
359
  "normalized": false,
360
  "rstrip": false,
 
363
  }
364
  },
365
  "additional_special_tokens": [
366
+ "<af>",
367
+ "<am>",
368
+ "<ar>",
369
+ "<ast>",
370
+ "<be>",
371
+ "<bg>",
372
+ "<bn>",
373
+ "<bs>",
374
+ "<ca>",
375
+ "<cs>",
376
+ "<da>",
377
+ "<de>",
378
+ "<en>",
379
+ "<es>",
380
+ "<fr>",
381
+ "<gu>",
382
+ "<ha>",
383
+ "<he>",
384
+ "<hi>",
385
+ "<is>",
386
+ "<it>",
387
+ "<kab>",
388
+ "<kn>",
389
+ "<lb>",
390
+ "<mr>",
391
+ "<mt>",
392
+ "<ne>",
393
+ "<nl>",
394
+ "<no>",
395
+ "<oc>",
396
+ "<pl>",
397
+ "<pt>",
398
+ "<ro>",
399
+ "<ru>",
400
+ "<sd>",
401
+ "<so>",
402
+ "<sr>",
403
+ "<sv>",
404
+ "<ti>",
405
+ "<uk>",
406
+ "<ur>"
407
  ],
408
  "bos_token": "<s>",
409
  "clean_up_tokenization_spaces": true,