{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 43,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Lowercase"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[^|n\u00ed\u1ecdi\u00e0\u1eb9tarb\u00ecw\u0301l\u0300k\u00e1s\u00faogmy\u00f3puej\u00f2f\u1e63\u00e9d\u00e8\u00f9\u0144h\\-\u01f9\u2014' ]"
        },
        "content": ""
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "(?=.)|(?<!^)$"
        },
        "content": "|"
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": ""
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "vocab": {
      "|": 0,
      "n": 1,
      "\u00ed": 2,
      "\u1ecd": 3,
      "i": 4,
      "\u00e0": 5,
      "\u1eb9": 6,
      "t": 7,
      "a": 8,
      "r": 9,
      "b": 10,
      "\u00ec": 11,
      "w": 12,
      "\u0301": 13,
      "l": 14,
      "\u0300": 15,
      "k": 16,
      "\u00e1": 17,
      "s": 18,
      "\u00fa": 19,
      "o": 20,
      "g": 21,
      "m": 22,
      "y": 23,
      "\u00f3": 24,
      "p": 25,
      "u": 26,
      "e": 27,
      "j": 28,
      "\u00f2": 29,
      "f": 30,
      "\u1e63": 31,
      "\u00e9": 32,
      "d": 33,
      "\u00e8": 34,
      "\u00f9": 35,
      "\u0144": 36,
      "h": 37,
      "-": 38,
      "\u01f9": 39,
      "\u2014": 40,
      "'": 41,
      " ": 42,
      "<unk>": 43
    }
  }
}