{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "<UNK>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 10000,
"vocab": {
"<UNK>": 0,
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"e": 5,
"f": 6,
"g": 7,
"h": 8,
"i": 9,
"k": 10,
"l": 11,
"m": 12,
"n": 13,
"o": 14,
"p": 15,
"q": 16,
"r": 17,
"s": 18,
"t": 19,
"u": 20,
"v": 21,
"w": 22,
"x": 23,
"y": 24,
"z": 25,
"##h": 26,
"##g": 27,
"##k": 28,
"##s": 29,
"##t": 30,
"##e": 31,
"##a": 32,
"##w": 33,
"##l": 34,
"##n": 35,
"##v": 36,
"##r": 37,
"##p": 38,
"##i": 39,
"##q": 40,
"##f": 41,
"##d": 42,
"##m": 43,
"##y": 44,
"##x": 45,
"##c": 46,
"##z": 47,
"##u": 48,
"##o": 49,
"##b": 50,
"##aa": 51,
"##ll": 52,
"##la": 53,
"##gg": 54,
"##rr": 55,
"##va": 56,
"##ga": 57,
"##ra": 58,
"##lv": 59,
"##pa": 60,
"##lg": 61,
"##sa": 62,
"##lr": 63,
"##ea": 64,
"##vv": 65,
"##da": 66,
"##ta": 67,
"##ls": 68,
"##lp": 69,
"##ia": 70,
"##ld": 71,
"##gr": 72,
"##le": 73,
"##ss": 74,
"##gv": 75,
"##lt": 76,
"##gs": 77,
"##er": 78,
"##gt": 79,
"##gd": 80,
"##li": 81,
"##pp": 82,
"##vr": 83,
"##ge": 84,
"##qa": 85,
"##fa": 86,
"##lk": 87,
"##vt": 88,
"##vs": 89,
"##gi": 90,
"##vd": 91,
"##ve": 92,
"##lf": 93,
"##pr": 94,
"##ka": 95,
"##dr": 96,
"##lq": 97,
"##ps": 98,
"##ee": 99
}
}
}