{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "<UNK>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 10000,
"vocab": {
"<UNK>": 0,
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"e": 5,
"f": 6,
"g": 7,
"h": 8,
"i": 9,
"k": 10,
"l": 11,
"m": 12,
"n": 13,
"o": 14,
"p": 15,
"q": 16,
"r": 17,
"s": 18,
"t": 19,
"u": 20,
"v": 21,
"w": 22,
"x": 23,
"y": 24,
"z": 25,
"##h": 26,
"##g": 27,
"##k": 28,
"##s": 29,
"##t": 30,
"##e": 31,
"##a": 32,
"##w": 33,
"##l": 34,
"##n": 35,
"##v": 36,
"##r": 37,
"##p": 38,
"##i": 39,
"##q": 40,
"##f": 41,
"##d": 42,
"##m": 43,
"##y": 44,
"##x": 45,
"##c": 46,
"##z": 47,
"##u": 48,
"##o": 49,
"##b": 50,
"##aa": 51,
"##ll": 52,
"##la": 53,
"##gg": 54,
"##rr": 55,
"##va": 56,
"##ga": 57,
"##ra": 58,
"##lv": 59,
"##pa": 60,
"##lg": 61,
"##sa": 62,
"##lr": 63,
"##ea": 64,
"##vv": 65,
"##da": 66,
"##ta": 67,
"##ls": 68,
"##lp": 69,
"##ia": 70,
"##ld": 71,
"##gr": 72,
"##le": 73,
"##ss": 74,
"##gv": 75,
"##lt": 76,
"##gs": 77,
"##er": 78,
"##gt": 79,
"##gd": 80,
"##li": 81,
"##pp": 82,
"##vr": 83,
"##ge": 84,
"##qa": 85,
"##fa": 86,
"##lk": 87,
"##vt": 88,
"##vs": 89,
"##gi": 90,
"##vd": 91,
"##ve": 92,
"##lf": 93,
"##pr": 94,
"##ka": 95,
"##dr": 96,
"##lq": 97,
"##ps": 98,
"##ee": 99
}
}
}