{
  "version": "1.0",
  "truncation": {
    "direction": "Right",
    "max_length": 2048,
    "strategy": "LongestFirst",
    "stride": 0
  },
  "padding": {
    "strategy": {
      "Fixed": 2048
    },
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 6,
    "pad_type_id": 0,
    "pad_token": "[PAD]"
  },
  "added_tokens": [
    {
      "id": 5,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 6,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 7,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 8,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 9,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "String": ""
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "WordLevel",
    "vocab": {
      "A": 0,
      "T": 1,
      "C": 2,
      "G": 3,
      "N": 4,
      "[UNK]": 5,
      "[PAD]": 6,
      "[CLS]": 7,
      "[SEP]": 8,
      "[MASK]": 9
    },
    "unk_token": "[UNK]"
  }
}