diff --git "a/tokenizer.json" "b/tokenizer.json" new file mode 100644--- /dev/null +++ "b/tokenizer.json" @@ -0,0 +1,100665 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 50256, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 50257, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50258, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50259, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50260, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50261, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50262, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50263, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50264, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50265, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50266, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50267, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50268, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50269, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50270, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50271, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50272, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50273, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50274, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50275, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50276, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50277, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50278, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50279, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50280, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50281, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50282, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50283, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50284, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50285, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50286, + "content": " ", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50287, + "content": "\t\t\t\t\t\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50288, + "content": "\t\t\t\t\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50289, + "content": "\t\t\t\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50290, + "content": "\t\t\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50291, + "content": "\t\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50292, + "content": "\t\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50293, + "content": "\t\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50294, + "content": "\t\t", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50295, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50296, + "content": "