Commit
·
c7fe87b
1
Parent(s):
5c35d4b
bug: fixed tokenizer
Browse files
- added_tokens.json +0 -12
- special_tokens_map.json +1 -1
- spiece.model +0 -0
- tokenizer.json +0 -0
- tokenizer_config.json +2 -4
added_tokens.json
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"\t": 32109,
|
| 3 |
-
"\n": 32103,
|
| 4 |
-
" ": 32106,
|
| 5 |
-
"<": 32101,
|
| 6 |
-
"[PAD]": 32100,
|
| 7 |
-
"\\": 32107,
|
| 8 |
-
"^": 32108,
|
| 9 |
-
"`": 32105,
|
| 10 |
-
"{": 32102,
|
| 11 |
-
"}": 32104
|
| 12 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
special_tokens_map.json
CHANGED
|
@@ -102,6 +102,6 @@
|
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
| 104 |
"eos_token": "</s>",
|
| 105 |
-
"pad_token": "[PAD]",
|
| 106 |
"unk_token": "<unk>"
|
| 107 |
}
|
|
|
|
| 102 |
"<extra_id_99>"
|
| 103 |
],
|
| 104 |
"eos_token": "</s>",
|
| 105 |
+
"pad_token": "<pad>",
|
| 106 |
"unk_token": "<unk>"
|
| 107 |
}
|
spiece.model
CHANGED
|
File without changes
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
CHANGED
|
@@ -104,11 +104,9 @@
|
|
| 104 |
"clean_up_tokenization_spaces": true,
|
| 105 |
"eos_token": "</s>",
|
| 106 |
"extra_ids": 100,
|
| 107 |
-
"model_max_length":
|
| 108 |
"pad_token": "<pad>",
|
| 109 |
-
"padding_side": "right",
|
| 110 |
"sp_model_kwargs": {},
|
| 111 |
"tokenizer_class": "T5Tokenizer",
|
| 112 |
-
"unk_token": "<unk>",
|
| 113 |
-
"use_fast": false
|
| 114 |
}
|
|
|
|
| 104 |
"clean_up_tokenization_spaces": true,
|
| 105 |
"eos_token": "</s>",
|
| 106 |
"extra_ids": 100,
|
| 107 |
+
"model_max_length": 512,
|
| 108 |
"pad_token": "<pad>",
|
|
|
|
| 109 |
"sp_model_kwargs": {},
|
| 110 |
"tokenizer_class": "T5Tokenizer",
|
| 111 |
+
"unk_token": "<unk>"
|
|
|
|
| 112 |
}
|