Adding Pad Token

- added_tokens.json +1 -0
- special_tokens_map.json +1 -7
- tokenizer.json +9 -0
- tokenizer_config.json +9 -1
added_tokens.json
CHANGED
@@ -1,4 +1,5 @@
 {
+  "<|pad|>": 49159,
   "[/AVAILABLE_TOOLS]": 49155,
   "[/INST]": 49153,
   "[/TOOL_RESULTS]": 49158,
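The new <|pad|> entry takes the next free id after the last added token (49158). One way a change like this can be produced is with the transformers tokenizer API; the sketch below is an assumed workflow, not a record of the exact commands behind this commit, and the repository id is a placeholder.

from transformers import AutoTokenizer

# Placeholder repository id; assumes a transformers-compatible checkpoint.
tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")

# Append <|pad|> to the vocabulary; new tokens receive the next free ids,
# so after 49158 it lands at 49159, matching added_tokens.json above.
tokenizer.add_tokens(["<|pad|>"])
tokenizer.pad_token = "<|pad|>"

print(tokenizer.convert_tokens_to_ids("<|pad|>"))  # 49159

# save_pretrained rewrites the four files touched by this commit (among others):
# added_tokens.json, special_tokens_map.json, tokenizer.json, tokenizer_config.json.
tokenizer.save_pretrained("local-checkpoint")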
special_tokens_map.json
CHANGED
@@ -34,13 +34,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "pad_token": "<|pad|>",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
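Because the pad token now maps to a brand-new id instead of reusing <|endoftext|>, a model loaded against this tokenizer generally needs its embedding matrix extended before id 49159 can be used. A minimal sketch, assuming a causal-LM checkpoint and the transformers API (the repository id is again a placeholder):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")  # placeholder id
model = AutoModelForCausalLM.from_pretrained("your-org/your-model")

# len(tokenizer) now includes <|pad|> (id 49159); grow the embedding
# matrix if the checkpoint still stops at the old vocabulary size.
if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
    model.resize_token_embeddings(len(tokenizer))

# Keep the model config in sync so generation and loss masking use the new pad id.
model.config.pad_token_id = tokenizer.pad_token_id  # 49159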
tokenizer.json
CHANGED
@@ -236,6 +236,15 @@
       "rstrip": false,
       "normalized": true,
       "special": false
+    },
+    {
+      "id": 49159,
+      "content": "<|pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": null,
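tokenizer.json is the serialized fast tokenizer, so the same entry can be checked directly with the tokenizers library; the file path below assumes a local clone of the repository.

from tokenizers import Tokenizer

# Load the fast-tokenizer file from a local clone (path is a placeholder).
tok = Tokenizer.from_file("tokenizer.json")

print(tok.token_to_id("<|pad|>"))  # expected: 49159
print(tok.id_to_token(49159))      # expected: <|pad|>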
tokenizer_config.json
CHANGED
@@ -208,6 +208,14 @@
       "rstrip": false,
       "single_word": false,
       "special": false
+    },
+    "49159": {
+      "content": "<|pad|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [
@@ -237,7 +245,7 @@
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
   "model_max_length": 9223372036854775807,
-  "pad_token": "<|endoftext|>",
+  "pad_token": "<|pad|>",
   "padding_side": "left",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>",
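With pad_token set to <|pad|> and padding_side set to "left" in tokenizer_config.json, batch encoding pads shorter sequences on the left with id 49159 and masks those positions out. A small usage sketch (placeholder repository id, PyTorch tensors assumed):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")  # placeholder id

batch = tokenizer(
    ["Hello", "A somewhat longer prompt"],
    padding=True,         # pads the shorter row with <|pad|> (id 49159)
    return_tensors="pt",  # requires torch to be installed
)

# padding_side="left" puts the pad ids at the start of the shorter row;
# the attention mask is 0 over those positions.
print(batch["input_ids"])
print(batch["attention_mask"])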