Upload tokenizer
Browse files
- added_tokens.json +7 -3
- tokenizer.json +37 -1
- tokenizer_config.json +35 -4
added_tokens.json
CHANGED
@@ -1,13 +1,17 @@
|
|
1 |
{
|
|
|
|
|
|
|
2 |
"<|assistant|>": 32001,
|
3 |
"<|endoftext|>": 32000,
|
4 |
"<|end|>": 32007,
|
|
|
5 |
"<|placeholder1|>": 32002,
|
6 |
"<|placeholder2|>": 32003,
|
7 |
"<|placeholder3|>": 32004,
|
8 |
-
"<|
|
9 |
-
"<|function_metadata|>": 32008,
|
10 |
-
"<|tool_results|>": 32009,
|
11 |
"<|system|>": 32006,
|
|
|
|
|
12 |
"<|user|>": 32010
|
13 |
}
|
|
|
1 |
{
|
2 |
+
"\"arguments\":": 32014,
|
3 |
+
"\"function\":": 32012,
|
4 |
+
"\"name\":": 32013,
|
5 |
"<|assistant|>": 32001,
|
6 |
"<|endoftext|>": 32000,
|
7 |
"<|end|>": 32007,
|
8 |
+
"<|function_metadata|>": 32008,
|
9 |
"<|placeholder1|>": 32002,
|
10 |
"<|placeholder2|>": 32003,
|
11 |
"<|placeholder3|>": 32004,
|
12 |
+
"<|placeholder4|>": 32005,
|
|
|
|
|
13 |
"<|system|>": 32006,
|
14 |
+
"<|tool_calls|>": 32011,
|
15 |
+
"<|tool_results|>": 32009,
|
16 |
"<|user|>": 32010
|
17 |
}
|
tokenizer.json
CHANGED
@@ -128,6 +128,42 @@
|
|
128 |
"rstrip": true,
|
129 |
"normalized": false,
|
130 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
}
|
132 |
],
|
133 |
"normalizer": {
|
@@ -93460,4 +93496,4 @@
|
|
93460 |
"β ββββββββββββββ"
|
93461 |
]
|
93462 |
}
|
93463 |
-
}
|
|
|
128 |
"rstrip": true,
|
129 |
"normalized": false,
|
130 |
"special": true
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"id": 32011,
|
134 |
+
"content": "<|tool_calls|>",
|
135 |
+
"single_word": false,
|
136 |
+
"lstrip": false,
|
137 |
+
"rstrip": true,
|
138 |
+
"normalized": false,
|
139 |
+
"special": true
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"id": 32012,
|
143 |
+
"content": "\"function\":",
|
144 |
+
"single_word": false,
|
145 |
+
"lstrip": false,
|
146 |
+
"rstrip": false,
|
147 |
+
"normalized": true,
|
148 |
+
"special": false
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"id": 32013,
|
152 |
+
"content": "\"name\":",
|
153 |
+
"single_word": false,
|
154 |
+
"lstrip": false,
|
155 |
+
"rstrip": false,
|
156 |
+
"normalized": true,
|
157 |
+
"special": false
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"id": 32014,
|
161 |
+
"content": "\"arguments\":",
|
162 |
+
"single_word": false,
|
163 |
+
"lstrip": false,
|
164 |
+
"rstrip": false,
|
165 |
+
"normalized": true,
|
166 |
+
"special": false
|
167 |
}
|
168 |
],
|
169 |
"normalizer": {
|
|
|
93496 |
"β ββββββββββββββ"
|
93497 |
]
|
93498 |
}
|
93499 |
+
}
|
tokenizer_config.json
CHANGED
@@ -67,7 +67,7 @@
|
|
67 |
"special": true
|
68 |
},
|
69 |
"32005": {
|
70 |
-
"content": "<|
|
71 |
"lstrip": false,
|
72 |
"normalized": false,
|
73 |
"rstrip": true,
|
@@ -113,6 +113,38 @@
|
|
113 |
"rstrip": true,
|
114 |
"single_word": false,
|
115 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
}
|
117 |
},
|
118 |
"bos_token": "<s>",
|
@@ -128,13 +160,12 @@
|
|
128 |
],
|
129 |
"clean_up_tokenization_spaces": false,
|
130 |
"eos_token": "<|endoftext|>",
|
131 |
-
"
|
132 |
"model_max_length": 4096,
|
133 |
"pad_token": "<|endoftext|>",
|
134 |
"padding_side": "left",
|
135 |
"sp_model_kwargs": {},
|
136 |
"tokenizer_class": "LlamaTokenizer",
|
137 |
"unk_token": "<unk>",
|
138 |
-
"use_default_system_prompt": false
|
139 |
-
"func_bos_token": "<|tool_calls|>"
|
140 |
}
|
|
|
67 |
"special": true
|
68 |
},
|
69 |
"32005": {
|
70 |
+
"content": "<|placeholder4|>",
|
71 |
"lstrip": false,
|
72 |
"normalized": false,
|
73 |
"rstrip": true,
|
|
|
113 |
"rstrip": true,
|
114 |
"single_word": false,
|
115 |
"special": true
|
116 |
+
},
|
117 |
+
"32011": {
|
118 |
+
"content": "<|tool_calls|>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": true,
|
122 |
+
"single_word": false,
|
123 |
+
"special": true
|
124 |
+
},
|
125 |
+
"32012": {
|
126 |
+
"content": "\"function\":",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": true,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"32013": {
|
134 |
+
"content": "\"name\":",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": true,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"32014": {
|
142 |
+
"content": "\"arguments\":",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": true,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
}
|
149 |
},
|
150 |
"bos_token": "<s>",
|
|
|
160 |
],
|
161 |
"clean_up_tokenization_spaces": false,
|
162 |
"eos_token": "<|endoftext|>",
|
163 |
+
"func_bos_token": "<|tool_calls|>",
|
164 |
"model_max_length": 4096,
|
165 |
"pad_token": "<|endoftext|>",
|
166 |
"padding_side": "left",
|
167 |
"sp_model_kwargs": {},
|
168 |
"tokenizer_class": "LlamaTokenizer",
|
169 |
"unk_token": "<unk>",
|
170 |
+
"use_default_system_prompt": false
|
|
|
171 |
}
|