AlexHT_Hung
committed on
Commit
·
6d96773
1
Parent(s):
73c49a1
Revert "Upload tokenizer"
Browse files
This reverts commit 73c49a12e3243978affacfcaf5427654288b8583.
- added_tokens.json +3 -7
- tokenizer.json +1 -37
- tokenizer_config.json +4 -35
added_tokens.json
CHANGED
@@ -1,17 +1,13 @@
|
|
1 |
{
|
2 |
-
"\"arguments\":": 32014,
|
3 |
-
"\"function\":": 32012,
|
4 |
-
"\"name\":": 32013,
|
5 |
"<|assistant|>": 32001,
|
6 |
"<|endoftext|>": 32000,
|
7 |
"<|end|>": 32007,
|
8 |
-
"<|function_metadata|>": 32008,
|
9 |
"<|placeholder1|>": 32002,
|
10 |
"<|placeholder2|>": 32003,
|
11 |
"<|placeholder3|>": 32004,
|
12 |
-
"<|
|
13 |
-
"<|
|
14 |
-
"<|tool_calls|>": 32011,
|
15 |
"<|tool_results|>": 32009,
|
|
|
16 |
"<|user|>": 32010
|
17 |
}
|
|
|
1 |
{
|
|
|
|
|
|
|
2 |
"<|assistant|>": 32001,
|
3 |
"<|endoftext|>": 32000,
|
4 |
"<|end|>": 32007,
|
|
|
5 |
"<|placeholder1|>": 32002,
|
6 |
"<|placeholder2|>": 32003,
|
7 |
"<|placeholder3|>": 32004,
|
8 |
+
"<|tool_calls|>": 32005,
|
9 |
+
"<|function_metadata|>": 32008,
|
|
|
10 |
"<|tool_results|>": 32009,
|
11 |
+
"<|system|>": 32006,
|
12 |
"<|user|>": 32010
|
13 |
}
|
tokenizer.json
CHANGED
@@ -128,42 +128,6 @@
|
|
128 |
"rstrip": true,
|
129 |
"normalized": false,
|
130 |
"special": true
|
131 |
-
},
|
132 |
-
{
|
133 |
-
"id": 32011,
|
134 |
-
"content": "<|tool_calls|>",
|
135 |
-
"single_word": false,
|
136 |
-
"lstrip": false,
|
137 |
-
"rstrip": true,
|
138 |
-
"normalized": false,
|
139 |
-
"special": true
|
140 |
-
},
|
141 |
-
{
|
142 |
-
"id": 32012,
|
143 |
-
"content": "\"function\":",
|
144 |
-
"single_word": false,
|
145 |
-
"lstrip": false,
|
146 |
-
"rstrip": false,
|
147 |
-
"normalized": true,
|
148 |
-
"special": false
|
149 |
-
},
|
150 |
-
{
|
151 |
-
"id": 32013,
|
152 |
-
"content": "\"name\":",
|
153 |
-
"single_word": false,
|
154 |
-
"lstrip": false,
|
155 |
-
"rstrip": false,
|
156 |
-
"normalized": true,
|
157 |
-
"special": false
|
158 |
-
},
|
159 |
-
{
|
160 |
-
"id": 32014,
|
161 |
-
"content": "\"arguments\":",
|
162 |
-
"single_word": false,
|
163 |
-
"lstrip": false,
|
164 |
-
"rstrip": false,
|
165 |
-
"normalized": true,
|
166 |
-
"special": false
|
167 |
}
|
168 |
],
|
169 |
"normalizer": {
|
@@ -93496,4 +93460,4 @@
|
|
93496 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
93497 |
]
|
93498 |
}
|
93499 |
-
}
|
|
|
128 |
"rstrip": true,
|
129 |
"normalized": false,
|
130 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
}
|
132 |
],
|
133 |
"normalizer": {
|
|
|
93460 |
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
|
93461 |
]
|
93462 |
}
|
93463 |
+
}
|
tokenizer_config.json
CHANGED
@@ -67,7 +67,7 @@
|
|
67 |
"special": true
|
68 |
},
|
69 |
"32005": {
|
70 |
-
"content": "<|
|
71 |
"lstrip": false,
|
72 |
"normalized": false,
|
73 |
"rstrip": true,
|
@@ -113,38 +113,6 @@
|
|
113 |
"rstrip": true,
|
114 |
"single_word": false,
|
115 |
"special": true
|
116 |
-
},
|
117 |
-
"32011": {
|
118 |
-
"content": "<|tool_calls|>",
|
119 |
-
"lstrip": false,
|
120 |
-
"normalized": false,
|
121 |
-
"rstrip": true,
|
122 |
-
"single_word": false,
|
123 |
-
"special": true
|
124 |
-
},
|
125 |
-
"32012": {
|
126 |
-
"content": "\"function\":",
|
127 |
-
"lstrip": false,
|
128 |
-
"normalized": true,
|
129 |
-
"rstrip": false,
|
130 |
-
"single_word": false,
|
131 |
-
"special": false
|
132 |
-
},
|
133 |
-
"32013": {
|
134 |
-
"content": "\"name\":",
|
135 |
-
"lstrip": false,
|
136 |
-
"normalized": true,
|
137 |
-
"rstrip": false,
|
138 |
-
"single_word": false,
|
139 |
-
"special": false
|
140 |
-
},
|
141 |
-
"32014": {
|
142 |
-
"content": "\"arguments\":",
|
143 |
-
"lstrip": false,
|
144 |
-
"normalized": true,
|
145 |
-
"rstrip": false,
|
146 |
-
"single_word": false,
|
147 |
-
"special": false
|
148 |
}
|
149 |
},
|
150 |
"bos_token": "<s>",
|
@@ -160,12 +128,13 @@
|
|
160 |
],
|
161 |
"clean_up_tokenization_spaces": false,
|
162 |
"eos_token": "<|endoftext|>",
|
163 |
-
"
|
164 |
"model_max_length": 4096,
|
165 |
"pad_token": "<|endoftext|>",
|
166 |
"padding_side": "left",
|
167 |
"sp_model_kwargs": {},
|
168 |
"tokenizer_class": "LlamaTokenizer",
|
169 |
"unk_token": "<unk>",
|
170 |
-
"use_default_system_prompt": false
|
|
|
171 |
}
|
|
|
67 |
"special": true
|
68 |
},
|
69 |
"32005": {
|
70 |
+
"content": "<|tool_calls|>",
|
71 |
"lstrip": false,
|
72 |
"normalized": false,
|
73 |
"rstrip": true,
|
|
|
113 |
"rstrip": true,
|
114 |
"single_word": false,
|
115 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
}
|
117 |
},
|
118 |
"bos_token": "<s>",
|
|
|
128 |
],
|
129 |
"clean_up_tokenization_spaces": false,
|
130 |
"eos_token": "<|endoftext|>",
|
131 |
+
"legacy": false,
|
132 |
"model_max_length": 4096,
|
133 |
"pad_token": "<|endoftext|>",
|
134 |
"padding_side": "left",
|
135 |
"sp_model_kwargs": {},
|
136 |
"tokenizer_class": "LlamaTokenizer",
|
137 |
"unk_token": "<unk>",
|
138 |
+
"use_default_system_prompt": false,
|
139 |
+
"func_bos_token": "<|tool_calls|>"
|
140 |
}
|