AlexHung29629 committed on
Commit
73c49a1
·
verified ·
1 Parent(s): a78445f

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +7 -3
  2. tokenizer.json +37 -1
  3. tokenizer_config.json +35 -4
added_tokens.json CHANGED
@@ -1,13 +1,17 @@
1
  {
 
 
 
2
  "<|assistant|>": 32001,
3
  "<|endoftext|>": 32000,
4
  "<|end|>": 32007,
 
5
  "<|placeholder1|>": 32002,
6
  "<|placeholder2|>": 32003,
7
  "<|placeholder3|>": 32004,
8
- "<|tool_calls|>": 32005,
9
- "<|function_metadata|>": 32008,
10
- "<|tool_results|>": 32009,
11
  "<|system|>": 32006,
 
 
12
  "<|user|>": 32010
13
  }
 
1
  {
2
+ "\"arguments\":": 32014,
3
+ "\"function\":": 32012,
4
+ "\"name\":": 32013,
5
  "<|assistant|>": 32001,
6
  "<|endoftext|>": 32000,
7
  "<|end|>": 32007,
8
+ "<|function_metadata|>": 32008,
9
  "<|placeholder1|>": 32002,
10
  "<|placeholder2|>": 32003,
11
  "<|placeholder3|>": 32004,
12
+ "<|placeholder4|>": 32005,
 
 
13
  "<|system|>": 32006,
14
+ "<|tool_calls|>": 32011,
15
+ "<|tool_results|>": 32009,
16
  "<|user|>": 32010
17
  }
tokenizer.json CHANGED
@@ -128,6 +128,42 @@
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "normalizer": {
@@ -93460,4 +93496,4 @@
93460
  "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
93461
  ]
93462
  }
93463
- }
 
128
  "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
+ },
132
+ {
133
+ "id": 32011,
134
+ "content": "<|tool_calls|>",
135
+ "single_word": false,
136
+ "lstrip": false,
137
+ "rstrip": true,
138
+ "normalized": false,
139
+ "special": true
140
+ },
141
+ {
142
+ "id": 32012,
143
+ "content": "\"function\":",
144
+ "single_word": false,
145
+ "lstrip": false,
146
+ "rstrip": false,
147
+ "normalized": true,
148
+ "special": false
149
+ },
150
+ {
151
+ "id": 32013,
152
+ "content": "\"name\":",
153
+ "single_word": false,
154
+ "lstrip": false,
155
+ "rstrip": false,
156
+ "normalized": true,
157
+ "special": false
158
+ },
159
+ {
160
+ "id": 32014,
161
+ "content": "\"arguments\":",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": true,
166
+ "special": false
167
  }
168
  ],
169
  "normalizer": {
 
93496
  "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
93497
  ]
93498
  }
93499
+ }
tokenizer_config.json CHANGED
@@ -67,7 +67,7 @@
67
  "special": true
68
  },
69
  "32005": {
70
- "content": "<|tool_calls|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": true,
@@ -113,6 +113,38 @@
113
  "rstrip": true,
114
  "single_word": false,
115
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  }
117
  },
118
  "bos_token": "<s>",
@@ -128,13 +160,12 @@
128
  ],
129
  "clean_up_tokenization_spaces": false,
130
  "eos_token": "<|endoftext|>",
131
- "legacy": false,
132
  "model_max_length": 4096,
133
  "pad_token": "<|endoftext|>",
134
  "padding_side": "left",
135
  "sp_model_kwargs": {},
136
  "tokenizer_class": "LlamaTokenizer",
137
  "unk_token": "<unk>",
138
- "use_default_system_prompt": false,
139
- "func_bos_token": "<|tool_calls|>"
140
  }
 
67
  "special": true
68
  },
69
  "32005": {
70
+ "content": "<|placeholder4|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": true,
 
113
  "rstrip": true,
114
  "single_word": false,
115
  "special": true
116
+ },
117
+ "32011": {
118
+ "content": "<|tool_calls|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": true,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "32012": {
126
+ "content": "\"function\":",
127
+ "lstrip": false,
128
+ "normalized": true,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "32013": {
134
+ "content": "\"name\":",
135
+ "lstrip": false,
136
+ "normalized": true,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "32014": {
142
+ "content": "\"arguments\":",
143
+ "lstrip": false,
144
+ "normalized": true,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
  }
149
  },
150
  "bos_token": "<s>",
 
160
  ],
161
  "clean_up_tokenization_spaces": false,
162
  "eos_token": "<|endoftext|>",
163
+ "func_bos_token": "<|tool_calls|>",
164
  "model_max_length": 4096,
165
  "pad_token": "<|endoftext|>",
166
  "padding_side": "left",
167
  "sp_model_kwargs": {},
168
  "tokenizer_class": "LlamaTokenizer",
169
  "unk_token": "<unk>",
170
+ "use_default_system_prompt": false
 
171
  }