ZhankuiHe committed on
Commit
da2c971
·
verified ·
1 Parent(s): d466853

Upload tokenizer

Browse files
dialog/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<item>": 50257
3
+ }
dialog/special_tokens_map.json CHANGED
@@ -1,4 +1,13 @@
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|endoftext|>",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<item>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
  "bos_token": {
12
  "content": "<|endoftext|>",
13
  "lstrip": false,
dialog/tokenizer.json CHANGED
@@ -11,6 +11,15 @@
11
  "rstrip": false,
12
  "normalized": true,
13
  "special": true
 
 
 
 
 
 
 
 
 
14
  }
15
  ],
16
  "normalizer": null,
 
11
  "rstrip": false,
12
  "normalized": true,
13
  "special": true
14
+ },
15
+ {
16
+ "id": 50257,
17
+ "content": "<item>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
  }
24
  ],
25
  "normalizer": null,
dialog/tokenizer_config.json CHANGED
@@ -9,10 +9,21 @@
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
 
 
 
 
 
 
 
 
12
  }
13
  },
 
 
 
14
  "bos_token": "<|endoftext|>",
15
- "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
16
  "clean_up_tokenization_spaces": true,
17
  "eos_token": "<|endoftext|>",
18
  "errors": "replace",
 
9
  "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
+ },
13
+ "50257": {
14
+ "content": "<item>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
  }
21
  },
22
+ "additional_special_tokens": [
23
+ "<item>"
24
+ ],
25
  "bos_token": "<|endoftext|>",
26
+ "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}{{ 'assistant\n' }}",
27
  "clean_up_tokenization_spaces": true,
28
  "eos_token": "<|endoftext|>",
29
  "errors": "replace",
entity/tokenizer_config.json CHANGED
@@ -56505,7 +56505,7 @@
56505
  "special": false
56506
  }
56507
  },
56508
- "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ 'System: ' + message['content'] }}{% endif %}{% if not loop.last %}{{'\n'}}{% endif %}{% endfor %}",
56509
  "clean_up_tokenization_spaces": true,
56510
  "init_inputs": [
56511
  [
 
56505
  "special": false
56506
  }
56507
  },
56508
+ "chat_template": "{% for message in messages %}{{ message.content + ' ' }}{% endfor %}",
56509
  "clean_up_tokenization_spaces": true,
56510
  "init_inputs": [
56511
  [
word/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<item>": 50265
3
+ }
word/special_tokens_map.json CHANGED
@@ -1,4 +1,13 @@
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "cls_token": "<s>",
4
  "eos_token": "</s>",
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<item>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
  "bos_token": "<s>",
12
  "cls_token": "<s>",
13
  "eos_token": "</s>",
word/tokenizer.json CHANGED
@@ -47,6 +47,15 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": null,
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
+ },
51
+ {
52
+ "id": 50265,
53
+ "content": "<item>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  }
60
  ],
61
  "normalizer": null,
word/tokenizer_config.json CHANGED
@@ -40,10 +40,21 @@
40
  "rstrip": false,
41
  "single_word": false,
42
  "special": true
 
 
 
 
 
 
 
 
43
  }
44
  },
 
 
 
45
  "bos_token": "<s>",
46
- "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
47
  "clean_up_tokenization_spaces": true,
48
  "cls_token": "<s>",
49
  "eos_token": "</s>",
 
40
  "rstrip": false,
41
  "single_word": false,
42
  "special": true
43
+ },
44
+ "50265": {
45
+ "content": "<item>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
  }
52
  },
53
+ "additional_special_tokens": [
54
+ "<item>"
55
+ ],
56
  "bos_token": "<s>",
57
+ "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}",
58
  "clean_up_tokenization_spaces": true,
59
  "cls_token": "<s>",
60
  "eos_token": "</s>",