Sakalti committed on
Commit
adb27f7
·
verified ·
1 Parent(s): ae828af

Create tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +36 -0
tokenizer.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "bert",
3
+ "vocab_size": 32000,
4
+ "type_vocab_size": 2,
5
+ "padding_id": 0,
6
+ "unk_id": 1,
7
+ "cls_id": 2,
8
+ "sep_id": 3,
9
+ "mask_id": 4,
10
+ "normalizer": null,
11
+ "pretokenizer": {
12
+ "type": "ByteLevel",
13
+ "add_prefix_space": true
14
+ },
15
+ "postprocessor": {
16
+ "type": "TemplateProcessing",
17
+ "single": {
18
+ "template": "{input} </s>",
19
+ "special_tokens": [
20
+ {
21
+ "id": 2,
22
+ "content": "</s>"
23
+ }
24
+ ]
25
+ },
26
+ "pair": {
27
+ "template": "{pair} </s>",
28
+ "special_tokens": [
29
+ {
30
+ "id": 2,
31
+ "content": "</s>"
32
+ }
33
+ ]
34
+ }
35
+ }
36
+ }