piccora committed on
Commit
f54c645
·
verified ·
1 Parent(s): f12fd13

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. added_tokens.json +2 -5
  2. tokenizer.json +16 -11
  3. tokenizer_config.json +8 -8
  4. vocab.txt +5 -0
added_tokens.json CHANGED
@@ -1,7 +1,4 @@
1
  {
2
- "[CLS]": 8102,
3
- "[MASK]": 8103,
4
- "[PAD]": 8101,
5
- "[SEP]": 8100,
6
- "[UNK]": 8099
7
  }
 
1
  {
2
+ "[MASK]": 8105,
3
+ "[SEP]": 8104
 
 
 
4
  }
tokenizer.json CHANGED
@@ -4,7 +4,7 @@
4
  "padding": null,
5
  "added_tokens": [
6
  {
7
- "id": 8099,
8
  "content": "[UNK]",
9
  "single_word": false,
10
  "lstrip": false,
@@ -13,8 +13,8 @@
13
  "special": true
14
  },
15
  {
16
- "id": 8100,
17
- "content": "[SEP]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -22,8 +22,8 @@
22
  "special": true
23
  },
24
  {
25
- "id": 8101,
26
- "content": "[PAD]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -31,8 +31,8 @@
31
  "special": true
32
  },
33
  {
34
- "id": 8102,
35
- "content": "[CLS]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -40,7 +40,7 @@
40
  "special": true
41
  },
42
  {
43
- "id": 8103,
44
  "content": "[MASK]",
45
  "single_word": false,
46
  "lstrip": false,
@@ -117,7 +117,7 @@
117
  "[CLS]": {
118
  "id": "[CLS]",
119
  "ids": [
120
- 8102
121
  ],
122
  "tokens": [
123
  "[CLS]"
@@ -126,7 +126,7 @@
126
  "[SEP]": {
127
  "id": "[SEP]",
128
  "ids": [
129
- 8100
130
  ],
131
  "tokens": [
132
  "[SEP]"
@@ -8243,7 +8243,12 @@
8243
  "}": 8098,
8244
  "~": 8099,
8245
  "": 8100,
8246
- "": 8101
 
 
 
 
 
8247
  }
8248
  }
8249
  }
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
7
+ "id": 8101,
8
  "content": "[UNK]",
9
  "single_word": false,
10
  "lstrip": false,
 
13
  "special": true
14
  },
15
  {
16
+ "id": 8102,
17
+ "content": "[PAD]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 8103,
26
+ "content": "[CLS]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
31
  "special": true
32
  },
33
  {
34
+ "id": 8104,
35
+ "content": "[SEP]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
40
  "special": true
41
  },
42
  {
43
+ "id": 8105,
44
  "content": "[MASK]",
45
  "single_word": false,
46
  "lstrip": false,
 
117
  "[CLS]": {
118
  "id": "[CLS]",
119
  "ids": [
120
+ 8103
121
  ],
122
  "tokens": [
123
  "[CLS]"
 
126
  "[SEP]": {
127
  "id": "[SEP]",
128
  "ids": [
129
+ 8104
130
  ],
131
  "tokens": [
132
  "[SEP]"
 
8243
  "}": 8098,
8244
  "~": 8099,
8245
  "": 8100,
8246
+ "[UNK]": 8101,
8247
+ "[PAD]": 8102,
8248
+ "[CLS]": 8103,
8249
+ "[SEP]": 8104,
8250
+ "[MASK]": 8105,
8251
+ "�": 8106
8252
  }
8253
  }
8254
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "8099": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
@@ -8,31 +8,31 @@
8
  "single_word": false,
9
  "special": true
10
  },
11
- "8100": {
12
- "content": "[SEP]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
- "8101": {
20
- "content": "[PAD]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
- "8102": {
28
- "content": "[CLS]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "8103": {
36
  "content": "[MASK]",
37
  "lstrip": false,
38
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "8101": {
4
  "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "8102": {
12
+ "content": "[PAD]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "8103": {
20
+ "content": "[CLS]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "8104": {
28
+ "content": "[SEP]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "8105": {
36
  "content": "[MASK]",
37
  "lstrip": false,
38
  "normalized": false,
vocab.txt CHANGED
@@ -8096,4 +8096,9 @@ z
8096
  }
8097
  ~
8098
  
 
 
 
 
 
8099
 
 
8096
  }
8097
  ~
8098
  
8099
+ [UNK]
8100
+ [PAD]
8101
+ [CLS]
8102
+ [SEP]
8103
+ [MASK]
8104