speedcell4 committed on
Commit
8b9f56f
·
verified ·
1 Parent(s): 2be0ccb

Upload tokenizer

Browse files
Files changed (3) hide show
  1. sentencepiece.bpe.model +2 -2
  2. tokenizer.json +10 -10
  3. tokenizer_config.json +6 -6
sentencepiece.bpe.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15ac4c56aeb767c39c5d5a5a36c69b0d4042ff1fde83421d242ecfe00a2186d0
3
- size 832604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96450bb58fc1a7d2abfc279a0cb04a1b26021e7085ec8ce1024b037732cf5118
3
+ size 832615
tokenizer.json CHANGED
@@ -13,8 +13,8 @@
13
  "special": true
14
  },
15
  {
16
- "id": 2,
17
- "content": "</s>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -22,8 +22,8 @@
22
  "special": true
23
  },
24
  {
25
- "id": 3,
26
- "content": "<unk>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -31,8 +31,8 @@
31
  "special": true
32
  },
33
  {
34
- "id": 4,
35
- "content": "<pad>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -162,10 +162,6 @@
162
  "<unk>",
163
  0.0
164
  ],
165
- [
166
- "<pad>",
167
- 0.0
168
- ],
169
  [
170
  ",",
171
  -3.00239896774292
@@ -128149,6 +128145,10 @@
128149
  [
128150
  "Wandler",
128151
  -13.44303035736084
 
 
 
 
128152
  ]
128153
  ],
128154
  "byte_fallback": false
 
13
  "special": true
14
  },
15
  {
16
+ "id": 1,
17
+ "content": "<pad>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 2,
26
+ "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
31
  "special": true
32
  },
33
  {
34
+ "id": 3,
35
+ "content": "<unk>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
162
  "<unk>",
163
  0.0
164
  ],
 
 
 
 
165
  [
166
  ",",
167
  -3.00239896774292
 
128145
  [
128146
  "Wandler",
128147
  -13.44303035736084
128148
+ ],
128149
+ [
128150
+ "sekretär",
128151
+ -13.444136619567871
128152
  ]
128153
  ],
128154
  "byte_fallback": false
tokenizer_config.json CHANGED
@@ -9,24 +9,24 @@
9
  "single_word": false,
10
  "special": true
11
  },
12
- "2": {
13
- "content": "</s>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
- "3": {
21
- "content": "<unk>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
27
  },
28
- "4": {
29
- "content": "<pad>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "1": {
13
+ "content": "<pad>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "2": {
21
+ "content": "</s>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
27
  },
28
+ "3": {
29
+ "content": "<unk>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,