Boris Malashenko
committed on
Tokenizer fix
Browse files
Got "data did not match any variant of untagged enum PyPreTokenizerTypeWrapper at line 90 column 3" error. Found that tokenizer.json causes it; reinitialized it from the previous model.
- tokenizer.json +5 -5
tokenizer.json
CHANGED
@@ -85,8 +85,8 @@
|
|
85 |
"pre_tokenizer": {
|
86 |
"type": "Metaspace",
|
87 |
"replacement": "▁",
|
88 |
-
"
|
89 |
-
"
|
90 |
},
|
91 |
"post_processor": {
|
92 |
"type": "TemplateProcessing",
|
@@ -172,8 +172,8 @@
|
|
172 |
"decoder": {
|
173 |
"type": "Metaspace",
|
174 |
"replacement": "▁",
|
175 |
-
"
|
176 |
-
"
|
177 |
},
|
178 |
"model": {
|
179 |
"type": "Unigram",
|
@@ -184846,4 +184846,4 @@
|
|
184846 |
],
|
184847 |
"byte_fallback": false
|
184848 |
}
|
184849 |
-
}
|
|
|
85 |
"pre_tokenizer": {
|
86 |
"type": "Metaspace",
|
87 |
"replacement": "▁",
|
88 |
+
"add_prefix_space": true,
|
89 |
+
"prepend_scheme": "always"
|
90 |
},
|
91 |
"post_processor": {
|
92 |
"type": "TemplateProcessing",
|
|
|
172 |
"decoder": {
|
173 |
"type": "Metaspace",
|
174 |
"replacement": "▁",
|
175 |
+
"add_prefix_space": true,
|
176 |
+
"prepend_scheme": "always"
|
177 |
},
|
178 |
"model": {
|
179 |
"type": "Unigram",
|
|
|
184846 |
],
|
184847 |
"byte_fallback": false
|
184848 |
}
|
184849 |
+
}
|