Spaces:
Running
on
Zero
Running
on
Zero
Update model/utils.py
Browse files
- model/utils.py +1 -1
model/utils.py
CHANGED
@@ -109,7 +109,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
|
|
109 |
- if use "byte", set to 256 (unicode byte range)
|
110 |
"""
|
111 |
if tokenizer in ["pinyin", "char"]:
|
112 |
-
tokenizer_path =
|
113 |
with open(tokenizer_path, "r", encoding="utf-8") as f:
|
114 |
vocab_char_map = {}
|
115 |
for i, char in enumerate(f):
|
|
|
109 |
- if use "byte", set to 256 (unicode byte range)
|
110 |
"""
|
111 |
if tokenizer in ["pinyin", "char"]:
|
112 |
+
tokenizer_path = "data/Emilia_ZH_EN_pinyin/vocab.txt"
|
113 |
with open(tokenizer_path, "r", encoding="utf-8") as f:
|
114 |
vocab_char_map = {}
|
115 |
for i, char in enumerate(f):
|