Gregniuki committed on
Commit
4c21e38
·
verified ·
1 Parent(s): e43ebcf

Update model/utils.py

Browse files
Files changed (1) hide show
  1. model/utils.py +1 -1
model/utils.py CHANGED
@@ -109,7 +109,7 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
109
  - if use "byte", set to 256 (unicode byte range)
110
  """
111
  if tokenizer in ["pinyin", "char"]:
112
- tokenizer_path = os.path.join(files("main").joinpath("../../data"), f"{dataset_name}_{tokenizer}/vocab.txt")
113
  with open(tokenizer_path, "r", encoding="utf-8") as f:
114
  vocab_char_map = {}
115
  for i, char in enumerate(f):
 
109
  - if use "byte", set to 256 (unicode byte range)
110
  """
111
  if tokenizer in ["pinyin", "char"]:
112
+ tokenizer_path = "data/Emilia_ZH_EN_pinyin/vocab.txt"
113
  with open(tokenizer_path, "r", encoding="utf-8") as f:
114
  vocab_char_map = {}
115
  for i, char in enumerate(f):