# Source: Step-Audio-Tokenizer/dengcunqin/speech_paraformer-large_asr_nat-zh-cantonese-en-16k-vocab8501-online/write_tokens_from_txt.py
"""Convert a plain-text token list (one token per line) into a JSON array.

Usage:
    python write_tokens_from_txt.py <tokens.txt> <tokens.json>
"""
import json
from pathlib import Path
import sys


def write_tokens_from_txt(config, token_list_out):
    """Read tokens from *config* and dump them as a JSON list.

    Args:
        config: Path (``str`` or ``Path``) of the UTF-8 text file holding
            one token per line.
        token_list_out: Path of the JSON file to create or overwrite.

    Returns:
        The list of tokens that was written, in file order.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    with Path(config).open("r", encoding="utf-8") as src:
        # rstrip() removes the newline together with any other trailing
        # whitespace; blank lines are kept as empty-string tokens so that
        # list positions still match the line numbers of the input.
        token_list = [line.rstrip() for line in src]

    # The context manager guarantees the handle is closed (and buffered data
    # flushed) even if serialization fails part-way through.
    with open(token_list_out, "w", encoding="utf-8") as dst:
        # ensure_ascii=False writes non-ASCII tokens as literal UTF-8 text
        # instead of \uXXXX escapes.
        json.dump(token_list, dst, ensure_ascii=False, indent=4)

    return token_list


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``. Only the first two arguments are used.

    Raises:
        SystemExit: If fewer than two arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        raise SystemExit(
            "usage: write_tokens_from_txt.py <tokens.txt> <tokens.json>"
        )
    write_tokens_from_txt(args[0], args[1])


if __name__ == "__main__":
    main()