Spaces:
Paused
Paused
| # coding: utf-8 | |
| # Code based on https://github.com/carpedm20/multi-speaker-tacotron-tensorflow/blob/master/text/korean.py | |
| import re | |
| from TTS.tts.utils.text.korean.ko_dictionary import english_dictionary, etc_dictionary | |
def normalize(text):
    """Normalize raw input text.

    Steps, in order:
      1. Trim leading/trailing whitespace.
      2. Delete every character matched by the Han ideograph / radical
         character class below.
      3. Apply the ``etc_dictionary`` substitutions.
      4. Apply the ``english_dictionary`` substitutions to runs of ASCII letters.
      5. Lowercase the result.

    Lowercasing happens last, so the English dictionary lookup sees the
    original casing of each word.

    Args:
        text: Input string.

    Returns:
        The normalized string.
    """
    han_chars = "[⺀-⺙⺛-⻳⼀-⿕々〇〡-〩〸-〺〻㐀-䶵一-鿃豈-鶴侮-頻並-龎]"
    trimmed = text.strip()
    without_han = re.sub(han_chars, "", trimmed)
    expanded = normalize_with_dictionary(without_han, etc_dictionary)
    return normalize_english(expanded).lower()
def normalize_with_dictionary(text, dic):
    """Replace every occurrence of a key of ``dic`` in ``text`` with its value.

    All keys are matched literally (regex metacharacters are escaped) in a
    single left-to-right pass, so replacement text is never re-scanned.

    Args:
        text: Input string.
        dic: Mapping of literal substrings to their replacement strings.

    Returns:
        ``text`` with all key occurrences replaced; ``text`` unchanged when
        ``dic`` has no usable keys or none of them occur.
    """
    # Regex alternation takes the first alternative that matches, so longer
    # keys must come first; otherwise a key that is a prefix of another
    # (e.g. "a" vs "ab") would shadow the longer one.
    # Empty keys are skipped: an empty alternative matches at every position
    # and would splice its value between all characters.
    keys = sorted((key for key in dic if key), key=len, reverse=True)
    if not keys:
        return text
    pattern = re.compile("|".join(re.escape(key) for key in keys))
    return pattern.sub(lambda match: dic[match.group()], text)
def normalize_english(text):
    """Substitute ASCII-letter words that appear in ``english_dictionary``.

    Each maximal run of ``A-Z`` / ``a-z`` characters is looked up as-is
    (case-sensitively) in ``english_dictionary``; words without an entry are
    left untouched.

    Args:
        text: Input string.

    Returns:
        The string with known words replaced by their dictionary values.
    """

    def _lookup(match):
        token = match.group()
        # Fall back to the matched word itself when there is no entry.
        return english_dictionary.get(token, token)

    return re.sub("([A-Za-z]+)", _lookup, text)