Commit
ยท
00bfabc
1
Parent(s):
dd205e4
add front end
Browse files- indextts/infer.py +5 -8
- indextts/utils/front.py +96 -0
- requirements.txt +2 -1
indextts/infer.py
CHANGED
|
@@ -6,6 +6,7 @@ import torchaudio
|
|
| 6 |
from omegaconf import OmegaConf
|
| 7 |
import sentencepiece as spm
|
| 8 |
|
|
|
|
| 9 |
from utils.common import tokenize_by_CJK_char
|
| 10 |
from utils.feature_extractors import MelSpectrogramFeatures
|
| 11 |
from indextts.vqvae.xtts_dvae import DiscreteVAE
|
|
@@ -41,16 +42,12 @@ class IndexTTS:
|
|
| 41 |
self.bigvgan = self.bigvgan.to(self.device)
|
| 42 |
self.bigvgan.eval()
|
| 43 |
print(">> bigvgan weights restored from:", self.bigvgan_path)
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
def preprocess_text(self, text):
|
| 46 |
-
|
| 47 |
-
english_punctuation = ",.!?;:\"\"''()[]<>"
|
| 48 |
-
|
| 49 |
-
# ๅๅปบไธไธชๆ ๅฐๅญๅ
ธ
|
| 50 |
-
punctuation_map = str.maketrans(chinese_punctuation, english_punctuation)
|
| 51 |
-
|
| 52 |
-
# ไฝฟ็จtranslateๆนๆณๆฟๆขๆ ็น็ฌฆๅท
|
| 53 |
-
return text.translate(punctuation_map)
|
| 54 |
|
| 55 |
def infer(self, audio_prompt, text, output_path):
|
| 56 |
text = self.preprocess_text(text)
|
|
|
|
| 6 |
from omegaconf import OmegaConf
|
| 7 |
import sentencepiece as spm
|
| 8 |
|
| 9 |
+
from indextts.utils.front import TextNormalizer
|
| 10 |
from utils.common import tokenize_by_CJK_char
|
| 11 |
from utils.feature_extractors import MelSpectrogramFeatures
|
| 12 |
from indextts.vqvae.xtts_dvae import DiscreteVAE
|
|
|
|
| 42 |
self.bigvgan = self.bigvgan.to(self.device)
|
| 43 |
self.bigvgan.eval()
|
| 44 |
print(">> bigvgan weights restored from:", self.bigvgan_path)
|
| 45 |
+
self.normalizer = TextNormalizer()
|
| 46 |
+
self.normalizer.load()
|
| 47 |
+
print(">> TextNormalizer loaded")
|
| 48 |
|
| 49 |
def preprocess_text(self, text):
|
| 50 |
+
return self.normalizer.infer(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def infer(self, audio_prompt, text, output_path):
|
| 53 |
text = self.preprocess_text(text)
|
indextts/utils/front.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
import traceback
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
import re
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class TextNormalizer:
|
| 12 |
+
def __init__(self):
|
| 13 |
+
# self.normalizer = Normalizer(cache_dir="textprocessing/tn")
|
| 14 |
+
self.zh_normalizer = None
|
| 15 |
+
self.en_normalizer = None
|
| 16 |
+
self.char_rep_map = {
|
| 17 |
+
"๏ผ": ",",
|
| 18 |
+
"๏ผ": ",",
|
| 19 |
+
";": ",",
|
| 20 |
+
"๏ผ": ",",
|
| 21 |
+
"ใ": ".",
|
| 22 |
+
"๏ผ": "!",
|
| 23 |
+
"๏ผ": "?",
|
| 24 |
+
"\n": ".",
|
| 25 |
+
"ยท": ",",
|
| 26 |
+
"ใ": ",",
|
| 27 |
+
"...": "โฆ",
|
| 28 |
+
"โฆโฆ": "โฆ",
|
| 29 |
+
"$": ".",
|
| 30 |
+
"โ": "'",
|
| 31 |
+
"โ": "'",
|
| 32 |
+
'"': "'",
|
| 33 |
+
"โ": "'",
|
| 34 |
+
"โ": "'",
|
| 35 |
+
"๏ผ": "'",
|
| 36 |
+
"๏ผ": "'",
|
| 37 |
+
"(": "'",
|
| 38 |
+
")": "'",
|
| 39 |
+
"ใ": "'",
|
| 40 |
+
"ใ": "'",
|
| 41 |
+
"ใ": "'",
|
| 42 |
+
"ใ": "'",
|
| 43 |
+
"[": "'",
|
| 44 |
+
"]": "'",
|
| 45 |
+
"โ": "-",
|
| 46 |
+
"๏ฝ": "-",
|
| 47 |
+
"~": "-",
|
| 48 |
+
"ใ": "'",
|
| 49 |
+
"ใ": "'",
|
| 50 |
+
":": ",",
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
def match_email(self, email):
|
| 54 |
+
# ๆญฃๅ่กจ่พพๅผๅน้
้ฎ็ฎฑๆ ผๅผ๏ผๆฐๅญ่ฑๆ@ๆฐๅญ่ฑๆ.่ฑๆ
|
| 55 |
+
pattern = r'^[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z]+$'
|
| 56 |
+
return re.match(pattern, email) is not None
|
| 57 |
+
|
| 58 |
+
def use_chinese(self, s):
|
| 59 |
+
has_chinese = bool(re.search(r'[\u4e00-\u9fff]', s))
|
| 60 |
+
has_digit = bool(re.search(r'\d', s))
|
| 61 |
+
has_alpha = bool(re.search(r'[a-zA-Z]', s))
|
| 62 |
+
is_email = self.match_email(s)
|
| 63 |
+
if has_chinese or not has_alpha or is_email:
|
| 64 |
+
return True
|
| 65 |
+
else:
|
| 66 |
+
return False
|
| 67 |
+
|
| 68 |
+
def load(self):
|
| 69 |
+
# print(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
|
| 70 |
+
# sys.path.append(model_dir)
|
| 71 |
+
|
| 72 |
+
from tn.chinese.normalizer import Normalizer as NormalizerZh
|
| 73 |
+
from tn.english.normalizer import Normalizer as NormalizerEn
|
| 74 |
+
|
| 75 |
+
self.zh_normalizer = NormalizerZh(remove_interjections=False, remove_erhua=False)
|
| 76 |
+
self.en_normalizer = NormalizerEn()
|
| 77 |
+
|
| 78 |
+
def infer(self, text):
|
| 79 |
+
pattern = re.compile("|".join(re.escape(p) for p in self.char_rep_map.keys()))
|
| 80 |
+
replaced_text = pattern.sub(lambda x: self.char_rep_map[x.group()], text)
|
| 81 |
+
if not self.zh_normalizer or not self.en_normalizer:
|
| 82 |
+
print("Error, text normalizer is not initialized !!!")
|
| 83 |
+
return ""
|
| 84 |
+
try:
|
| 85 |
+
normalizer = self.zh_normalizer if self.use_chinese(text) else self.en_normalizer
|
| 86 |
+
result = normalizer.normalize(text)
|
| 87 |
+
except Exception:
|
| 88 |
+
result = ""
|
| 89 |
+
print(traceback.format_exc())
|
| 90 |
+
return result
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
if __name__ == '__main__':
|
| 94 |
+
# ๆต่ฏ็จๅบ
|
| 95 |
+
text_normalizer = TextNormalizer()
|
| 96 |
+
print(text_normalizer.infer("2.5ๅนณๆน็ต็บฟ"))
|
requirements.txt
CHANGED
|
@@ -20,4 +20,5 @@ sentencepiece
|
|
| 20 |
pypinyin
|
| 21 |
librosa
|
| 22 |
gradio
|
| 23 |
-
tqdm
|
|
|
|
|
|
| 20 |
pypinyin
|
| 21 |
librosa
|
| 22 |
gradio
|
| 23 |
+
tqdm
|
| 24 |
+
WeTextProcessing
|