Voice-Clone-Multilingual

Running

App Files Files Community

Voice-Clone-Multilingual / TTS /tts /utils /text /cmudict.py

Shadhil

voice-clone with single audio sample input

9b2107c almost 2 years ago

raw

history blame contribute delete

2.91 kB

	# -*- coding: utf-8 -*-

	import re

	# Every ARPAbet symbol accepted in a pronunciation, kept as a sorted list.
	# The fifteen vowel symbols appear bare and with a 0/1/2 digit suffix;
	# the twenty-four consonant symbols appear bare only.
	VALID_SYMBOLS = sorted(
	    [
	        vowel + suffix
	        for vowel in (
	            "AA", "AE", "AH", "AO", "AW", "AY", "EH", "ER",
	            "EY", "IH", "IY", "OW", "OY", "UH", "UW",
	        )
	        for suffix in ("", "0", "1", "2")
	    ]
	    + [
	        "B", "CH", "D", "DH", "F", "G", "HH", "JH", "K", "L", "M", "N",
	        "NG", "P", "R", "S", "SH", "T", "TH", "V", "W", "Y", "Z", "ZH",
	    ]
	)


	class CMUDict:
	    """Lookup table over CMUDict pronunciation data.

	    See http://www.speech.cs.cmu.edu/cgi-bin/cmudict
	    """

	    def __init__(self, file_or_path, keep_ambiguous=True):
	        """Load the dictionary.

	        Args:
	            file_or_path: A ``str`` path, opened with latin-1 encoding, or
	                any other object, which is handed to the parser as-is and
	                iterated line by line.
	            keep_ambiguous: When false, only words with exactly one
	                pronunciation are retained.
	        """
	        if not isinstance(file_or_path, str):
	            parsed = _parse_cmudict(file_or_path)
	        else:
	            with open(file_or_path, encoding="latin-1") as handle:
	                parsed = _parse_cmudict(handle)
	        if keep_ambiguous:
	            self._entries = parsed
	        else:
	            # Keep only the words that map to a single pronunciation.
	            self._entries = {
	                key: prons for key, prons in parsed.items() if len(prons) == 1
	            }

	    def __len__(self):
	        """Return the number of words held in the dictionary."""
	        return len(self._entries)

	    def lookup(self, word):
	        """Return the list of ARPAbet pronunciations for ``word``, or None.

	        The word is upper-cased before the lookup.
	        """
	        key = word.upper()
	        return self._entries.get(key)

	    @staticmethod
	    def get_arpabet(word, cmudict, punctuation_symbols):
	        """Replace ``word`` with its first pronunciation wrapped in braces.

	        At most one leading and one trailing character found in
	        ``punctuation_symbols`` is peeled off before the lookup and put back
	        around the result. When ``cmudict.lookup`` returns None, the word
	        itself is returned with the punctuation restored.
	        """
	        prefix = suffix = ""
	        core = word
	        if core and core[0] in punctuation_symbols:
	            prefix, core = core[0], core[1:]
	        if core and core[-1] in punctuation_symbols:
	            suffix, core = core[-1], core[:-1]
	        pronunciations = cmudict.lookup(core)
	        if pronunciations is None:
	            middle = core
	        else:
	            middle = "{%s}" % pronunciations[0]
	        return prefix + middle + suffix


	_alt_re = re.compile(r"\([0-9]+\)")


	def _parse_cmudict(file):
	cmudict = {}
	for line in file:
	if line and (line[0] >= "A" and line[0] <= "Z" or line[0] == "'"):
	parts = line.split(" ")
	word = re.sub(_alt_re, "", parts[0])
	pronunciation = _get_pronunciation(parts[1])
	if pronunciation:
	if word in cmudict:
	cmudict[word].append(pronunciation)
	else:
	cmudict[word] = [pronunciation]
	return cmudict


	def _get_pronunciation(s):
	parts = s.strip().split(" ")
	for part in parts:
	if part not in VALID_SYMBOLS:
	return None
	return " ".join(parts)