Spaces:

united-link
/

formosan-tts

Sleeping

formosan-tts / ipa /ipa.py

fix: if input is text can't handle uppercase problem

873fbd2 about 1 month ago

1.25 kB

	def can_form_string(x, symbol_dict):
	    """Try to split ``x`` into a sequence of keys taken from ``symbol_dict``.

	    Keys are tried in the dictionary's iteration order at every position, and
	    the first complete segmentation found by depth-first search is returned
	    (so this is "first key that works", not "longest match").

	    Args:
	        x: The string to segment.
	        symbol_dict: Mapping whose keys are the allowed segments. Only the
	            keys are used here; values are ignored.

	    Returns:
	        A ``(matched, parts)`` tuple. ``matched`` is ``True`` and ``parts`` is
	        the list of keys, in order, whose concatenation equals ``x`` when a
	        segmentation exists (``parts`` is ``[]`` for an empty ``x``).
	        Otherwise the result is ``(False, [])``.
	    """
	    # An empty key matches everywhere without consuming input, which would
	    # make the search loop forever; it can never help form the string.
	    keys = [key for key in symbol_dict if key]
	    end = len(x)

	    # Offsets from which the remainder of ``x`` is known to be unsegmentable.
	    # Remembering them keeps the search from re-exploring the same suffix,
	    # which would otherwise be exponential on inputs such as "aaaa...b".
	    dead_ends = set()

	    # Explicit DFS stack instead of recursion so that long inputs cannot hit
	    # the interpreter recursion limit. Each frame is (offset, remaining keys
	    # to try at that offset); ``parts[i]`` is the key that led from frame i
	    # to frame i + 1, so len(stack) == len(parts) + 1 at all times.
	    parts = []
	    stack = [(0, iter(keys))]

	    while stack:
	        pos, candidates = stack[-1]
	        if pos == end:
	            return True, parts

	        for key in candidates:
	            next_pos = pos + len(key)
	            if next_pos not in dead_ends and x.startswith(key, pos):
	                parts.append(key)
	                stack.append((next_pos, iter(keys)))
	                break
	        else:
	            # Every key failed from this offset: record it and backtrack.
	            dead_ends.add(pos)
	            stack.pop()
	            if parts:
	                parts.pop()

	    return False, []


	def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
	    """Convert whitespace-separated text to an IPA string using a g2p table.

	    Each word is segmented into keys of ``g2p[lang_tag]`` (plus ``","``) with
	    ``can_form_string`` and the mapped values are concatenated. Converted
	    words are joined with single spaces and a few symbols are then normalised
	    (e.g. ``g`` -> ``ɡ``, ``ʦ`` -> ``t͡s``).

	    Args:
	        text: Input text; split on whitespace into words.
	        lang_tag: Key selecting the per-language table inside ``g2p``.
	        g2p: Mapping ``lang_tag -> {grapheme: ipa}``.
	        ignore_comma: When true a ``","`` in a word contributes nothing to the
	            output; when false it is kept as ``","``. This setting overrides
	            any ``","`` entry in the language table.

	    Returns:
	        The IPA string, or ``""`` when ``text`` contains no words or when any
	        word cannot be segmented with the table.

	    Raises:
	        KeyError: If ``text`` has at least one word and ``lang_tag`` is not
	            in ``g2p``.
	    """
	    words = text.split()  # change in future

	    print(f"ipa: {words}")

	    if not words:
	        # Nothing to convert; return before touching ``g2p`` so empty input
	        # keeps working even for an unknown ``lang_tag``.
	        return ""

	    # Built once: the table does not change between words.
	    extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}

	    ipa_words = []
	    for word in words:
	        result, matched_parts = can_form_string(word, extended_g2p)

	        if not result:
	            print(f"no match g2p : {word}")
	            return ""

	        # Look up in the same table that was used for segmentation; the
	        # plain per-language table has no guaranteed "," entry, so using it
	        # here would raise KeyError for any word containing a comma.
	        ipa_words.append("".join(extended_g2p[part] for part in matched_parts))

	    return (
	        " ".join(ipa_words)
	        .replace("g", "ɡ")
	        .replace("ʦ", "t͡s")
	        .replace("ʨ", "t͡ɕ")
	        .replace("R", "ʀ")
	        .replace("ʤ", "dʒ")
	    )