Spaces:
Sleeping
Sleeping
def can_form_string(x, symbol_dict):
    """Split *x* into a sequence of keys taken from *symbol_dict*.

    The search is depth-first and tries keys in the dictionary's iteration
    order, so the first segmentation found in that order is returned.

    Args:
        x: The string to segment.
        symbol_dict: Mapping whose keys are the allowed segments. Only the
            keys are used here; empty-string keys are ignored.

    Returns:
        ``(True, parts)`` where ``parts`` is the list of keys that
        concatenate to *x*, or ``(False, [])`` when no segmentation exists.
        An empty *x* yields ``(True, [])``.
    """
    if not x:
        return True, []

    # An empty key matches everywhere without consuming input, which would
    # make the search loop forever; it can never contribute to a split.
    keys = [key for key in symbol_dict if key]
    end = len(x)

    # Offsets from which the remaining suffix is known to be unsplittable.
    # Remembering them keeps the backtracking from going exponential.
    dead = set()
    parts = []
    # Explicit stack instead of recursion so long inputs do not hit the
    # interpreter's recursion limit. Each frame is (offset, remaining keys).
    stack = [(0, iter(keys))]

    while stack:
        pos, candidates = stack[-1]
        for key in candidates:
            nxt = pos + len(key)
            if nxt in dead or not x.startswith(key, pos):
                continue
            parts.append(key)
            if nxt == end:
                return True, parts
            stack.append((nxt, iter(keys)))
            break
        else:
            # Every key failed at this offset: mark it and backtrack,
            # dropping the key that led here (the root frame has none).
            dead.add(pos)
            stack.pop()
            if parts:
                parts.pop()

    return False, []
def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
    """Convert whitespace-separated *text* to an IPA string.

    Each word is segmented into keys of the ``g2p[lang_tag]`` table (plus a
    comma entry) and the mapped values are concatenated. Words are joined
    with single spaces and a few symbols are then normalised.

    Args:
        text: Input text; split on whitespace.
        lang_tag: Key selecting the table inside *g2p*.
        g2p: Mapping of language tag to a mapping of grapheme -> IPA string.
        ignore_comma: When true a comma in a word maps to the empty string;
            otherwise it is kept as ``","``. This overrides any ``","``
            entry in the language table.

    Returns:
        The IPA string, or ``""`` if *text* has no words or any word cannot
        be segmented with the table.
    """
    words = text.split()  # change in future
    print(f"ipa: {words}")
    if not words:
        return ""

    # Built once: it does not depend on the word being processed.
    extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}

    ipa = []
    for word in words:
        result, matched_parts = can_form_string(word, extended_g2p)
        if not result:
            print(f"no match g2p : {word}")
            return ""
        # Look up in the same table used for segmentation so the comma
        # entry resolves even when the language table has no "," key.
        ipa.append("".join(extended_g2p[part] for part in matched_parts))

    return (
        " ".join(ipa)
        .replace("g", "ɡ")
        .replace("ʦ", "t͡s")
        .replace("ʨ", "t͡ɕ")
        .replace("R", "ʀ")
        .replace("ʤ", "dʒ")
    )