File size: 1,247 Bytes
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdeff58
c4d001b
fdeff58
c4d001b
873fbd2
c4d001b
 
 
fdeff58
 
c4d001b
 
873fbd2
c4d001b
 
 
 
 
 
9b0cd72
 
 
 
 
 
 
 
c4d001b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def can_form_string(x, symbol_dict):
    """Segment ``x`` into a sequence of keys taken from ``symbol_dict``.

    The search is depth-first and tries keys in the iteration order of
    ``symbol_dict``, so the first segmentation found in that order is the
    one returned.

    Args:
        x: The string to segment. A falsy value (e.g. ``""``) is treated as
            trivially segmentable.
        symbol_dict: Mapping (or any iterable of strings) whose keys are the
            allowed symbols. Only the keys are used here. Empty keys are
            ignored because they can never consume input.

    Returns:
        A ``(matched, parts)`` tuple: ``(True, [key, ...])`` with the keys
        that concatenate to ``x`` when a segmentation exists, otherwise
        ``(False, [])``.
    """
    if not x:
        return True, []

    # An empty key matches everywhere without advancing, which would make
    # the search loop forever; drop such keys up front.
    keys = [key for key in symbol_dict if key]
    end = len(x)

    # Offsets from which the rest of ``x`` is known to be unsegmentable.
    # Remembering them keeps ambiguous tables from causing exponential work.
    dead_ends = set()

    # Explicit stack instead of recursion so long inputs cannot exhaust the
    # interpreter's recursion limit. Each frame is (offset, remaining keys
    # still to try at that offset); ``parts`` mirrors the path taken so far.
    parts = []
    stack = [(0, iter(keys))]
    while stack:
        pos, candidates = stack[-1]
        if pos == end:
            return True, parts

        for key in candidates:
            nxt = pos + len(key)
            if nxt not in dead_ends and x.startswith(key, pos):
                parts.append(key)
                stack.append((nxt, iter(keys)))
                break
        else:
            # Every key failed at this offset: record it and backtrack.
            dead_ends.add(pos)
            stack.pop()
            if parts:
                parts.pop()

    return False, []


def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
    """Convert whitespace-separated text to IPA using a grapheme table.

    Each word is segmented into symbols of ``g2p[lang_tag]`` (plus ``","``)
    and every symbol is replaced by its mapped value. The converted words
    are joined with single spaces and a fixed set of character
    normalisations is applied to the result.

    Args:
        text: Input text; split on whitespace into words.
        lang_tag: Key selecting the per-language table inside ``g2p``.
        g2p: Mapping of language tag to a mapping of grapheme string to the
            string emitted for it.
        ignore_comma: When true, commas in the input are dropped; otherwise
            they are copied to the output as ``","``.

    Returns:
        The converted string, or ``""`` when ``text`` has no words or when
        any word cannot be segmented with the available symbols.
    """
    words = text.split()  # change in future

    print(f"ipa: {words}")

    if not words:
        # Return before touching g2p[lang_tag] so that empty input keeps
        # working even when the language tag is not in the table.
        return ""

    # Loop-invariant, so build it once. The comma entry lets words such as
    # "foo," be segmented even if the language table has no "," symbol.
    extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}

    ipa = []
    for word in words:
        result, matched_parts = can_form_string(word, extended_g2p)

        if result is False:
            print(f"no match g2p : {word}")
            return ""

        # Look symbols up in the same table used for segmentation; the bare
        # language table may lack "," and would raise KeyError for it.
        ipa.append("".join(extended_g2p[part] for part in matched_parts))

    return (
        " ".join(ipa)
        .replace("g", "ɡ")
        .replace("ʦ", "t͡s")
        .replace("ʨ", "t͡ɕ")
        .replace("R", "ʀ")
        .replace("ʤ", "dʒ")
    )