File size: 1,645 Bytes
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5740c9e
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import csv
from io import BytesIO

import requests
from omegaconf import OmegaConf

# Fallback grapheme -> phoneme pairs. load_g2p() adds each of these to a
# language's table only when that table has no entry for the grapheme yet.
# Most entries map a character to itself; "y", "c" and "j" are the exceptions.
EXTRA_G2P = dict(
    (
        ("z", "z"),
        ("o", "o"),
        ("h", "h"),
        ("g", "g"),
        ("y", "j"),
        ("w", "w"),
        ("c", "ʦ"),
        ("u", "u"),
        ("f", "f"),
        ("v", "v"),
        ("j", "ɟ"),
        ("b", "b"),
        ("q", "q"),
        ("e", "e"),
        (",", ","),
    )
)


def gh_download(repo, path, timeout=30):
    """Download a file from a GitHub repository and return it as text.

    The file is fetched through the GitHub "contents" REST endpoint with the
    raw media type in the ``Accept`` header, so the response body is the file
    itself.

    Args:
        repo: Repository in ``owner/name`` form, as used in the API URL.
        path: Path of the file inside the repository.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The response body decoded as ``utf-8-sig``, i.e. UTF-8 with a leading
        byte-order mark removed if one is present.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    headers = {
        "Accept": "application/vnd.github.raw+json",
    }
    url = f"https://api.github.com/repos/{repo}/contents/{path}"

    # requests never times out on its own; without an explicit timeout a
    # stalled connection would block the caller indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download {path} from {repo}, response: {response}"
        )

    # Decode with utf-8-sig so a BOM does not end up inside the first CSV
    # header name.
    response.encoding = "utf-8-sig"
    return response.text


def load_g2p(g2p_string):
    """Parse a CSV table into per-language grapheme-to-phoneme mappings.

    The CSV must have a header row containing ``Language`` and ``Dialect``
    columns. Every other column name is used as a key (presumably a
    grapheme) and the cell holds its value(s), comma-separated; only the
    first value is kept. A cell of ``-`` means "no entry". A ``Dialect`` of
    ``-`` means the row is keyed by the language alone, otherwise by
    ``{Language}_{Dialect}``.

    After a row is read, every pair in ``EXTRA_G2P`` whose key is still
    missing from that language's table is added as a fallback.

    Args:
        g2p_string: The full CSV document as a single string.

    Returns:
        A dict mapping each language tag to a dict of key -> first value.

    Raises:
        KeyError: If a row lacks the ``Language`` or ``Dialect`` column.
    """
    from io import StringIO

    g2p = {}

    # Feed the reader a file-like object with newline="" (as the csv docs
    # recommend) so line breaks inside quoted cells are preserved.
    csv_reader = csv.DictReader(StringIO(g2p_string, newline=""))

    for row in csv_reader:
        language = row["Language"]
        dialect = row["Dialect"]

        if dialect == "-":
            lang_tag = f"{language}"
        else:
            lang_tag = f"{language}_{dialect}"

        # Create the table up front: a row whose cells are all "-" must still
        # receive the EXTRA_G2P fallbacks instead of raising KeyError below.
        table = g2p.setdefault(lang_tag, {})

        for key, value in row.items():
            if key in ("Language", "Dialect"):
                continue

            # DictReader uses a None key for surplus cells and a None value
            # for missing ones; neither carries a usable mapping.
            if key is None or value is None or value == "-":
                continue

            table[key] = value.split(",")[0]

        for g, p in EXTRA_G2P.items():
            table.setdefault(g, p)

    return g2p


# Expose the helpers above as OmegaConf resolvers so the YAML config can call
# them through interpolations. replace=True keeps a second execution of this
# module (importlib.reload, notebook re-run) from failing with
# "resolver ... is already registered".
OmegaConf.register_new_resolver("gh_download", gh_download, replace=True)
OmegaConf.register_new_resolver("load_g2p", load_g2p, replace=True)

# Load the config at import time and convert it to plain Python containers;
# the path is relative to the current working directory.
# NOTE(review): configs/g2p.yaml is not visible here; presumably it invokes
# the resolvers above (and therefore a network download) — confirm.
g2p = OmegaConf.to_object(OmegaConf.load("configs/g2p.yaml"))["g2p"]