File size: 3,008 Bytes
ee0ec3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
import argparse
import sys

# Zero-based column indices (of a 10-column, tab-separated token line) in
# which the input carries the "is member" and "is parenthesis root" flags.
IS_MEMBER, IS_PARENTHESIS_ROOT = 5, 3

# Suffix appended to a flagged member's relation (column 7), keyed by the
# relation of the nearest "Apos"/"Coord" ancestor.
_MEMBER_SUFFIXES = {"Apos": "_Ap", "Coord": "_Co"}


def _find_member_parent(node, heads, deps):
    """Return the id of the nearest proper ancestor of *node* labelled Apos/Coord.

    The walk starts at the head of *node* (a node is never its own parent)
    and follows the head links upwards.  It stops at the first ancestor
    whose relation is "Apos" or "Coord", or as soon as it leaves the
    sentence (head ``0``, or any id that is not a token of the sentence).
    In the latter case the returned id is not a key of *deps*.
    """
    parent = int(heads[node])
    while parent in deps and deps[parent] not in _MEMBER_SUFFIXES:
        parent = int(heads[parent])
    return parent


def annotate_sentence(block, is_member=False, is_parenthesis_root=False):
    """Fold the flag columns of one sentence into the relation column.

    Args:
        block: lines of one sentence, without trailing newlines.  Lines that
            do not split into exactly 10 tab-separated columns (for example
            comment lines) are passed through unchanged.
        is_member: when true, every token whose ``IS_MEMBER`` column is "1"
            gets "_Ap" or "_Co" appended to column 7, depending on whether
            its nearest ancestor labelled "Apos"/"Coord" is an "Apos" or a
            "Coord"; the ``IS_MEMBER`` column is then reset to "_".  If no
            such ancestor exists, a diagnostic is printed to standard error,
            column 7 is left as is and the flag is still reset.
        is_parenthesis_root: when true, every token whose
            ``IS_PARENTHESIS_ROOT`` column is "1" gets "_Pa" appended to
            column 7 and that column is reset to "_".

    Returns:
        A new list with the rewritten lines, in the original order.

    Raises:
        AssertionError: if the ids in column 0 of the 10-column lines are
            not consecutive integers starting at 1.
    """
    # First pass: collect head (column 6) and relation (column 7) per token
    # id, taken from the unmodified input so that suffixes added below never
    # influence the ancestor search.
    heads, deps = {}, {}
    for entry in block:
        columns = entry.split("\t")
        if len(columns) == 10:
            token_id = len(heads) + 1
            assert int(columns[0]) == token_id
            deps[token_id] = columns[7]
            heads[token_id] = columns[6]

    # Second pass: rewrite the token lines.
    result = []
    for entry in block:
        columns = entry.split("\t")
        if len(columns) != 10:
            result.append(entry)
            continue

        if is_member and columns[IS_MEMBER] == "1":
            parent = _find_member_parent(int(columns[0]), heads, deps)
            # ``deps.get`` because the walk may have ended outside the
            # sentence (e.g. at the artificial root 0), which has no relation.
            suffix = _MEMBER_SUFFIXES.get(deps.get(parent))
            if suffix is not None:
                columns[7] += suffix
            else:
                print("Did not find correct parent of IsMember {} in the below sentence".format(entry), *block, file=sys.stderr, sep="\n")
            columns[IS_MEMBER] = "_"

        if is_parenthesis_root and columns[IS_PARENTHESIS_ROOT] == "1":
            columns[7] += "_Pa"
            columns[IS_PARENTHESIS_ROOT] = "_"

        result.append("\t".join(columns))

    return result


def main():
    """Rewrite the given files sentence by sentence and print them to stdout.

    Sentences are runs of non-empty lines terminated by one empty line; each
    processed sentence is printed followed by an empty line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", default=[], nargs="*", help="Input paths")
    parser.add_argument("--is_member", default=False, action="store_true", help="Add is_member")
    parser.add_argument("--is_parenthesis_root", default=False, action="store_true", help="Add is_parenthesis_root")
    args = parser.parse_args()

    for path in args.paths:
        with open(path, "r", encoding="utf-8") as conllu_file:
            block = []
            for line in conllu_file:
                line = line.rstrip("\n")
                if line:
                    block.append(line)
                    continue

                # An empty line terminates a sentence, which must not be empty.
                assert block
                annotated = annotate_sentence(block, args.is_member, args.is_parenthesis_root)
                print(*annotated, sep="\n", end="\n\n")
                block = []
            # The last sentence of a file must be terminated by an empty line.
            assert not block


if __name__ == "__main__":
    main()