File size: 2,680 Bytes
daf0288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import json
import argparse
from pathlib import Path
import glob

from ..utils import build_table_from_html_and_cell, html_table_template


def combine_all_json(file_dir: str) -> dict:
    """Merge every JSON file found in ``file_dir`` into a single dict.

    Each regular file in the directory, except a previously written
    ``final.json``, is parsed as JSON and its top-level mapping is merged
    into the result with ``dict.update``. Files are visited in sorted name
    order, so when the same key occurs in several files the value from the
    last file (by name) wins deterministically. Sub-directories are skipped.

    Args:
        file_dir: Directory containing the JSON files to combine.

    Returns:
        The merged mapping of all entries.

    Raises:
        FileNotFoundError: If ``file_dir`` does not exist.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    total_result = {}
    # os.listdir returns names in arbitrary order; sort for reproducible merges.
    for name in sorted(os.listdir(file_dir)):
        # final.json is this tool's own output; never feed it back in.
        if name == "final.json":
            continue
        path = os.path.join(file_dir, name)
        # Skip sub-directories and other non-regular entries instead of
        # crashing when trying to open them.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            total_result.update(json.load(f))

    print(f"Combined to a json with {len(total_result)} entries.")

    return total_result


def json_to_final(file_dir: str, type: str):
    """Combine the JSON files in ``file_dir`` and write ``final.json`` there.

    Behaviour depends on ``type``:

    * ``"bbox"``: the merged entries are written unchanged.
    * ``"html"``: for every entry the ``"pred"`` and ``"gt"`` sequences are
      joined into strings and wrapped with ``html_table_template``.
    * ``"html+cell"``: ``file_dir`` supplies the cell results. The html
      results are read from a sibling directory whose name is built from the
      part of ``file_dir``'s name before the first ``"-"``, with ``"cell"``
      replaced by ``"html"`` and ``"-html"`` appended. Each pair is combined
      with ``build_table_from_html_and_cell`` and then wrapped with
      ``html_table_template``.

    Args:
        file_dir: Directory with the JSON files; also receives ``final.json``.
        type: One of ``"html"``, ``"bbox"`` or ``"html+cell"``.

    Raises:
        NotImplementedError: If ``type`` is not one of the supported values.
        AssertionError: In ``"html+cell"`` mode, if the html directory is
            missing or the html and cell results differ in length.
    """
    # Reject unsupported modes before touching the file system.
    if type not in ("html", "bbox", "html+cell"):
        raise NotImplementedError

    if type == "html+cell":
        cell_entries = combine_all_json(file_dir)
        source_dir = Path(file_dir)
        # Derive the sibling directory that holds the html predictions.
        html_dir_name = source_dir.name.split("-")[0].replace("cell", "html")
        html_dir_name = html_dir_name + "-html"
        result_html_file = os.path.join(source_dir.parent, html_dir_name)
        assert Path(result_html_file).is_dir(), f"{result_html_file} does not exist."
        merged = combine_all_json(result_html_file)
        # html and cell results are expected to have the same number of entries.
        assert len(merged) == len(cell_entries)
    else:
        merged = combine_all_json(file_dir)

    if type == "bbox":
        final = merged
    elif type == "html":
        final = {}
        for filename, entry in merged.items():
            pred_html = "".join(entry["pred"])
            gt_html = "".join(entry["gt"])
            final[filename] = {
                "pred": html_table_template(pred_html),
                "gt": html_table_template(gt_html),
            }
    else:
        final = {}
        for filename, entry in merged.items():
            cells = cell_entries[filename]
            pred_html_cell = build_table_from_html_and_cell(
                entry["pred"], cells["pred"]
            )
            gt_html_cell = build_table_from_html_and_cell(entry["gt"], cells["gt"])
            final[filename] = {
                "pred": html_table_template(pred_html_cell),
                "gt": html_table_template(gt_html_cell),
            }

    # Persist the combined result next to the input files.
    target = os.path.join(file_dir, "final.json")
    with open(target, "w", encoding="utf-8") as f:
        json.dump(final, f, indent=4)


if __name__ == "__main__":
    # Command-line entry point: parse the input directory and the result
    # type, then hand both to json_to_final.
    parser = argparse.ArgumentParser(description="postprocess")

    # Both options are required: without them json_to_final would receive
    # None and fail with an unrelated error further down.
    parser.add_argument(
        "-f",
        "--file",
        required=True,
        help="path to all json files from different devices",
    )
    # choices mirrors the values json_to_final accepts, so an unsupported
    # type is rejected with a usage message.
    parser.add_argument(
        "-t",
        "--type",
        required=True,
        choices=("html", "bbox", "html+cell"),
        help="html, bbox, html+cell",
    )
    args = parser.parse_args()

    json_to_final(args.file, args.type)