Spaces:
Build error
Build error
File size: 2,680 Bytes
daf0288 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import os
import json
import argparse
from pathlib import Path
import glob
from ..utils import build_table_from_html_and_cell, html_table_template
def combine_all_json(file_dir: str) -> dict:
    """Merge every JSON file found directly inside *file_dir* into one dict.

    Each file is parsed with ``json.load`` and folded into the result with
    ``dict.update``, so a key that appears in several files keeps the value
    from the file whose name sorts last. An existing ``final.json`` (the
    output written by ``json_to_final``) is ignored, as are sub-directories
    and other non-regular entries.

    Args:
        file_dir: Directory containing the partial JSON result files.

    Returns:
        The merged dictionary; empty if the directory holds no input files.
    """
    total_result = {}
    # os.listdir returns entries in arbitrary order; sorting makes the winner
    # of a duplicated key reproducible from run to run.
    for name in sorted(os.listdir(file_dir)):
        path = os.path.join(file_dir, name)
        # Skip the previously written output and anything that cannot be
        # opened as a file (e.g. a nested directory).
        if name == "final.json" or not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            total_result.update(json.load(f))
    print(f"Combined to a json with {len(total_result)} entries.")
    return total_result
def json_to_final(file_dir: str, type: str):
    """Combine the partial JSON results in *file_dir* and write ``final.json``.

    Supported values of *type*:

    * ``"bbox"``: the combined results are written unchanged.
    * ``"html"``: for every entry the ``"pred"`` and ``"gt"`` sequences are
      joined into strings and passed through ``html_table_template``.
    * ``"html+cell"``: *file_dir* is read as the cell results; the matching
      html results are read from a sibling directory whose name is derived
      from *file_dir*'s name. For every entry the html and cell values are
      merged with ``build_table_from_html_and_cell`` and then passed through
      ``html_table_template``.

    Args:
        file_dir: Directory holding the partial JSON files; ``final.json``
            is written into this same directory.
        type: One of ``"html"``, ``"bbox"`` or ``"html+cell"``. (The name
            shadows the builtin but is kept because it is part of the
            signature.)

    Raises:
        NotImplementedError: If *type* is not one of the supported values.
        AssertionError: In ``"html+cell"`` mode, if the sibling html
            directory is missing or the html and cell results differ in
            length.
    """
    if type not in ("html", "bbox", "html+cell"):
        raise NotImplementedError(
            f"Unsupported type {type!r}; expected 'html', 'bbox' or 'html+cell'."
        )

    if type == "html+cell":
        result_cell = combine_all_json(file_dir)
        # The html results live next to the cell results: take the part of
        # the directory name before the first "-", swap "cell" for "html"
        # and append "-html".
        result_html_file = os.path.join(
            Path(file_dir).parent,
            Path(file_dir).name.split("-")[0].replace("cell", "html") + "-html",
        )
        # Raised explicitly (instead of using `assert`) so the checks are not
        # stripped under `python -O`; the exception type is kept unchanged.
        if not Path(result_html_file).is_dir():
            raise AssertionError(f"{result_html_file} does not exist.")
        result = combine_all_json(result_html_file)
        # html and cell json files must have the same length
        if len(result) != len(result_cell):
            raise AssertionError(
                f"html results ({len(result)}) and cell results "
                f"({len(result_cell)}) have different lengths."
            )
    else:
        result = combine_all_json(file_dir)

    if type == "bbox":
        out = result
    elif type == "html":
        out = dict()
        for filename, obj in result.items():
            pred_html = "".join(obj["pred"])
            gt_html = "".join(obj["gt"])
            out[filename] = dict(
                pred=html_table_template(pred_html),
                gt=html_table_template(gt_html),
            )
    else:  # "html+cell"
        out = dict()
        for filename, obj in result.items():
            pred_html_cell = build_table_from_html_and_cell(
                obj["pred"], result_cell[filename]["pred"]
            )
            gt_html_cell = build_table_from_html_and_cell(
                obj["gt"], result_cell[filename]["gt"]
            )
            out[filename] = dict(
                pred=html_table_template(pred_html_cell),
                gt=html_table_template(gt_html_cell),
            )

    # write to file
    with open(os.path.join(file_dir, "final.json"), "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)
if __name__ == "__main__":
    # Command-line entry point: parse the input directory and the result
    # type, then build final.json via json_to_final.
    parser = argparse.ArgumentParser(description="postprocess")
    # Both options are required: without them json_to_final fails with a
    # traceback, so let argparse report a usage error instead.
    parser.add_argument(
        "-f",
        "--file",
        required=True,
        help="path to all json files from different devices",
    )
    parser.add_argument(
        "-t",
        "--type",
        required=True,
        choices=("html", "bbox", "html+cell"),
        help="html, bbox, html+cell",
    )
    args = parser.parse_args()
    json_to_final(args.file, args.type)
|