Spaces:
Build error
Build error
import os | |
import json | |
import argparse | |
from pathlib import Path | |
import glob | |
from ..utils import build_table_from_html_and_cell, html_table_template | |
def combine_all_json(file_dir: str) -> dict:
    """Merge every JSON file found directly inside ``file_dir`` into one dict.

    Each file is loaded with ``json.load`` and folded into the result with
    ``dict.update``; when two files share a key, the file whose name sorts
    later wins. A file named ``final.json`` (the output written by
    ``json_to_final``) is ignored so a re-run does not fold stale output
    back into the result.

    Args:
        file_dir: Directory holding the JSON result files to merge.

    Returns:
        A single dict containing the entries of all merged files. Empty when
        the directory holds no input files.
    """
    total_result = dict()
    # os.listdir() returns names in arbitrary order; sorting makes the
    # "last file wins" rule for duplicate keys reproducible across runs.
    for name in sorted(os.listdir(file_dir)):
        if name == "final.json":
            continue
        path = os.path.join(file_dir, name)
        # Sub-directories cannot be opened as files; skip them instead of
        # crashing.
        if not os.path.isfile(path):
            continue
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding (final.json is written as UTF-8).
        with open(path, "r", encoding="utf-8") as f:
            total_result.update(json.load(f))
    print(f"Combined to a json with {len(total_result)} entries.")
    return total_result
def json_to_final(file_dir: str, type: str):
    """Combine the JSON results in ``file_dir`` and write ``final.json`` there.

    Supported values of ``type``:

    * ``"bbox"``: the combined dict is written unchanged.
    * ``"html"``: for every entry, the ``"pred"`` and ``"gt"`` values are
      concatenated with ``"".join`` and passed through
      ``html_table_template``.
    * ``"html+cell"``: ``file_dir`` holds the cell results. The matching html
      results are read from a sibling directory whose name is built from the
      part of ``file_dir``'s name before the first ``"-"``, with ``"cell"``
      replaced by ``"html"`` and ``"-html"`` appended. For every entry the
      html and cell values are merged with ``build_table_from_html_and_cell``
      and then passed through ``html_table_template``.

    Args:
        file_dir: Directory containing the JSON files to combine; the output
            ``final.json`` is written into this same directory.
        type: One of ``"html"``, ``"bbox"`` or ``"html+cell"``. The name
            shadows the builtin but is kept because it is part of the public
            signature.

    Raises:
        NotImplementedError: If ``type`` is not one of the supported values.
        AssertionError: In ``"html+cell"`` mode, if the sibling html
            directory does not exist or the html and cell results do not
            contain the same number of entries.
    """
    if type in ("html", "bbox"):
        result = combine_all_json(file_dir)
        result_cell = None
    elif type == "html+cell":
        result_cell = combine_all_json(file_dir)
        result_html_file = os.path.join(
            Path(file_dir).parent,
            Path(file_dir).name.split("-")[0].replace("cell", "html") + "-html",
        )
        # Raised explicitly rather than via `assert` so the checks still run
        # under `python -O`; AssertionError is kept so existing callers see
        # the same exception type as before.
        if not Path(result_html_file).is_dir():
            raise AssertionError(f"{result_html_file} does not exist.")
        result = combine_all_json(result_html_file)
        # The html and cell json files must have the same number of entries.
        if len(result) != len(result_cell):
            raise AssertionError(
                f"html results have {len(result)} entries but cell results "
                f"have {len(result_cell)}."
            )
    else:
        raise NotImplementedError(f"Unsupported type: {type!r}")

    if type == "bbox":
        out = result
    elif type == "html":
        out = dict()
        for filename, obj in result.items():
            pred_html = "".join(obj["pred"])
            gt_html = "".join(obj["gt"])
            out[filename] = dict(
                pred=html_table_template(pred_html),
                gt=html_table_template(gt_html),
            )
    else:
        # Only "html+cell" can reach this point; every other value was
        # rejected above.
        out = dict()
        for filename, obj in result.items():
            pred_html_cell = build_table_from_html_and_cell(
                obj["pred"], result_cell[filename]["pred"]
            )
            gt_html_cell = build_table_from_html_and_cell(
                obj["gt"], result_cell[filename]["gt"]
            )
            out[filename] = dict(
                pred=html_table_template(pred_html_cell),
                gt=html_table_template(gt_html_cell),
            )

    # write to file
    with open(os.path.join(file_dir, "final.json"), "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)
if __name__ == "__main__":
    # Command-line entry point: combine the JSON result files in a directory
    # and write final.json next to them.
    parser = argparse.ArgumentParser(description="postprocess")
    parser.add_argument(
        "-f",
        "--file",
        # Required: without it args.file is None, and os.listdir(None) would
        # silently operate on the current working directory.
        required=True,
        help="path to all json files from different devices",
    )
    parser.add_argument(
        "-t",
        "--type",
        # Required and restricted to the modes json_to_final implements, so a
        # typo is reported as a usage error instead of a traceback.
        required=True,
        choices=["html", "bbox", "html+cell"],
        help="html, bbox, html+cell",
    )
    args = parser.parse_args()
    json_to_final(args.file, args.type)