|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging |
|
import os |
|
import sys |
|
|
|
# Put the directory two levels above this file's own directory at the front
# of the import search path (presumably the repository root, so the project
# imports that follow resolve when this script is run directly).
sys.path.insert(
    0,
    os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../')
    ),
)
|
|
|
from deepdoc.vision.seeit import draw_box |
|
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out |
|
from api.utils.file_utils import get_project_base_directory |
|
import argparse |
|
import re |
|
import numpy as np |
|
|
|
|
|
def main(args):
    """Run the selected recognizer over every input and save the results.

    For each image the detections are drawn with ``draw_box`` and the picture
    is saved to the matching entry of ``outputs``. In ``tsr`` mode the table
    is additionally rendered with ``get_table_html`` and written next to the
    picture as ``<output>.html``.

    Args:
        args: Parsed command-line namespace. ``args.mode`` must be
            ``"layout"`` or ``"tsr"`` (case-insensitive) and
            ``args.threshold`` must be accepted by ``float()``. The whole
            namespace is also forwarded to ``init_in_out``.

    Raises:
        ValueError: If ``args.mode`` is not one of the supported modes.
    """
    mode = args.mode.lower()
    # Fail fast with a clear message instead of hitting an unbound
    # ``detr``/``labels`` further down when the mode is unexpected.
    if mode not in ("layout", "tsr"):
        raise ValueError(
            f"unsupported mode {args.mode!r}; expected 'layout' or 'tsr'")

    images, outputs = init_in_out(args)
    threshold = float(args.threshold)

    ocr = None  # only needed (and only created) for table structure mode
    if mode == "layout":
        labels = LayoutRecognizer.labels
        detr = Recognizer(
            labels,
            "layout",
            os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
    else:
        labels = TableStructureRecognizer.labels
        detr = TableStructureRecognizer()
        ocr = OCR()

    layouts = detr(images, threshold)
    for i, lyt in enumerate(layouts):
        if mode == "tsr":
            html = get_table_html(images[i], lyt, ocr)
            with open(outputs[i] + ".html", "w", encoding='utf-8') as f:
                f.write(html)
            # Re-key the table components into the type/bbox/score records
            # that are handed to draw_box below.
            lyt = [{
                "type": t["label"],
                "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
                "score": t["score"]
            } for t in lyt]
        img = draw_box(images[i], lyt, labels, threshold)
        img.save(outputs[i], quality=95)
        logging.info("save result to: %s", outputs[i])
|
|
|
|
|
def get_table_html(img, tb_cpns, ocr):
    """Render a recognised table image as a standalone HTML page.

    OCR is run on the image, every text box is tagged with the row, header,
    column and spanning-cell component it is matched to, and the tagged boxes
    are passed to ``TableStructureRecognizer.construct_table``.

    Args:
        img: Table image; converted with ``np.array`` before being given to
            ``ocr``.
        tb_cpns: Detected table components. Each item is read by its
            ``"label"`` key; matched components are also read for ``"top"``,
            ``"bottom"``, ``"x0"`` and ``"x1"``.
        ocr: Callable yielding ``(quad, rec)`` pairs, where ``quad[0]``,
            ``quad[1]`` and ``quad[-1]`` are ``(x, y)`` points and ``rec[0]``
            is the recognised text.

    Returns:
        The HTML document as a string.
    """
    ocr_items = ocr(np.array(img))
    candidates = [
        {
            "x0": quad[0][0],
            "x1": quad[1][0],
            "top": quad[0][1],
            "text": rec[0],
            "bottom": quad[-1][1],
            "layout_type": "table",
            "page_number": 0,
        }
        for quad, rec in ocr_items
        # Drop boxes whose corner points are not in left/right, top/bottom order.
        if quad[0][0] <= quad[1][0] and quad[0][1] <= quad[-1][1]
    ]
    # The sort tolerance is a third of the mean box height, taken over every
    # OCR box (including the ones dropped above).
    mean_height = np.mean([quad[-1][1] - quad[0][1] for quad, _ in ocr_items])
    cells = Recognizer.sort_Y_firstly(candidates, mean_height / 3)

    def gather(kwd, fzy=10, ption=0.6):
        # Select the components whose label matches ``kwd``, then sort, clean
        # up against the OCR cells and sort again.
        picked = [cpn for cpn in tb_cpns if re.match(kwd, cpn["label"])]
        picked = Recognizer.sort_Y_firstly(picked, fzy)
        picked = Recognizer.layouts_cleanup(cells, picked, 5, ption)
        return Recognizer.sort_Y_firstly(picked, 0)

    def stamp_extent(cell, region, index, index_key):
        # Record the matched region's extent on the cell, then its index.
        cell["H_top"] = region["top"]
        cell["H_bott"] = region["bottom"]
        cell["H_left"] = region["x0"]
        cell["H_right"] = region["x1"]
        cell[index_key] = index

    headers = gather(r".*header$")
    rows = gather(r".* (row|header)")
    spans = gather(r".*spanning")
    columns = sorted(
        (cpn for cpn in tb_cpns if re.match(r"table column$", cpn["label"])),
        key=lambda cpn: cpn["x0"],
    )
    columns = Recognizer.layouts_cleanup(cells, columns, 5, 0.5)

    for cell in cells:
        row_idx = Recognizer.find_overlapped_with_threashold(
            cell, rows, thr=0.3)
        if row_idx is not None:
            cell["R"] = row_idx
            cell["R_top"] = rows[row_idx]["top"]
            cell["R_bott"] = rows[row_idx]["bottom"]

        head_idx = Recognizer.find_overlapped_with_threashold(
            cell, headers, thr=0.3)
        if head_idx is not None:
            stamp_extent(cell, headers[head_idx], head_idx, "H")

        col_idx = Recognizer.find_horizontally_tightest_fit(cell, columns)
        if col_idx is not None:
            cell["C"] = col_idx
            cell["C_left"] = columns[col_idx]["x0"]
            cell["C_right"] = columns[col_idx]["x1"]

        # Spanning cells are applied last, so their extent overwrites any
        # header extent written above.
        span_idx = Recognizer.find_overlapped_with_threashold(
            cell, spans, thr=0.3)
        if span_idx is not None:
            stamp_extent(cell, spans[span_idx], span_idx, "SP")

    # ``%%`` is a literal percent sign; the single ``%s`` receives the table.
    page_template = """
    <html>
    <head>
    <style>
      ._table_1nkzy_11 {
      margin: auto;
      width: 70%%;
      padding: 10px;
    }
    ._table_1nkzy_11 p {
      margin-bottom: 50px;
      border: 1px solid #e1e1e1;
    }

    caption {
      color: #6ac1ca;
      font-size: 20px;
      height: 50px;
      line-height: 50px;
      font-weight: 600;
      margin-bottom: 10px;
    }

    ._table_1nkzy_11 table {
      width: 100%%;
      border-collapse: collapse;
    }

    th {
      color: #fff;
      background-color: #6ac1ca;
    }

    td:hover {
      background: #c1e8e8;
    }

    tr:nth-child(even) {
      background-color: #f2f2f2;
    }

    ._table_1nkzy_11 th,
    ._table_1nkzy_11 td {
      text-align: center;
      border: 1px solid #ddd;
      padding: 8px;
    }
    </style>
    </head>
    <body>
    %s
    </body>
    </html>
"""
    return page_template % TableStructureRecognizer.construct_table(
        cells, html=True)
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputs',
        help="Directory where to store images or PDFs, or a file path to a single image or PDF",
        required=True)
    parser.add_argument(
        '--output_dir',
        help="Directory where to store the output images. Default: './layouts_outputs'",
        default="./layouts_outputs")
    parser.add_argument(
        '--threshold',
        # Convert on the command line so the value is always a float (as the
        # default already is) and bad input yields an argparse usage error.
        type=float,
        help="A threshold to filter out detections. Default: 0.5",
        default=0.5)
    parser.add_argument(
        '--mode',
        help="Task mode: layout recognition or table structure recognition",
        choices=["layout", "tsr"],
        default="layout")
    args = parser.parse_args()
    main(args)
|
|