H committed on
Commit
ea41e57
·
1 Parent(s): 598945f

Fix pdfparser error (#1707)

Browse files

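### What problem does this PR solve?

Per the diff below, `LayoutRecognizer` now filters detected layouts while building `lts`: an entry is kept only if its score is at least 0.8 or its type is not in `self.garbage_layouts`, so garbage-type layouts scoring below 0.8 are dropped before sorting and cleanup.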

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

deepdoc/vision/layout_recognizer.py CHANGED
@@ -75,7 +75,7 @@ class LayoutRecognizer(Recognizer):
75
  "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
76
  "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
77
  "page_number": pn,
78
- } for b in lts]
79
  lts = self.sort_Y_firstly(lts, np.mean(
80
  [l["bottom"] - l["top"] for l in lts]) / 2)
81
  lts = self.layouts_cleanup(bxs, lts)
 
75
  "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
76
  "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
77
  "page_number": pn,
78
+ } for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
79
  lts = self.sort_Y_firstly(lts, np.mean(
80
  [l["bottom"] - l["top"] for l in lts]) / 2)
81
  lts = self.layouts_cleanup(bxs, lts)