Zhi-Qiang You
youzhiqiang
Kevin Hu
committed on
Commit
·
b57c1d6
1
Parent(s):
1ac7c68
fix:t_recognizer TypeError: 'super' object is not callable (#4404)
Browse files
### What problem does this PR solve?
[Bug]: layout recognizer failed for wrong boxes class type #4230
(https://github.com/infiniflow/ragflow/issues/4230)
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Co-authored-by: youzhiqiang <[email protected]>
Co-authored-by: Kevin Hu <[email protected]>
deepdoc/vision/layout_recognizer.py
CHANGED
@@ -153,6 +153,8 @@ class LayoutRecognizer(Recognizer):
|
|
153 |
ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set]
|
154 |
return ocr_res, page_layout
|
155 |
|
|
|
|
|
156 |
|
157 |
class LayoutRecognizer4YOLOv10(LayoutRecognizer):
|
158 |
labels = [
|
|
|
153 |
ocr_res = [b for b in ocr_res if b["text"].strip() not in garbag_set]
|
154 |
return ocr_res, page_layout
|
155 |
|
156 |
+
def forward(self, image_list, thr=0.7, batch_size=16):
|
157 |
+
return super().__call__(image_list, thr, batch_size)
|
158 |
|
159 |
class LayoutRecognizer4YOLOv10(LayoutRecognizer):
|
160 |
labels = [
|
deepdoc/vision/t_recognizer.py
CHANGED
@@ -23,7 +23,7 @@ sys.path.insert(
|
|
23 |
'../../')))
|
24 |
|
25 |
from deepdoc.vision.seeit import draw_box
|
26 |
-
from deepdoc.vision import LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
|
27 |
import argparse
|
28 |
import re
|
29 |
import numpy as np
|
@@ -33,9 +33,8 @@ def main(args):
|
|
33 |
images, outputs = init_in_out(args)
|
34 |
if args.mode.lower() == "layout":
|
35 |
detr = LayoutRecognizer("layout")
|
36 |
-
layouts =
|
37 |
if args.mode.lower() == "tsr":
|
38 |
-
labels = TableStructureRecognizer.labels
|
39 |
detr = TableStructureRecognizer()
|
40 |
ocr = OCR()
|
41 |
layouts = detr(images, thr=float(args.threshold))
|
@@ -50,7 +49,7 @@ def main(args):
|
|
50 |
"bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
|
51 |
"score": t["score"]
|
52 |
} for t in lyt]
|
53 |
-
img = draw_box(images[i], lyt, labels, float(args.threshold))
|
54 |
img.save(outputs[i], quality=95)
|
55 |
logging.info("save result to: " + outputs[i])
|
56 |
|
|
|
23 |
'../../')))
|
24 |
|
25 |
from deepdoc.vision.seeit import draw_box
|
26 |
+
from deepdoc.vision import LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
|
27 |
import argparse
|
28 |
import re
|
29 |
import numpy as np
|
|
|
33 |
images, outputs = init_in_out(args)
|
34 |
if args.mode.lower() == "layout":
|
35 |
detr = LayoutRecognizer("layout")
|
36 |
+
layouts = detr.forward(images, thr=float(args.threshold))
|
37 |
if args.mode.lower() == "tsr":
|
|
|
38 |
detr = TableStructureRecognizer()
|
39 |
ocr = OCR()
|
40 |
layouts = detr(images, thr=float(args.threshold))
|
|
|
49 |
"bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
|
50 |
"score": t["score"]
|
51 |
} for t in lyt]
|
52 |
+
img = draw_box(images[i], lyt, detr.labels, float(args.threshold))
|
53 |
img.save(outputs[i], quality=95)
|
54 |
logging.info("save result to: " + outputs[i])
|
55 |
|