Update files
- .gitignore +3 -0
- app.py +112 -4
- config.yaml +72 -0
- rapidocr_onnxruntime/__init__.py +4 -0
- rapidocr_onnxruntime/ch_ppocr_v2_cls/__init__.py +4 -0
- rapidocr_onnxruntime/ch_ppocr_v2_cls/config.yaml +14 -0
- rapidocr_onnxruntime/ch_ppocr_v2_cls/text_cls.py +117 -0
- rapidocr_onnxruntime/ch_ppocr_v2_cls/utils.py +80 -0
- rapidocr_onnxruntime/ch_ppocr_v3_det/__init__.py +4 -0
- rapidocr_onnxruntime/ch_ppocr_v3_det/config.yaml +29 -0
- rapidocr_onnxruntime/ch_ppocr_v3_det/text_detect.py +127 -0
- rapidocr_onnxruntime/ch_ppocr_v3_det/utils.py +452 -0
- rapidocr_onnxruntime/ch_ppocr_v3_rec/__init__.py +4 -0
- rapidocr_onnxruntime/ch_ppocr_v3_rec/config.yaml +12 -0
- rapidocr_onnxruntime/ch_ppocr_v3_rec/text_recognize.py +120 -0
- rapidocr_onnxruntime/ch_ppocr_v3_rec/utils.py +128 -0
- rapidocr_onnxruntime/rapid_ocr_api.py +164 -0
- requirements.txt +9 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
*.pyc

__pycache__/
app.py
CHANGED
@@ -1,7 +1,115 @@
# -*- encoding: utf-8 -*-
import math
import random
from pathlib import Path
import time

import cv2
import gradio as gr
from rapidocr_onnxruntime import TextSystem
import numpy as np
from PIL import Image, ImageDraw, ImageFont

text_sys = TextSystem('config.yaml')


def draw_ocr_box_txt(image, boxes, txts, font_path,
                     scores=None, text_score=0.5):
    if not Path(font_path).exists():
        raise FileNotFoundError(f'The {font_path} does not exist!\n'
                                f'Please download the file from https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing')

    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < text_score:
            continue

        color = (random.randint(0, 255),
                 random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon([box[0][0], box[0][1],
                            box[1][0], box[1][1],
                            box[2][0], box[2][1],
                            box[3][0], box[3][1]],
                           outline=color)

        box_height = math.sqrt((box[0][0] - box[3][0])**2
                               + (box[0][1] - box[3][1])**2)

        box_width = math.sqrt((box[0][0] - box[1][0])**2
                              + (box[0][1] - box[1][1])**2)

        # Tall, narrow boxes are treated as vertical text and drawn
        # one character below another.
        if box_height > 2 * box_width:
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size,
                                      encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                char_size = font.getsize(c)
                draw_right.text((box[0][0] + 3, cur_y), c,
                                fill=(0, 0, 0), font=font)
                cur_y += char_size[1]
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text([box[0][0], box[0][1]], txt,
                            fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image_path, boxes, rec_res, font_path="resources/fonts/FZYTK.TTF"):
    image = Image.open(image_path)
    txts = [rec_res[i][0] for i in range(len(rec_res))]
    scores = [rec_res[i][1] for i in range(len(rec_res))]

    draw_img = draw_ocr_box_txt(image, boxes,
                                txts, font_path,
                                scores,
                                text_score=0.5)

    draw_img_save = Path("./inference_results/")
    if not draw_img_save.exists():
        draw_img_save.mkdir(parents=True, exist_ok=True)

    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
    cv2.imwrite(image_save, draw_img[:, :, ::-1])
    return image_save


def inference(img):
    img_path = img.name
    img = cv2.imread(img_path)
    dt_boxes, rec_res = text_sys(img)
    img_save_path = visualize(img_path, dt_boxes, rec_res)
    return img_save_path, rec_res


title = 'Rapid🗲OCR Demo (捷智OCR)'
description = 'Gradio demo for RapidOCR. Github Repo: https://github.com/RapidAI/RapidOCR'
article = "<p style='text-align: center'>A completely open-source, free OCR SDK that supports offline deployment across platforms and languages. <a href='https://github.com/RapidAI/RapidOCR'>Github Repo</a></p>"
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
gr.Interface(
    inference,
    inputs=gr.inputs.Image(type='file', label='Input'),
    outputs=[
        gr.outputs.Image(type='file', label='Output_image'),
        gr.outputs.Textbox(type='text', label='Output_text')
    ],
    title=title,
    description=description,
    article=article,
    css=css,
    allow_flagging='never',
).launch(debug=True, enable_queue=True)
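For quick verification outside Gradio, the same pipeline the inference callback wraps can be driven directly. A minimal smoke-test sketch, assuming the ONNX models referenced in config.yaml (added below) are in place; the test-image path is hypothetical:

# Sketch only; 'resources/test.jpg' is a hypothetical path.
import cv2
from rapidocr_onnxruntime import TextSystem

text_sys = TextSystem('config.yaml')
img = cv2.imread('resources/test.jpg')
dt_boxes, rec_res = text_sys(img)   # detection boxes and (text, score) pairs
for text, score in rec_res:
    print(f'{score:.3f}\t{text}')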
config.yaml
ADDED
@@ -0,0 +1,72 @@
Global:
    text_score: 0.5
    use_angle_cls: true
    print_verbose: true
    min_height: 30
    width_height_ratio: 8

Det:
    module_name: ch_ppocr_v3_det
    class_name: TextDetector
    model_path: resources/models/ch_PP-OCRv3_det_infer.onnx

    use_cuda: false
    # Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    pre_process:
        DetResizeForTest:
            limit_side_len: 736
            limit_type: min
        NormalizeImage:
            std: [0.229, 0.224, 0.225]
            mean: [0.485, 0.456, 0.406]
            scale: 1./255.
            order: hwc
        ToCHWImage:
        KeepKeys:
            keep_keys: ['image', 'shape']

    post_process:
        thresh: 0.3
        box_thresh: 0.5
        max_candidates: 1000
        unclip_ratio: 1.6
        use_dilation: true
        score_mode: fast

Cls:
    module_name: ch_ppocr_v2_cls
    class_name: TextClassifier
    model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx

    use_cuda: false
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    cls_image_shape: [3, 48, 192]
    cls_batch_num: 6
    cls_thresh: 0.9
    label_list: ['0', '180']

Rec:
    module_name: ch_ppocr_v3_rec
    class_name: TextRecognizer
    model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx

    use_cuda: false
    CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true

    rec_img_shape: [3, 48, 320]
    rec_batch_num: 6
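rapid_ocr_api.py (+164 lines in this commit) consumes this file but is not reproduced in this section. The module_name/class_name fields suggest config-driven stage construction; the importlib dispatch below is a sketch of how that could work, an assumption rather than the confirmed TextSystem implementation:

# Sketch only: how the Det/Cls/Rec blocks could be turned into stage objects.
import importlib
import yaml

with open('config.yaml', 'rb') as f:
    config = yaml.load(f, Loader=yaml.Loader)

def build_stage(stage_cfg):
    module = importlib.import_module(
        'rapidocr_onnxruntime.' + stage_cfg['module_name'])
    cls = getattr(module, stage_cfg['class_name'])   # e.g. TextDetector
    return cls(stage_cfg)

detector = build_stage(config['Det'])   # TextDetector built from the Det block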
rapidocr_onnxruntime/__init__.py
ADDED
@@ -0,0 +1,4 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .rapid_ocr_api import TextSystem
rapidocr_onnxruntime/ch_ppocr_v2_cls/__init__.py
ADDED
@@ -0,0 +1,4 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_cls import TextClassifier
rapidocr_onnxruntime/ch_ppocr_v2_cls/config.yaml
ADDED
@@ -0,0 +1,14 @@
model_path: resources/models/ch_ppocr_mobile_v2.0_cls_infer.onnx

use_cuda: false
# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

cls_image_shape: [3, 48, 192]
cls_batch_num: 6
cls_thresh: 0.9
label_list: ['0', '180']
rapidocr_onnxruntime/ch_ppocr_v2_cls/text_cls.py
ADDED
@@ -0,0 +1,117 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import copy
import math
import time
from typing import List

import cv2
import numpy as np

try:
    from .utils import ClsPostProcess, read_yaml, OrtInferSession
except ImportError:
    from utils import ClsPostProcess, read_yaml, OrtInferSession


class TextClassifier(object):
    def __init__(self, config):
        self.cls_image_shape = config['cls_image_shape']
        self.cls_batch_num = config['cls_batch_num']
        self.cls_thresh = config['cls_thresh']
        self.postprocess_op = ClsPostProcess(config['label_list'])

        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()

    def __call__(self, img_list: List[np.ndarray]):
        if isinstance(img_list, np.ndarray):
            img_list = [img_list]

        img_list = copy.deepcopy(img_list)

        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]

        # Sorting can speed up the cls process
        indices = np.argsort(np.array(width_list))

        img_num = len(img_list)
        cls_res = [['', 0.0]] * img_num
        batch_num = self.cls_batch_num
        elapse = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)

            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]])
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)

            starttime = time.time()
            onnx_inputs = {self.input_name: norm_img_batch}
            prob_out = self.session.run(None, onnx_inputs)[0]
            cls_result = self.postprocess_op(prob_out)
            elapse += time.time() - starttime

            for rno in range(len(cls_result)):
                label, score = cls_result[rno]
                cls_res[indices[beg_img_no + rno]] = [label, score]
                if '180' in label and score > self.cls_thresh:
                    # The rotate flag 1 is cv2.ROTATE_180.
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
        return img_list, cls_res, elapse

    def resize_norm_img(self, img):
        img_c, img_h, img_w = self.cls_image_shape
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(img_h * ratio) > img_w:
            resized_w = img_w
        else:
            resized_w = int(math.ceil(img_h * ratio))

        resized_image = cv2.resize(img, (resized_w, img_h))
        resized_image = resized_image.astype('float32')
        if img_c == 1:
            resized_image = resized_image / 255
            resized_image = resized_image[np.newaxis, :]
        else:
            resized_image = resized_image.transpose((2, 0, 1)) / 255

        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((img_c, img_h, img_w), dtype=np.float32)
        padding_im[:, :, :resized_w] = resized_image
        return padding_im


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
    parser.add_argument('--config_path', type=str, default='config.yaml')
    args = parser.parse_args()

    config = read_yaml(args.config_path)

    text_classifier = TextClassifier(config)

    img = cv2.imread(args.image_path)
    img_list, cls_res, predict_time = text_classifier(img)
    for ino in range(len(img_list)):
        print(f"cls result:{cls_res[ino]}")
rapidocr_onnxruntime/ch_ppocr_v2_cls/utils.py
ADDED
@@ -0,0 +1,80 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings

import yaml
from onnxruntime import (get_available_providers, get_device,
                         SessionOptions, InferenceSession,
                         GraphOptimizationLevel)


class OrtInferSession(object):
    def __init__(self, config):
        sess_opt = SessionOptions()
        sess_opt.log_severity_level = 4
        sess_opt.enable_cpu_mem_arena = False
        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        cuda_ep = 'CUDAExecutionProvider'
        cpu_ep = 'CPUExecutionProvider'
        cpu_provider_options = {
            "arena_extend_strategy": "kSameAsRequested",
        }

        EP_list = []
        if config['use_cuda'] and get_device() == 'GPU' \
                and cuda_ep in get_available_providers():
            EP_list = [(cuda_ep, config[cuda_ep])]
        EP_list.append((cpu_ep, cpu_provider_options))

        self.session = InferenceSession(config['model_path'],
                                        sess_options=sess_opt,
                                        providers=EP_list)

        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
            warnings.warn(f'{cuda_ep} is not available in the current environment; inference automatically falls back to {cpu_ep}.\n'
                          'Please ensure the installed onnxruntime-gpu version matches your CUDA and cuDNN versions; '
                          'you can check their compatibility on the official web site: '
                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
                          RuntimeWarning)

    def get_input_name(self, input_idx=0):
        return self.session.get_inputs()[input_idx].name

    def get_output_name(self, output_idx=0):
        return self.session.get_outputs()[output_idx].name


def read_yaml(yaml_path):
    with open(yaml_path, 'rb') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    return data


class ClsPostProcess(object):
    """ Convert between text-label and text-index """

    def __init__(self, label_list):
        super(ClsPostProcess, self).__init__()
        self.label_list = label_list

    def __call__(self, preds, label=None):
        pred_idxs = preds.argmax(axis=1)
        decode_out = [(self.label_list[idx], preds[i, idx])
                      for i, idx in enumerate(pred_idxs)]
        if label is None:
            return decode_out

        label = [(self.label_list[idx], 1.0) for idx in label]
        return decode_out, label
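A toy run of ClsPostProcess makes the decode output concrete; the probabilities below are made up:

import numpy as np
from rapidocr_onnxruntime.ch_ppocr_v2_cls.utils import ClsPostProcess

post = ClsPostProcess(['0', '180'])
probs = np.array([[0.93, 0.07],    # crop 0: upright
                  [0.15, 0.85]])   # crop 1: upside down
# Yields one (label, score) pair per crop: ('0', 0.93) and ('180', 0.85);
# TextClassifier rotates a crop only when label is '180' and score > cls_thresh.
print(post(probs))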
rapidocr_onnxruntime/ch_ppocr_v3_det/__init__.py
ADDED
@@ -0,0 +1,4 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_detect import TextDetector
rapidocr_onnxruntime/ch_ppocr_v3_det/config.yaml
ADDED
@@ -0,0 +1,29 @@
model_path: resources/models/ch_PP-OCRv3_det_infer.onnx

use_cuda: false
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

pre_process:
    DetResizeForTest:
        limit_side_len: 736
        limit_type: min
    NormalizeImage:
        std: [0.229, 0.224, 0.225]
        mean: [0.485, 0.456, 0.406]
        scale: 1./255.
        order: hwc
    ToCHWImage:
    KeepKeys:
        keep_keys: ['image', 'shape']

post_process:
    thresh: 0.3
    box_thresh: 0.5
    max_candidates: 1000
    unclip_ratio: 1.6
    use_dilation: true
    score_mode: "fast"
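The unclip_ratio above controls how far each detected box is expanded before cropping. A worked example of the offset formula from DBPostProcess.unclip (the box size here is illustrative):

from shapely.geometry import Polygon

# distance = area * unclip_ratio / perimeter, as in DBPostProcess.unclip.
poly = Polygon([(0, 0), (100, 0), (100, 30), (0, 30)])   # a 100x30 text box
distance = poly.area * 1.6 / poly.length
print(round(distance, 2))   # 18.46: each edge is pushed outward by ~18 px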
rapidocr_onnxruntime/ch_ppocr_v3_det/text_detect.py
ADDED
@@ -0,0 +1,127 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import argparse
import time

import cv2
import numpy as np

try:
    from .utils import (DBPostProcess, create_operators,
                        transform, read_yaml, OrtInferSession)
except ImportError:
    from utils import (DBPostProcess, create_operators,
                       transform, read_yaml, OrtInferSession)


class TextDetector(object):
    def __init__(self, config):
        self.preprocess_op = create_operators(config['pre_process'])
        self.postprocess_op = DBPostProcess(**config['post_process'])

        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()

    def __call__(self, img):
        if img is None:
            raise ValueError('img is None')

        ori_im_shape = img.shape[:2]

        data = {'image': img}
        data = transform(data, self.preprocess_op)
        img, shape_list = data
        if img is None:
            return None, 0

        img = np.expand_dims(img, axis=0).astype(np.float32)
        shape_list = np.expand_dims(shape_list, axis=0)

        starttime = time.time()
        preds = self.session.run(None, {self.input_name: img})

        post_result = self.postprocess_op(preds[0], shape_list)

        dt_boxes = post_result[0]['points']
        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im_shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse

    def order_points_clockwise(self, pts):
        """
        reference from:
        https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
        sort the points based on their x-coordinates
        """
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted
        # x-coordinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def clip_det_res(self, points, img_height, img_width):
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            box = self.clip_det_res(box, img_height, img_width)
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='config.yaml')
    parser.add_argument('--image_path', type=str, default=None)
    args = parser.parse_args()

    config = read_yaml(args.config_path)

    text_detector = TextDetector(config)

    img = cv2.imread(args.image_path)
    dt_boxes, elapse = text_detector(img)

    from utils import draw_text_det_res
    src_im = draw_text_det_res(dt_boxes, args.image_path)
    cv2.imwrite('det_results.jpg', src_im)
    print('The det_results.jpg has been saved in the current directory.')
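A quick sanity check of the corner-ordering convention, with the logic of order_points_clockwise copied inline and applied to a toy box:

import numpy as np

pts = np.array([[10, 90], [10, 10], [90, 10], [90, 90]], dtype='float32')
x_sorted = pts[np.argsort(pts[:, 0]), :]
left, right = x_sorted[:2], x_sorted[2:]
tl, bl = left[np.argsort(left[:, 1]), :]
tr, br = right[np.argsort(right[:, 1]), :]
print(np.array([tl, tr, br, bl]))
# [[10. 10.] [90. 10.] [90. 90.] [10. 90.]]
# i.e. top-left, top-right, bottom-right, bottom-left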
rapidocr_onnxruntime/ch_ppocr_v3_det/utils.py
ADDED
@@ -0,0 +1,452 @@
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
import sys
import warnings

import cv2
import numpy as np
import pyclipper
import six
import yaml
from shapely.geometry import Polygon
from onnxruntime import (get_available_providers, get_device,
                         SessionOptions, InferenceSession,
                         GraphOptimizationLevel)


class OrtInferSession(object):
    def __init__(self, config):
        sess_opt = SessionOptions()
        sess_opt.log_severity_level = 4
        sess_opt.enable_cpu_mem_arena = False
        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL

        cuda_ep = 'CUDAExecutionProvider'
        cpu_ep = 'CPUExecutionProvider'
        cpu_provider_options = {
            "arena_extend_strategy": "kSameAsRequested",
        }

        EP_list = []
        if config['use_cuda'] and get_device() == 'GPU' \
                and cuda_ep in get_available_providers():
            EP_list = [(cuda_ep, config[cuda_ep])]
        EP_list.append((cpu_ep, cpu_provider_options))

        self.session = InferenceSession(config['model_path'],
                                        sess_options=sess_opt,
                                        providers=EP_list)

        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
            warnings.warn(f'{cuda_ep} is not available in the current environment; inference automatically falls back to {cpu_ep}.\n'
                          'Please ensure the installed onnxruntime-gpu version matches your CUDA and cuDNN versions; '
                          'you can check their compatibility on the official web site: '
                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
                          RuntimeWarning)

    def get_input_name(self, input_idx=0):
        return self.session.get_inputs()[input_idx].name

    def get_output_name(self, output_idx=0):
        return self.session.get_outputs()[output_idx].name


def read_yaml(yaml_path):
    with open(yaml_path, 'rb') as f:
        data = yaml.load(f, Loader=yaml.Loader)
    return data


class DecodeImage(object):
    """ decode image """

    def __init__(self, img_mode='RGB', channel_first=False):
        self.img_mode = img_mode
        self.channel_first = channel_first

    def __call__(self, data):
        img = data['image']
        if six.PY2:
            assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage"
        else:
            assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage"

        img = np.frombuffer(img, dtype='uint8')
        img = cv2.imdecode(img, 1)
        if img is None:
            return None

        if self.img_mode == 'GRAY':
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]'
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))
        data['image'] = img
        return data


class NormalizeImage(object):
    """ normalize image, e.g. subtract mean, divide std """

    def __init__(self, scale=None, mean=None, std=None, order='chw'):
        if isinstance(scale, str):
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = np.array(data['image']).astype(np.float32)
        data['image'] = (img * self.scale - self.mean) / self.std
        return data


class ToCHWImage(object):
    """ convert hwc image to chw image"""
    def __init__(self):
        pass

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data


class KeepKeys(object):
    def __init__(self, keep_keys):
        self.keep_keys = keep_keys

    def __call__(self, data):
        data_list = []
        for key in self.keep_keys:
            data_list.append(data[key])
        return data_list


class DetResizeForTest(object):
    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs.get('limit_side_len', 736)
            self.limit_type = kwargs.get('limit_type', 'min')

        if 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = kwargs.get('limit_side_len', 736)
            self.limit_type = kwargs.get('limit_type', 'min')

    def __call__(self, data):
        img = data['image']
        src_h, src_w = img.shape[:2]

        if self.resize_type == 0:
            # img, shape = self.resize_image_type0(img)
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            # img, shape = self.resize_image_type1(img)
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # return img, np.array([ori_h, ori_w])
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        limit_side_len = self.limit_side_len
        h, w = img.shape[:2]

        # limit the max side
        if self.limit_type == 'max':
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        else:
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        resize_h = int(round(resize_h / 32) * 32)
        resize_w = int(round(resize_w / 32) * 32)

        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                return None, (None, None)
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        h, w = img.shape[:2]

        resize_w = w
        resize_h = h

        # Fix the longer side
        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]


def transform(data, ops=None):
    """ transform """
    if ops is None:
        ops = []

    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data


def create_operators(op_param_dict):
    """
    create operators based on the config
    """
    ops = []
    for op_name, param in op_param_dict.items():
        if param is None:
            param = {}
        op = eval(op_name)(**param)
        ops.append(op)
    return ops


def draw_text_det_res(dt_boxes, img_path):
    src_im = cv2.imread(img_path)
    for box in dt_boxes:
        box = np.array(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(src_im, [box], True,
                      color=(255, 255, 0), thickness=2)
    return src_im


class DBPostProcess(object):
    """The post process for Differentiable Binarization (DB)."""

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 score_mode="fast",
                 use_dilation=False):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode

        if use_dilation:
            self.dilation_kernel = np.array([[1, 1], [1, 1]])
        else:
            self.dilation_kernel = None

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use the polygon mean score as the box score
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, pred, shape_list):
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                   src_w, src_h)

            boxes_batch.append({'points': boxes})
        return boxes_batch
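How DetResizeForTest behaves with the shipped settings (limit_side_len=736, limit_type='min'); a sketch assuming the package is importable from the repo root:

import numpy as np
from rapidocr_onnxruntime.ch_ppocr_v3_det.utils import DetResizeForTest

op = DetResizeForTest(limit_side_len=736, limit_type='min')
data = op({'image': np.zeros((600, 800, 3), dtype=np.uint8)})
# The 600-px short side is scaled up to 736, then both sides are rounded
# to multiples of 32: (600, 800) -> (736, 992).
print(data['image'].shape)   # (736, 992, 3)
print(data['shape'])         # [src_h, src_w, ratio_h, ratio_w] ~ [600, 800, 1.227, 1.24]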
rapidocr_onnxruntime/ch_ppocr_v3_rec/__init__.py
ADDED
@@ -0,0 +1,4 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from .text_recognize import TextRecognizer
rapidocr_onnxruntime/ch_ppocr_v3_rec/config.yaml
ADDED
@@ -0,0 +1,12 @@
model_path: resources/models/ch_PP-OCRv3_rec_infer.onnx

use_cuda: false
# Details of the params: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
CUDAExecutionProvider:
    device_id: 0
    arena_extend_strategy: kNextPowerOfTwo
    cudnn_conv_algo_search: EXHAUSTIVE
    do_copy_in_default_stream: true

rec_img_shape: [3, 48, 320]
rec_batch_num: 6
rapidocr_onnxruntime/ch_ppocr_v3_rec/text_recognize.py
ADDED
@@ -0,0 +1,120 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import math
import time
from typing import List

import cv2
import numpy as np

try:
    from .utils import CTCLabelDecode, read_yaml, OrtInferSession
except ImportError:
    from utils import CTCLabelDecode, read_yaml, OrtInferSession


class TextRecognizer(object):
    def __init__(self, config):
        session_instance = OrtInferSession(config)
        self.session = session_instance.session
        self.input_name = session_instance.get_input_name()
        meta_dict = session_instance.get_metadata()

        # Prefer the character dict embedded in the ONNX model metadata;
        # fall back to an external keys file from the config.
        if 'character' in meta_dict.keys():
            self.character_dict_path = meta_dict['character'].splitlines()
        else:
            self.character_dict_path = config.get('keys_path', None)
        self.postprocess_op = CTCLabelDecode(self.character_dict_path)

        self.rec_batch_num = config['rec_batch_num']
        self.rec_image_shape = config['rec_img_shape']

    def __call__(self, img_list: List[np.ndarray]):
        if isinstance(img_list, np.ndarray):
            img_list = [img_list]

        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]

        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        img_num = len(img_list)
        rec_res = [['', 0.0]] * img_num

        batch_num = self.rec_batch_num
        elapse = 0
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                norm_img_batch.append(norm_img[np.newaxis, :])
            norm_img_batch = np.concatenate(norm_img_batch).astype(np.float32)

            starttime = time.time()
            onnx_inputs = {self.input_name: norm_img_batch}
            preds = self.session.run(None, onnx_inputs)[0]
            rec_result = self.postprocess_op(preds)

            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
            elapse += time.time() - starttime
        return rec_res, elapse

    def resize_norm_img(self, img, max_wh_ratio):
        img_channel, img_height, img_width = self.rec_image_shape
        assert img_channel == img.shape[2]

        img_width = int(img_height * max_wh_ratio)

        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(img_height * ratio) > img_width:
            resized_w = img_width
        else:
            resized_w = int(math.ceil(img_height * ratio))

        resized_image = cv2.resize(img, (resized_w, img_height))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5

        padding_im = np.zeros((img_channel, img_height, img_width),
                              dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', type=str, help='image_dir|image_path')
    parser.add_argument('--config_path', type=str, default='config.yaml')
    args = parser.parse_args()

    config = read_yaml(args.config_path)
    text_recognizer = TextRecognizer(config)

    img = cv2.imread(args.image_path)
    rec_res, predict_time = text_recognizer(img)
    print(f'rec result: {rec_res}\t cost: {predict_time}s')
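A worked example of the batch-width arithmetic in resize_norm_img: the configured width (320 in rec_img_shape) is recomputed per batch from the widest crop, so every crop in a batch is padded to the same dynamic width. The crop sizes below are illustrative:

import math

img_height = 48                      # from rec_img_shape [3, 48, 320]
ratios = [256 / 32, 64 / 32]         # w/h of two sample crops in one batch
batch_width = int(img_height * max(ratios))
print(batch_width)                   # 384: overrides the configured 320
narrow_w = int(math.ceil(img_height * ratios[1]))
print(narrow_w)                      # 96: the narrow crop is resized to 96 px
                                     # wide, then zero-padded out to 384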
rapidocr_onnxruntime/ch_ppocr_v3_rec/utils.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+# -*- encoding: utf-8 -*-
+# @Author: SWHL
+# @Contact: [email protected]
+import warnings
+
+import numpy as np
+import yaml
+from onnxruntime import (get_available_providers, get_device,
+                         SessionOptions, InferenceSession,
+                         GraphOptimizationLevel)
+
+
+class OrtInferSession(object):
+    def __init__(self, config):
+        sess_opt = SessionOptions()
+        sess_opt.log_severity_level = 4
+        sess_opt.enable_cpu_mem_arena = False
+        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+
+        cuda_ep = 'CUDAExecutionProvider'
+        cpu_ep = 'CPUExecutionProvider'
+        cpu_provider_options = {
+            "arena_extend_strategy": "kSameAsRequested",
+        }
+
+        EP_list = []
+        if config['use_cuda'] and get_device() == 'GPU' \
+                and cuda_ep in get_available_providers():
+            EP_list = [(cuda_ep, config[cuda_ep])]
+        EP_list.append((cpu_ep, cpu_provider_options))
+
+        self.session = InferenceSession(config['model_path'],
+                                        sess_options=sess_opt,
+                                        providers=EP_list)
+
+        if config['use_cuda'] and cuda_ep not in self.session.get_providers():
+            warnings.warn(f'{cuda_ep} is not available in the current environment; '
+                          f'inference will automatically fall back to {cpu_ep}.\n'
+                          'Please ensure the installed onnxruntime-gpu version matches '
+                          'your CUDA and cuDNN versions; you can check their compatibility '
+                          'on the official site: '
+                          'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html',
+                          RuntimeWarning)
+
+    def get_input_name(self, input_idx=0):
+        return self.session.get_inputs()[input_idx].name
+
+    def get_output_name(self, output_idx=0):
+        return self.session.get_outputs()[output_idx].name
+
+    def get_metadata(self):
+        meta_dict = self.session.get_modelmeta().custom_metadata_map
+        return meta_dict
+
+
+def read_yaml(yaml_path):
+    with open(yaml_path, 'rb') as f:
+        data = yaml.load(f, Loader=yaml.Loader)
+    return data
+
+
+class CTCLabelDecode(object):
+    """Convert between text-label and text-index."""
+
+    def __init__(self, character_dict_path):
+        super(CTCLabelDecode, self).__init__()
+
+        self.character_str = []
+        assert character_dict_path is not None, "character_dict_path should not be None"
+
+        if isinstance(character_dict_path, str):
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str.append(line)
+        else:
+            self.character_str = character_dict_path
+        self.character_str.append(' ')
+
+        dict_character = self.add_special_char(self.character_str)
+        self.character = dict_character
+
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+
+    def __call__(self, preds, label=None):
+        preds_idx = preds.argmax(axis=2)
+        preds_prob = preds.max(axis=2)
+        text = self.decode(preds_idx, preds_prob,
+                           is_remove_duplicate=True)
+        if label is None:
+            return text
+        label = self.decode(label)
+        return text, label
+
+    def add_special_char(self, dict_character):
+        dict_character = ['blank'] + dict_character
+        return dict_character
+
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """Convert text-index into text-label."""
+        result_list = []
+        ignored_tokens = self.get_ignored_tokens()
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            conf_list = []
+            for idx in range(len(text_index[batch_idx])):
+                if text_index[batch_idx][idx] in ignored_tokens:
+                    continue
+                if is_remove_duplicate:
+                    # only for predict
+                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
+                            batch_idx][idx]:
+                        continue
+                char_list.append(self.character[int(text_index[batch_idx][idx])])
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+            text = ''.join(char_list)
+            result_list.append((text, np.mean(conf_list + [1e-50])))
+        return result_list
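CTCLabelDecode implements standard CTC post-processing: take the argmax class per timestep, drop the blank token (index 0, prepended by add_special_char), and collapse consecutive repeats. A quick sketch of its behavior on a fake prediction, passing a tiny in-memory alphabet instead of a dict file:

import numpy as np

from rapidocr_onnxruntime.ch_ppocr_v3_rec.utils import CTCLabelDecode

# Passing a list instead of a dict-file path yields the alphabet
# ['blank', 'a', 'b', 'c', ' '] after the trailing space and add_special_char.
decoder = CTCLabelDecode(character_dict_path=list('abc'))

# One sequence of five timesteps over five classes (one-hot), whose argmax
# path is [1, 1, 0, 2, 2], i.e. 'a', 'a', blank, 'b', 'b'.
preds = np.eye(5, dtype=np.float32)[[1, 1, 0, 2, 2]][np.newaxis, ...]

# Repeats collapse and blanks drop, leaving 'ab'; the score is the mean of
# the kept confidences plus the 1e-50 guard, so about 0.667 here.
print(decoder(preds))  # [('ab', 0.666...)]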
rapidocr_onnxruntime/rapid_ocr_api.py
ADDED
@@ -0,0 +1,164 @@
+# -*- encoding: utf-8 -*-
+# @Author: SWHL
+# @Contact: [email protected]
+import copy
+import importlib
+import sys
+from pathlib import Path
+
+import cv2
+import numpy as np
+import yaml
+
+root_dir = Path(__file__).resolve().parent
+sys.path.append(str(root_dir))
+
+
+class TextSystem(object):
+    def __init__(self, config_path):
+        super(TextSystem, self).__init__()
+        if not Path(config_path).exists():
+            raise FileNotFoundError(f'{config_path} does not exist!')
+
+        config = self.read_yaml(config_path)
+
+        global_config = config['Global']
+        self.print_verbose = global_config['print_verbose']
+        self.text_score = global_config['text_score']
+        self.min_height = global_config['min_height']
+        self.width_height_ratio = global_config['width_height_ratio']
+
+        TextDetector = self.init_module(config['Det']['module_name'],
+                                        config['Det']['class_name'])
+        self.text_detector = TextDetector(config['Det'])
+
+        TextRecognizer = self.init_module(config['Rec']['module_name'],
+                                          config['Rec']['class_name'])
+        self.text_recognizer = TextRecognizer(config['Rec'])
+
+        self.use_angle_cls = global_config['use_angle_cls']
+        if self.use_angle_cls:
+            TextClassifier = self.init_module(config['Cls']['module_name'],
+                                              config['Cls']['class_name'])
+            self.text_cls = TextClassifier(config['Cls'])
+
+    def __call__(self, img: np.ndarray):
+        h, w = img.shape[:2]
+        if self.width_height_ratio == -1:
+            use_limit_ratio = False
+        else:
+            use_limit_ratio = w / h > self.width_height_ratio
+
+        if h <= self.min_height or use_limit_ratio:
+            dt_boxes, img_crop_list = self.get_boxes_img_without_det(img, h, w)
+        else:
+            dt_boxes, elapse = self.text_detector(img)
+            if dt_boxes is None or len(dt_boxes) < 1:
+                return None, None
+            if self.print_verbose:
+                print(f'dt_boxes num: {len(dt_boxes)}, elapse: {elapse}')
+
+            dt_boxes = self.sorted_boxes(dt_boxes)
+            img_crop_list = self.get_crop_img_list(img, dt_boxes)
+
+        if self.use_angle_cls:
+            img_crop_list, _, elapse = self.text_cls(img_crop_list)
+            if self.print_verbose:
+                print(f'cls num: {len(img_crop_list)}, elapse: {elapse}')
+
+        rec_res, elapse = self.text_recognizer(img_crop_list)
+        if self.print_verbose:
+            print(f'rec_res num: {len(rec_res)}, elapse: {elapse}')
+
+        filter_boxes, filter_rec_res = self.filter_boxes_rec_by_score(dt_boxes,
+                                                                      rec_res)
+        return filter_boxes, filter_rec_res
+
+    @staticmethod
+    def read_yaml(yaml_path):
+        with open(yaml_path, 'rb') as f:
+            data = yaml.load(f, Loader=yaml.Loader)
+        return data
+
+    @staticmethod
+    def init_module(module_name, class_name):
+        module_part = importlib.import_module(module_name)
+        return getattr(module_part, class_name)
+
+    def get_boxes_img_without_det(self, img, h, w):
+        x0, y0, x1, y1 = 0, 0, w, h
+        dt_boxes = np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]])
+        dt_boxes = dt_boxes[np.newaxis, ...]
+        img_crop_list = [img]
+        return dt_boxes, img_crop_list
+
+    def get_crop_img_list(self, img, dt_boxes):
+        def get_rotate_crop_image(img, points):
+            img_crop_width = int(
+                max(
+                    np.linalg.norm(points[0] - points[1]),
+                    np.linalg.norm(points[2] - points[3])))
+            img_crop_height = int(
+                max(
+                    np.linalg.norm(points[0] - points[3]),
+                    np.linalg.norm(points[1] - points[2])))
+            pts_std = np.float32([[0, 0], [img_crop_width, 0],
+                                  [img_crop_width, img_crop_height],
+                                  [0, img_crop_height]])
+            M = cv2.getPerspectiveTransform(points, pts_std)
+            dst_img = cv2.warpPerspective(
+                img,
+                M, (img_crop_width, img_crop_height),
+                borderMode=cv2.BORDER_REPLICATE,
+                flags=cv2.INTER_CUBIC)
+            dst_img_height, dst_img_width = dst_img.shape[0:2]
+            if dst_img_height * 1.0 / dst_img_width >= 1.5:
+                dst_img = np.rot90(dst_img)
+            return dst_img
+
+        img_crop_list = []
+        for box in dt_boxes:
+            tmp_box = copy.deepcopy(box)
+            img_crop = get_rotate_crop_image(img, tmp_box)
+            img_crop_list.append(img_crop)
+        return img_crop_list
+
+    @staticmethod
+    def sorted_boxes(dt_boxes):
+        """
+        Sort text boxes in order from top to bottom, left to right
+        args:
+            dt_boxes(array): detected text boxes with shape (N, 4, 2)
+        return:
+            sorted boxes(array) with shape (N, 4, 2)
+        """
+        num_boxes = dt_boxes.shape[0]
+        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
+        _boxes = list(sorted_boxes)
+
+        for i in range(num_boxes - 1):
+            if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
+                    (_boxes[i + 1][0][0] < _boxes[i][0][0]):
+                _boxes[i], _boxes[i + 1] = _boxes[i + 1], _boxes[i]
+        return _boxes
+
+    def filter_boxes_rec_by_score(self, dt_boxes, rec_res):
+        filter_boxes, filter_rec_res = [], []
+        for box, rec_result in zip(dt_boxes, rec_res):
+            text, score = rec_result
+            if score >= self.text_score:
+                filter_boxes.append(box)
+                filter_rec_res.append(rec_result)
+        return filter_boxes, filter_rec_res
+
+
+if __name__ == '__main__':
+    text_sys = TextSystem('config.yaml')
+
+    img = cv2.imread('resources/test_images/det_images/ch_en_num.jpg')
+
+    result = text_sys(img)
+    print(result)
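A TextSystem call returns (filter_boxes, filter_rec_res) — quadrilateral boxes as (4, 2) arrays and (text, score) tuples already filtered by text_score — or (None, None) when nothing is detected. A short consumption sketch, assuming config.yaml and the test image from the __main__ block above are present:

import cv2

from rapidocr_onnxruntime import TextSystem

text_sys = TextSystem('config.yaml')
img = cv2.imread('resources/test_images/det_images/ch_en_num.jpg')

boxes, rec_res = text_sys(img)
if boxes is None:
    print('no text found')
else:
    for box, (text, score) in zip(boxes, rec_res):
        print(f'{text!r} (score={score:.3f}) at {box.tolist()}')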
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+Gradio
+pyclipper>=1.2.0
+Shapely>=1.7.1
+opencv_python==4.5.1.48
+six>=1.15.0
+numpy>=1.19.5
+Pillow
+PyYAML
+pytest
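To reproduce the Space locally, these dependencies can be installed with pip install -r requirements.txt before launching app.py.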