import pyclipper
import cv2
import numpy as np
from shapely.geometry import Polygon
import torch


class SegDetectorRepresenter():
    def __init__(self, thresh=0.6, box_thresh=0.8, max_candidates=1000, unclip_ratio=2.2):
        self.min_size = 3
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio

    def __call__(self, batch, pred, is_output_polygon=False):
        '''
        batch: a dict produced by dataloaders.
            image: tensor of shape (N, C, H, W).
            polygons: tensor of shape (N, K, 4, 2), the polygons of objective regions.
            ignore_tags: tensor of shape (N, K), indicates whether a region is ignorable or not.
            shape: the original shape of images.
            filename: the original filenames of images.
        pred:
            binary: text region segmentation map, with shape (N, H, W).
            thresh: [if exists] threshold prediction with shape (N, H, W).
            thresh_binary: [if exists] binarized map with threshold, (N, H, W).
        '''
        pred = pred[:, 0, :, :]
        segmentation = self.binarize(pred)
        boxes_batch = []
        scores_batch = []
        batch_size = pred.size(0) if isinstance(pred, torch.Tensor) else pred.shape[0]
        for batch_index in range(batch_size):
            height, width = batch['shape'][batch_index]
            if is_output_polygon:
                boxes, scores = self.polygons_from_bitmap(pred[batch_index], segmentation[batch_index], width, height)
            else:
                boxes, scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height)
            boxes_batch.append(boxes)
            scores_batch.append(scores)
        return boxes_batch, scores_batch

    def binarize(self, pred):
        return pred > self.thresh

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single map with shape (H, W), whose values are binarized as {0, 1}
        '''
        assert len(_bitmap.shape) == 2
        bitmap = _bitmap.cpu().numpy()  # The first channel
        pred = pred.cpu().detach().numpy()
        height, width = bitmap.shape
        boxes = []
        scores = []

        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours[:self.max_candidates]:
            # Approximate the contour with a coarser polygon.
            epsilon = 0.005 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue
            # _, sside = self.get_mini_boxes(contour)
            # if sside < self.min_size:
            #     continue
            score = self.box_score_fast(pred, contour.squeeze(1))
            if self.box_thresh > score:
                continue

            if points.shape[0] > 2:
                box = self.unclip(points, unclip_ratio=self.unclip_ratio)
                if len(box) > 1:
                    continue
            else:
                continue
            box = box.reshape(-1, 2)

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()

            # Rescale from bitmap coordinates to the original image size.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box)
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single map with shape (H, W), whose values are binarized as {0, 1}
        '''
        assert len(_bitmap.shape) == 2
        if isinstance(pred, torch.Tensor):
            bitmap = _bitmap.cpu().numpy()  # The first channel
            pred = pred.cpu().detach().numpy()
        else:
            bitmap = _bitmap
        height, width = bitmap.shape
        try:
            contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        except ValueError:
            # cv2.findContours returns three values in OpenCV 3.x.
            return [], []
        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
        scores = np.zeros((num_contours,), dtype=np.float32)

        for index in range(num_contours):
            contour = contours[index].squeeze(1)
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            score = self.box_score_fast(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, unclip_ratio=self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            if not isinstance(dest_width, int):
                dest_width = dest_width.item()
                dest_height = dest_height.item()

            # Rescale from bitmap coordinates to the original image size.
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
            # Roll the corners so the top-left point comes first.
            startidx = box.sum(axis=1).argmin()
            box = np.roll(box, 4 - startidx, 0)
            boxes[index, :, :] = box.astype(np.int16)
            scores[index] = score
        return boxes, scores

    def unclip(self, box, unclip_ratio=1.8):
        # Expand the shrunk polygon by an offset distance proportional to area / perimeter.
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        # Minimum-area rotated rectangle; corners ordered clockwise, starting from the top-left.
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [points[index_1], points[index_2], points[index_3], points[index_4]]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        # Mean prediction score inside the polygon, computed on its bounding-box crop.
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
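

# --- Minimal usage sketch (illustrative only, not part of the original module) ---
# Assumes a DB-style detector whose output `pred` has shape (N, 1, H, W) with values
# in [0, 1]; the random tensor below merely exercises the post-processing API, and the
# thresholds are arbitrary placeholder values.
if __name__ == '__main__':
    representer = SegDetectorRepresenter(thresh=0.3, box_thresh=0.5)
    n, h, w = 1, 640, 640
    dummy_pred = torch.rand(n, 1, h, w)   # stand-in for the network's probability map
    dummy_batch = {'shape': [(h, w)]}     # original (height, width) of each input image
    boxes_batch, scores_batch = representer(dummy_batch, dummy_pred, is_output_polygon=False)
    print('image 0: %d candidate boxes' % len(boxes_batch[0]))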