Spaces:

Sunday01
/

testapi

Running

testapi / manga_translator /detection /default_utils /dbnet_utils.py

9dce458 8 months ago

7.51 kB


	import pyclipper
	import cv2
	import numpy as np
	from shapely.geometry import Polygon
	import torch

	class SegDetectorRepresenter():
	    '''
	    Post-processor that turns a text-region probability map into scored
	    boxes or polygons.

	    The map is binarized with ``thresh``; contours of the binary mask are
	    scored by the mean probability they cover, filtered with ``box_thresh``,
	    expanded outward according to ``unclip_ratio`` and rescaled to the
	    destination image size.
	    '''

	    def __init__(self, thresh=0.6, box_thresh=0.8, max_candidates=1000, unclip_ratio=2.2):
	        '''
	        thresh: a pixel is foreground when its probability is strictly above this value.
	        box_thresh: regions whose mean probability is below this value are dropped.
	        max_candidates: at most this many contours are examined per image.
	        unclip_ratio: scale factor of the outward offset applied to kept regions.
	        '''
	        # Smallest accepted short side of a region's min-area rectangle, in map pixels.
	        self.min_size = 3
	        self.thresh = thresh
	        self.box_thresh = box_thresh
	        self.max_candidates = max_candidates
	        self.unclip_ratio = unclip_ratio

	    def __call__(self, batch, pred, is_output_polygon=False):
	        '''
	        Extract text regions for every image of a batch.

	        batch: a dict produced by dataloaders. Only ``batch['shape']`` is read
	            here: one ``(height, width)`` pair per image giving the size the
	            results are rescaled to.
	        pred: torch tensor or NumPy array indexed as (N, C, H, W); channel 0 is
	            used as the text-region probability map.
	        is_output_polygon: return free-form polygons instead of 4-point boxes.

	        Returns ``(boxes_batch, scores_batch)``, two lists with one entry per
	        image (see ``polygons_from_bitmap`` / ``boxes_from_bitmap``).
	        '''
	        pred = pred[:, 0, :, :]
	        segmentation = self.binarize(pred)
	        extract = self.polygons_from_bitmap if is_output_polygon else self.boxes_from_bitmap
	        batch_size = pred.size(0) if isinstance(pred, torch.Tensor) else pred.shape[0]

	        boxes_batch = []
	        scores_batch = []
	        for batch_index in range(batch_size):
	            height, width = batch['shape'][batch_index]
	            boxes, scores = extract(pred[batch_index], segmentation[batch_index], width, height)
	            boxes_batch.append(boxes)
	            scores_batch.append(scores)
	        return boxes_batch, scores_batch

	    def binarize(self, pred):
	        '''Return a boolean mask of the entries of ``pred`` strictly above ``self.thresh``.'''
	        return pred > self.thresh

	    @staticmethod
	    def _to_numpy(array):
	        '''Return a torch tensor as a NumPy array on the CPU; pass anything else through.'''
	        if isinstance(array, torch.Tensor):
	            return array.detach().cpu().numpy()
	        return array

	    @staticmethod
	    def _as_scalar(value):
	        '''Unwrap zero-dim tensors / NumPy scalars via ``.item()``; plain numbers pass through.'''
	        return value.item() if hasattr(value, 'item') else value

	    def _expanded_paths(self, box, unclip_ratio):
	        '''
	        Offset the closed polygon ``box`` outward and return the raw pyclipper
	        result: a list of paths, each a list of (x, y) points. The offset
	        distance is ``area * unclip_ratio / perimeter`` of the input polygon.
	        '''
	        poly = Polygon(box)
	        distance = poly.area * unclip_ratio / poly.length
	        offset = pyclipper.PyclipperOffset()
	        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
	        return offset.Execute(distance)

	    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
	        '''
	        Extract free-form polygons from one image.

	        pred: probability map with shape (H, W), torch tensor or NumPy array.
	        _bitmap: single map with shape (H, W),
	            whose values are binarized as {0, 1}
	        dest_width, dest_height: size the polygon coordinates are rescaled to;
	            ints, NumPy scalars or zero-dim tensors.

	        Returns ``(boxes, scores)``: a list of (K, 2) integer arrays and the
	        list of their scores.
	        '''
	        assert len(_bitmap.shape) == 2
	        # Accept NumPy input as well as tensors, like boxes_from_bitmap does.
	        bitmap = self._to_numpy(_bitmap)
	        pred = self._to_numpy(pred)
	        height, width = bitmap.shape
	        # Loop-invariant: unwrap the destination size once, each value on its own.
	        dest_width = self._as_scalar(dest_width)
	        dest_height = self._as_scalar(dest_height)
	        boxes = []
	        scores = []

	        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

	        for contour in contours[:self.max_candidates]:
	            # Simplify the contour; the tolerance is 0.5% of its perimeter.
	            epsilon = 0.005 * cv2.arcLength(contour, True)
	            approx = cv2.approxPolyDP(contour, epsilon, True)
	            points = approx.reshape((-1, 2))
	            if points.shape[0] < 4:
	                continue
	            score = self.box_score_fast(pred, contour.squeeze(1))
	            if self.box_thresh > score:
	                continue

	            # Inspect the raw path list instead of np.array(paths): paths of
	            # different lengths cannot form a regular array, and an empty
	            # result would otherwise reach cv2.minAreaRect below.
	            paths = self._expanded_paths(points, self.unclip_ratio)
	            if len(paths) != 1:
	                continue
	            box = np.array(paths[0]).reshape(-1, 2)
	            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
	            if sside < self.min_size + 2:
	                continue

	            # Rescale from map coordinates to the destination image, clamped to its bounds.
	            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
	            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
	            boxes.append(box)
	            scores.append(score)
	        return boxes, scores

	    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
	        '''
	        Extract 4-point boxes from one image.

	        pred: probability map with shape (H, W), torch tensor or NumPy array.
	        _bitmap: single map with shape (H, W),
	            whose values are binarized as {0, 1}
	        dest_width, dest_height: size the box coordinates are rescaled to;
	            ints, NumPy scalars or zero-dim tensors.

	        Returns ``(boxes, scores)``: an int16 array of shape (M, 4, 2) and a
	        float32 array of shape (M,), where M is the number of contours
	        examined. Rows of rejected contours stay all-zero with score 0.
	        If contour extraction raises ValueError, ``([], [])`` is returned.
	        '''
	        assert len(_bitmap.shape) == 2
	        bitmap = self._to_numpy(_bitmap)
	        pred = self._to_numpy(pred)
	        height, width = bitmap.shape
	        try:
	            contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	        except ValueError:
	            return [], []
	        # Loop-invariant: unwrap the destination size once, each value on its own.
	        dest_width = self._as_scalar(dest_width)
	        dest_height = self._as_scalar(dest_height)
	        num_contours = min(len(contours), self.max_candidates)
	        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
	        scores = np.zeros((num_contours,), dtype=np.float32)

	        for index in range(num_contours):
	            contour = contours[index].squeeze(1)
	            points, sside = self.get_mini_boxes(contour)
	            if sside < self.min_size:
	                continue
	            points = np.array(points)
	            score = self.box_score_fast(pred, contour)
	            if self.box_thresh > score:
	                continue

	            expanded = self.unclip(points, unclip_ratio=self.unclip_ratio)
	            if expanded.size == 0:
	                # Nothing came back from the offset; minAreaRect cannot take an empty point set.
	                continue
	            box, sside = self.get_mini_boxes(expanded.reshape(-1, 1, 2))
	            if sside < self.min_size + 2:
	                continue
	            box = np.array(box)

	            # Rescale from map coordinates to the destination image, clamped to its bounds.
	            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
	            box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
	            # Rotate the corner order so the corner with the smallest x + y comes first.
	            startidx = box.sum(axis=1).argmin()
	            box = np.roll(box, 4 - startidx, 0)
	            boxes[index, :, :] = box.astype(np.int16)
	            scores[index] = score
	        return boxes, scores

	    def unclip(self, box, unclip_ratio=1.8):
	        '''
	        Offset the polygon ``box`` outward by ``area * unclip_ratio / perimeter``
	        and return the resulting path(s) converted with ``np.array``.
	        '''
	        return np.array(self._expanded_paths(box, unclip_ratio))

	    def get_mini_boxes(self, contour):
	        '''
	        Fit the minimum-area rotated rectangle around ``contour``.

	        Returns ``(box, short_side)``. ``box`` lists the four corners after
	        sorting them by x: the smaller-y corner of the left pair, the
	        smaller-y corner of the right pair, the larger-y corner of the right
	        pair, the larger-y corner of the left pair. ``short_side`` is the
	        smaller of the rectangle's two side lengths.
	        '''
	        bounding_box = cv2.minAreaRect(contour)
	        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

	        # points[0:2] are the two leftmost corners, points[2:4] the two rightmost.
	        if points[1][1] > points[0][1]:
	            left_low_y, left_high_y = points[0], points[1]
	        else:
	            left_low_y, left_high_y = points[1], points[0]
	        if points[3][1] > points[2][1]:
	            right_low_y, right_high_y = points[2], points[3]
	        else:
	            right_low_y, right_high_y = points[3], points[2]

	        box = [left_low_y, right_low_y, right_high_y, left_high_y]
	        return box, min(bounding_box[1])

	    def box_score_fast(self, bitmap, _box):
	        '''
	        Return the mean of ``bitmap`` over the pixels inside polygon ``_box``.

	        bitmap: 2-D map of values to average (the callers pass the probability map).
	        _box: (K, 2) array of x, y vertices; it is copied, not modified.
	        '''
	        h, w = bitmap.shape[:2]
	        box = _box.copy()
	        # Axis-aligned bounds of the polygon, clamped to the map.
	        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
	        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
	        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
	        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

	        # Rasterize the polygon into a mask local to that bounding window.
	        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
	        box[:, 0] = box[:, 0] - xmin
	        box[:, 1] = box[:, 1] - ymin
	        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
	        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]