Spaces:

datdo2717
/

Scan_Doc_App

Runtime error

App Files Files Community

Scan_Doc_App / Rotate /ppocr /postprocess /sast_postprocess.py

datdo2717

rotate

c5b5437 almost 2 years ago

raw

history blame contribute delete

13.8 kB

	# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import os
	import sys

	__dir__ = os.path.dirname(__file__)
	sys.path.append(__dir__)
	sys.path.append(os.path.join(__dir__, '..'))

	import numpy as np
	from .locality_aware_nms import nms_locality
	import paddle
	import cv2
	import time


	class SASTPostProcess(object):
	"""
	The post process for SAST.
	"""

	def __init__(self,
	score_thresh=0.5,
	nms_thresh=0.2,
	sample_pts_num=2,
	shrink_ratio_of_width=0.3,
	expand_scale=1.0,
	tcl_map_thresh=0.5,
	**kwargs):

	self.score_thresh = score_thresh
	self.nms_thresh = nms_thresh
	self.sample_pts_num = sample_pts_num
	self.shrink_ratio_of_width = shrink_ratio_of_width
	self.expand_scale = expand_scale
	self.tcl_map_thresh = tcl_map_thresh

	# c++ la-nms is faster, but only support python 3.5
	self.is_python35 = False
	if sys.version_info.major == 3 and sys.version_info.minor == 5:
	self.is_python35 = True

	def point_pair2poly(self, point_pair_list):
	"""
	Transfer vertical point_pairs into poly point in clockwise.
	"""
	# constract poly
	point_num = len(point_pair_list) * 2
	point_list = [0] * point_num
	for idx, point_pair in enumerate(point_pair_list):
	point_list[idx] = point_pair[0]
	point_list[point_num - 1 - idx] = point_pair[1]
	return np.array(point_list).reshape(-1, 2)

	def shrink_quad_along_width(self,
	quad,
	begin_width_ratio=0.,
	end_width_ratio=1.):
	"""
	Generate shrink_quad_along_width.
	"""
	ratio_pair = np.array(
	[[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
	p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
	p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
	return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])

	def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
	"""
	expand poly along width.
	"""
	point_num = poly.shape[0]
	left_quad = np.array(
	[poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
	left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
	(np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
	left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio,
	1.0)
	right_quad = np.array(
	[
	poly[point_num // 2 - 2], poly[point_num // 2 - 1],
	poly[point_num // 2], poly[point_num // 2 + 1]
	],
	dtype=np.float32)
	right_ratio = 1.0 + \
	shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
	(np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
	right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0,
	right_ratio)
	poly[0] = left_quad_expand[0]
	poly[-1] = left_quad_expand[-1]
	poly[point_num // 2 - 1] = right_quad_expand[1]
	poly[point_num // 2] = right_quad_expand[2]
	return poly

	def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
	"""Restore quad."""
	xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
	xy_text = xy_text[:, ::-1] # (n, 2)

	# Sort the text boxes via the y axis
	xy_text = xy_text[np.argsort(xy_text[:, 1])]

	scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
	scores = scores[:, np.newaxis]

	# Restore
	point_num = int(tvo_map.shape[-1] / 2)
	assert point_num == 4
	tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :]
	xy_text_tile = np.tile(xy_text, (1, point_num)) # (n, point_num * 2)
	quads = xy_text_tile - tvo_map

	return scores, quads, xy_text

	def quad_area(self, quad):
	"""
	compute area of a quad.
	"""
	edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
	(quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
	(quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
	(quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])]
	return np.sum(edge) / 2.

	def nms(self, dets):
	if self.is_python35:
	from ppocr.utils.utility import check_install
	check_install('lanms', 'lanms-nova')
	import lanms
	dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh)
	else:
	dets = nms_locality(dets, self.nms_thresh)
	return dets

	def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
	"""
	Cluster pixels in tcl_map based on quads.
	"""
	instance_count = quads.shape[0] + 1 # contain background
	instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
	if instance_count == 1:
	return instance_count, instance_label_map

	# predict text center
	xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
	n = xy_text.shape[0]
	xy_text = xy_text[:, ::-1] # (n, 2)
	tco = tco_map[xy_text[:, 1], xy_text[:, 0], :] # (n, 2)
	pred_tc = xy_text - tco

	# get gt text center
	m = quads.shape[0]
	gt_tc = np.mean(quads, axis=1) # (m, 2)

	pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :],
	(1, m, 1)) # (n, m, 2)
	gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1)) # (n, m, 2)
	dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2) # (n, m)
	xy_text_assign = np.argmin(dist_mat, axis=1) + 1 # (n,)

	instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign
	return instance_count, instance_label_map

	def estimate_sample_pts_num(self, quad, xy_text):
	"""
	Estimate sample points number.
	"""
	eh = (np.linalg.norm(quad[0] - quad[3]) +
	np.linalg.norm(quad[1] - quad[2])) / 2.0
	ew = (np.linalg.norm(quad[0] - quad[1]) +
	np.linalg.norm(quad[2] - quad[3])) / 2.0

	dense_sample_pts_num = max(2, int(ew))
	dense_xy_center_line = xy_text[np.linspace(
	0,
	xy_text.shape[0] - 1,
	dense_sample_pts_num,
	endpoint=True,
	dtype=np.float32).astype(np.int32)]

	dense_xy_center_line_diff = dense_xy_center_line[
	1:] - dense_xy_center_line[:-1]
	estimate_arc_len = np.sum(
	np.linalg.norm(
	dense_xy_center_line_diff, axis=1))

	sample_pts_num = max(2, int(estimate_arc_len / eh))
	return sample_pts_num

	def detect_sast(self,
	tcl_map,
	tvo_map,
	tbo_map,
	tco_map,
	ratio_w,
	ratio_h,
	src_w,
	src_h,
	shrink_ratio_of_width=0.3,
	tcl_map_thresh=0.5,
	offset_expand=1.0,
	out_strid=4.0):
	"""
	first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys
	"""
	# restore quad
	scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh,
	tvo_map)
	dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
	dets = self.nms(dets)
	if dets.shape[0] == 0:
	return []
	quads = dets[:, :-1].reshape(-1, 4, 2)

	# Compute quad area
	quad_areas = []
	for quad in quads:
	quad_areas.append(-self.quad_area(quad))

	# instance segmentation
	# instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
	instance_count, instance_label_map = self.cluster_by_quads_tco(
	tcl_map, tcl_map_thresh, quads, tco_map)

	# restore single poly with tcl instance.
	poly_list = []
	for instance_idx in range(1, instance_count):
	xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
	quad = quads[instance_idx - 1]
	q_area = quad_areas[instance_idx - 1]
	if q_area < 5:
	continue

	#
	len1 = float(np.linalg.norm(quad[0] - quad[1]))
	len2 = float(np.linalg.norm(quad[1] - quad[2]))
	min_len = min(len1, len2)
	if min_len < 3:
	continue

	# filter small CC
	if xy_text.shape[0] <= 0:
	continue

	# filter low confidence instance
	xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
	if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
	# if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
	continue

	# sort xy_text
	left_center_pt = np.array(
	[[(quad[0, 0] + quad[-1, 0]) / 2.0,
	(quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2)
	right_center_pt = np.array(
	[[(quad[1, 0] + quad[2, 0]) / 2.0,
	(quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2)
	proj_unit_vec = (right_center_pt - left_center_pt) / \
	(np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
	proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
	xy_text = xy_text[np.argsort(proj_value)]

	# Sample pts in tcl map
	if self.sample_pts_num == 0:
	sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
	else:
	sample_pts_num = self.sample_pts_num
	xy_center_line = xy_text[np.linspace(
	0,
	xy_text.shape[0] - 1,
	sample_pts_num,
	endpoint=True,
	dtype=np.float32).astype(np.int32)]

	point_pair_list = []
	for x, y in xy_center_line:
	# get corresponding offset
	offset = tbo_map[y, x, :].reshape(2, 2)
	if offset_expand != 1.0:
	offset_length = np.linalg.norm(
	offset, axis=1, keepdims=True)
	expand_length = np.clip(
	offset_length * (offset_expand - 1),
	a_min=0.5,
	a_max=3.0)
	offset_detal = offset / offset_length * expand_length
	offset = offset + offset_detal
	# original point
	ori_yx = np.array([y, x], dtype=np.float32)
	point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array(
	[ratio_w, ratio_h]).reshape(-1, 2)
	point_pair_list.append(point_pair)

	# ndarry: (x, 2), expand poly along width
	detected_poly = self.point_pair2poly(point_pair_list)
	detected_poly = self.expand_poly_along_width(detected_poly,
	shrink_ratio_of_width)
	detected_poly[:, 0] = np.clip(
	detected_poly[:, 0], a_min=0, a_max=src_w)
	detected_poly[:, 1] = np.clip(
	detected_poly[:, 1], a_min=0, a_max=src_h)
	poly_list.append(detected_poly)

	return poly_list

	def __call__(self, outs_dict, shape_list):
	score_list = outs_dict['f_score']
	border_list = outs_dict['f_border']
	tvo_list = outs_dict['f_tvo']
	tco_list = outs_dict['f_tco']
	if isinstance(score_list, paddle.Tensor):
	score_list = score_list.numpy()
	border_list = border_list.numpy()
	tvo_list = tvo_list.numpy()
	tco_list = tco_list.numpy()

	img_num = len(shape_list)
	poly_lists = []
	for ino in range(img_num):
	p_score = score_list[ino].transpose((1, 2, 0))
	p_border = border_list[ino].transpose((1, 2, 0))
	p_tvo = tvo_list[ino].transpose((1, 2, 0))
	p_tco = tco_list[ino].transpose((1, 2, 0))
	src_h, src_w, ratio_h, ratio_w = shape_list[ino]

	poly_list = self.detect_sast(
	p_score,
	p_tvo,
	p_border,
	p_tco,
	ratio_w,
	ratio_h,
	src_w,
	src_h,
	shrink_ratio_of_width=self.shrink_ratio_of_width,
	tcl_map_thresh=self.tcl_map_thresh,
	offset_expand=self.expand_scale)
	poly_lists.append({'points': np.array(poly_list)})

	return poly_lists