Spaces:

datdo2717
/

Scan_Doc_App

Runtime error

App Files Files Community

Scan_Doc_App / Rotate /ppocr /losses /e2e_pg_loss.py

datdo2717

rotate

c5b5437 almost 2 years ago

raw

history blame contribute delete

6.59 kB

	# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	from paddle import nn
	import paddle

	from .det_basic_loss import DiceLoss
	from ppocr.utils.e2e_utils.extract_batchsize import pre_process


	class PGLoss(nn.Layer):
	    """Combined training loss: dice score + border + direction + CTC.

	    ``forward`` returns a dict holding each individual term together with
	    their weighted sum (the CTC term is weighted by 5, the others by 1).

	    Args:
	        tcl_bs: forwarded unchanged to ``pre_process``.
	        max_text_length: forwarded unchanged to ``pre_process``.
	        max_text_nums: forwarded unchanged to ``pre_process``.
	        pad_num: number of foreground character channels. It is also used
	            as the CTC blank index and is forwarded to ``pre_process``.
	        eps: epsilon handed to ``DiceLoss``.
	        **kwargs: accepted and ignored.
	    """

	    def __init__(self,
	                 tcl_bs,
	                 max_text_length,
	                 max_text_nums,
	                 pad_num,
	                 eps=1e-6,
	                 **kwargs):
	        super(PGLoss, self).__init__()
	        self.tcl_bs = tcl_bs
	        self.max_text_nums = max_text_nums
	        self.max_text_length = max_text_length
	        self.pad_num = pad_num
	        self.dice_loss = DiceLoss(eps=eps)

	    @staticmethod
	    def _expand_channels(x, times):
	        """Expand a 4-D tensor along axis 1 to ``times`` x its channel count."""
	        b, c, h, w = x.shape
	        return paddle.expand(x=x, shape=[b, times * c, h, w])

	    def _normalized_smooth_l1(self, pred, target, score, mask, channels):
	        """Norm-weighted, masked smooth-L1 loss shared by border/direction.

	        Args:
	            pred: predicted regression map, subtracted element-wise from
	                the first ``channels`` channels of ``target``.
	            target: label map that is split on axis 1 into ``channels``
	                regression channels followed by one weight channel.
	            score: 4-D map expanded to ``channels`` x its channels and
	                used as a multiplicative weight.
	            mask: 4-D map expanded the same way as ``score``.
	            channels: number of regression channels (4 for border,
	                2 for direction).

	        Returns:
	            Scalar tensor: weighted loss sum divided by
	            ``sum(score * mask) + 1e-5``.
	        """
	        target_values, target_norm = paddle.split(
	            target, num_or_sections=[channels, 1], axis=1)
	        norm_weight = self._expand_channels(target_norm, channels)
	        score_weight = self._expand_channels(score, channels)
	        mask_weight = self._expand_channels(mask, channels)

	        abs_diff = paddle.abs(target_values - pred)
	        # 1.0 where |diff| < 1 (quadratic branch), 0.0 otherwise (linear
	        # branch). The selector itself is excluded from back-propagation.
	        is_small = paddle.cast(abs_diff < 1.0, dtype='float32')
	        is_small.stop_gradient = True
	        smooth_l1 = 0.5 * abs_diff * abs_diff * is_small + \
	            (abs_diff - 0.5) * (1.0 - is_small)

	        weighted = norm_weight * smooth_l1
	        # The 1e-5 keeps the division defined when score * mask sums to 0.
	        return paddle.sum(weighted * score_weight * mask_weight) / \
	            (paddle.sum(score_weight * mask_weight) + 1e-5)

	    def border_loss(self, f_border, l_border, l_score, l_mask):
	        """Smooth-L1 loss on the 4 border channels.

	        ``l_border`` carries 4 regression channels plus 1 weight channel
	        on axis 1; ``l_score`` and ``l_mask`` weight the result.
	        """
	        return self._normalized_smooth_l1(
	            f_border, l_border, l_score, l_mask, channels=4)

	    def direction_loss(self, f_direction, l_direction, l_score, l_mask):
	        """Smooth-L1 loss on the 2 direction channels.

	        ``l_direction`` carries 2 regression channels plus 1 weight
	        channel on axis 1; ``l_score`` and ``l_mask`` weight the result.
	        """
	        return self._normalized_smooth_l1(
	            f_direction, l_direction, l_score, l_mask, channels=2)

	    def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
	        """Mean CTC loss over character scores gathered at ``tcl_pos``.

	        Args:
	            f_char: character score map; moved to channels-last before
	                gathering.
	            tcl_pos: index tensor, reshaped to rows of 3 indices used
	                with ``gather_nd`` on the channels-last map.
	            tcl_mask: 3-D mask multiplied into the gathered scores.
	            tcl_label: label tensor passed to ``ctc_loss``.
	            label_t: label lengths passed to ``ctc_loss``.

	        Returns:
	            Scalar tensor: mean of the per-sequence CTC costs.
	        """
	        f_char = paddle.transpose(f_char, [0, 2, 3, 1])
	        tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
	        tcl_pos = paddle.cast(tcl_pos, dtype=int)
	        f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
	        # NOTE(review): the hard-coded 64 is presumably the number of
	        # sampled points per text instance produced upstream - confirm
	        # against pre_process before changing it.
	        f_tcl_char = paddle.reshape(
	            f_tcl_char, [-1, 64, self.pad_num + 1])  # len(Lexicon_Table)+1
	        f_tcl_char_fg, f_tcl_char_bg = paddle.split(
	            f_tcl_char, [self.pad_num, 1], axis=2)

	        # Where the mask is 0 the last (blank) channel is set to +20 and
	        # every foreground channel to -20, so those steps favour blank.
	        f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
	        b, c, last = tcl_mask.shape
	        tcl_mask_fg = paddle.expand(
	            x=tcl_mask, shape=[b, c, self.pad_num * last])
	        tcl_mask_fg.stop_gradient = True
	        f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
	            -20.0)

	        f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2)
	        # ctc_loss is fed the sequence axis first, so swap axes 0 and 1.
	        f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2))
	        seq_len, batch, _ = f_tcl_char_ld.shape
	        input_lengths = paddle.to_tensor([seq_len] * batch, dtype='int64')
	        cost = paddle.nn.functional.ctc_loss(
	            log_probs=f_tcl_char_ld,
	            labels=tcl_label,
	            input_lengths=input_lengths,
	            label_lengths=label_t,
	            blank=self.pad_num,
	            reduction='none')
	        return cost.mean()

	    def forward(self, predicts, labels):
	        """Compute all loss terms and their weighted sum.

	        Args:
	            predicts: mapping with keys ``'f_score'``, ``'f_border'``,
	                ``'f_direction'`` and ``'f_char'``.
	            labels: sequence of exactly 9 items, in order: images,
	                tcl_maps, tcl_label_maps, border_maps, direction_maps,
	                training_masks, label_list, pos_list, pos_mask.

	        Returns:
	            dict with keys ``'loss'``, ``'score_loss'``, ``'border_loss'``,
	            ``'direction_loss'`` and ``'ctc_loss'``.
	        """
	        # images and tcl_label_maps are unpacked only to pin the expected
	        # 9-item layout; they are not used below.
	        (images, tcl_maps, tcl_label_maps, border_maps, direction_maps,
	         training_masks, label_list, pos_list, pos_mask) = labels
	        # for all the batch_size
	        pos_list, pos_mask, label_list, label_t = pre_process(
	            label_list, pos_list, pos_mask, self.max_text_length,
	            self.max_text_nums, self.pad_num, self.tcl_bs)

	        f_score = predicts['f_score']
	        f_border = predicts['f_border']
	        f_direction = predicts['f_direction']
	        f_char = predicts['f_char']

	        score_loss = self.dice_loss(f_score, tcl_maps, training_masks)
	        border_loss = self.border_loss(f_border, border_maps, tcl_maps,
	                                       training_masks)
	        direction_loss = self.direction_loss(f_direction, direction_maps,
	                                             tcl_maps, training_masks)
	        ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t)
	        loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss

	        losses = {
	            'loss': loss_all,
	            "score_loss": score_loss,
	            "border_loss": border_loss,
	            "direction_loss": direction_loss,
	            "ctc_loss": ctc_loss
	        }
	        return losses