# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/JiaquanYe/TableMASTER-mmocr/blob/master/mmocr/models/textrecog/backbones/table_resnet_extra.py
"""
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

class BasicBlock(nn.Layer):
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 gcb_config=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2D(
            inplanes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias_attr=False)
        self.bn1 = nn.BatchNorm2D(planes, momentum=0.9)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2D(
            planes, planes, kernel_size=3, stride=1, padding=1,
            bias_attr=False)
        self.bn2 = nn.BatchNorm2D(planes, momentum=0.9)
        self.downsample = downsample
        self.stride = stride
        self.gcb_config = gcb_config

        if self.gcb_config is not None:
            gcb_ratio = gcb_config['ratio']
            gcb_headers = gcb_config['headers']
            att_scale = gcb_config['att_scale']
            fusion_type = gcb_config['fusion_type']
            self.context_block = MultiAspectGCAttention(
                inplanes=planes,
                ratio=gcb_ratio,
                headers=gcb_headers,
                att_scale=att_scale,
                fusion_type=fusion_type)

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.gcb_config is not None:
            out = self.context_block(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

def get_gcb_config(gcb_config, layer):
    if gcb_config is None or not gcb_config['layers'][layer]:
        return None
    else:
        return gcb_config
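
# gcb_config is a plain dict consumed here and in BasicBlock: it carries
# 'ratio', 'headers', 'att_scale' and 'fusion_type' (forwarded to
# MultiAspectGCAttention) plus 'layers', a list of four flags selecting
# which residual stages get a context block. A concrete, illustrative
# example appears in the smoke test at the end of this file.
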
class TableResNetExtra(nn.Layer):
    def __init__(self, layers, in_channels=3, gcb_config=None):
        assert len(layers) >= 4

        super(TableResNetExtra, self).__init__()
        self.inplanes = 128
        self.conv1 = nn.Conv2D(
            in_channels,
            64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias_attr=False)
        self.bn1 = nn.BatchNorm2D(64)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2D(
            64, 128, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.bn2 = nn.BatchNorm2D(128)
        self.relu2 = nn.ReLU()

        self.maxpool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.layer1 = self._make_layer(
            BasicBlock,
            256,
            layers[0],
            stride=1,
            gcb_config=get_gcb_config(gcb_config, 0))

        self.conv3 = nn.Conv2D(
            256, 256, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(256)
        self.relu3 = nn.ReLU()

        self.maxpool2 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.layer2 = self._make_layer(
            BasicBlock,
            256,
            layers[1],
            stride=1,
            gcb_config=get_gcb_config(gcb_config, 1))

        self.conv4 = nn.Conv2D(
            256, 256, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.bn4 = nn.BatchNorm2D(256)
        self.relu4 = nn.ReLU()

        self.maxpool3 = nn.MaxPool2D(kernel_size=2, stride=2)

        self.layer3 = self._make_layer(
            BasicBlock,
            512,
            layers[2],
            stride=1,
            gcb_config=get_gcb_config(gcb_config, 2))

        self.conv5 = nn.Conv2D(
            512, 512, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.bn5 = nn.BatchNorm2D(512)
        self.relu5 = nn.ReLU()

        self.layer4 = self._make_layer(
            BasicBlock,
            512,
            layers[3],
            stride=1,
            gcb_config=get_gcb_config(gcb_config, 3))

        self.conv6 = nn.Conv2D(
            512, 512, kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.bn6 = nn.BatchNorm2D(512)
        self.relu6 = nn.ReLU()

        self.out_channels = [256, 256, 512]

    def _make_layer(self, block, planes, blocks, stride=1, gcb_config=None):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2D(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias_attr=False),
                nn.BatchNorm2D(planes * block.expansion), )

        layers = []
        layers.append(
            block(
                self.inplanes,
                planes,
                stride,
                downsample,
                gcb_config=gcb_config))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    def forward(self, x):
        f = []
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.maxpool1(x)
        x = self.layer1(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu3(x)
        f.append(x)

        x = self.maxpool2(x)
        x = self.layer2(x)

        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu4(x)
        f.append(x)

        x = self.maxpool3(x)
        x = self.layer3(x)

        x = self.conv5(x)
        x = self.bn5(x)
        x = self.relu5(x)

        x = self.layer4(x)
        x = self.conv6(x)
        x = self.bn6(x)
        x = self.relu6(x)
        f.append(x)

        return f
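
# forward() above returns a three-level feature pyramid: with the three
# 2x2 max-pools, the maps are at 1/2, 1/4 and 1/8 of the input resolution
# with 256, 256 and 512 channels, matching self.out_channels.
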
class MultiAspectGCAttention(nn.Layer):
    def __init__(self,
                 inplanes,
                 ratio,
                 headers,
                 pooling_type='att',
                 att_scale=False,
                 fusion_type='channel_add'):
        super(MultiAspectGCAttention, self).__init__()
        assert pooling_type in ['avg', 'att']
        assert fusion_type in ['channel_add', 'channel_mul', 'channel_concat']
        # inplanes must be evenly divisible by headers
        assert inplanes % headers == 0 and inplanes >= 8

        self.headers = headers
        self.inplanes = inplanes
        self.ratio = ratio
        self.planes = int(inplanes * ratio)
        self.pooling_type = pooling_type
        self.fusion_type = fusion_type
        self.att_scale = att_scale
        self.single_header_inplanes = int(inplanes / headers)
        if pooling_type == 'att':
            self.conv_mask = nn.Conv2D(
                self.single_header_inplanes, 1, kernel_size=1)
            self.softmax = nn.Softmax(axis=2)
        else:
            self.avg_pool = nn.AdaptiveAvgPool2D(1)

        if fusion_type == 'channel_add':
            self.channel_add_conv = nn.Sequential(
                nn.Conv2D(
                    self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(),
                nn.Conv2D(
                    self.planes, self.inplanes, kernel_size=1))
        elif fusion_type == 'channel_concat':
            self.channel_concat_conv = nn.Sequential(
                nn.Conv2D(
                    self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(),
                nn.Conv2D(
                    self.planes, self.inplanes, kernel_size=1))
            # for concat
            self.cat_conv = nn.Conv2D(
                2 * self.inplanes, self.inplanes, kernel_size=1)
        elif fusion_type == 'channel_mul':
            self.channel_mul_conv = nn.Sequential(
                nn.Conv2D(
                    self.inplanes, self.planes, kernel_size=1),
                nn.LayerNorm([self.planes, 1, 1]),
                nn.ReLU(),
                nn.Conv2D(
                    self.planes, self.inplanes, kernel_size=1))
    def spatial_pool(self, x):
        batch, channel, height, width = x.shape
        if self.pooling_type == 'att':
            # [N*headers, C', H, W], where C = headers * C'
            x = x.reshape([
                batch * self.headers, self.single_header_inplanes, height,
                width
            ])
            input_x = x

            # [N*headers, C', H * W]
            input_x = input_x.reshape([
                batch * self.headers, self.single_header_inplanes,
                height * width
            ])

            # [N*headers, 1, C', H * W]
            input_x = input_x.unsqueeze(1)
            # [N*headers, 1, H, W]
            context_mask = self.conv_mask(x)
            # [N*headers, 1, H * W]
            context_mask = context_mask.reshape(
                [batch * self.headers, 1, height * width])

            # scale the attention logits to control their variance
            if self.att_scale and self.headers > 1:
                context_mask = context_mask / math.sqrt(
                    self.single_header_inplanes)

            # [N*headers, 1, H * W]
            context_mask = self.softmax(context_mask)

            # [N*headers, 1, H * W, 1]
            context_mask = context_mask.unsqueeze(-1)
            # [N*headers, 1, C', 1] = [N*headers, 1, C', H * W] x [N*headers, 1, H * W, 1]
            context = paddle.matmul(input_x, context_mask)

            # [N, headers * C', 1, 1]
            context = context.reshape(
                [batch, self.headers * self.single_header_inplanes, 1, 1])
        else:
            # [N, C, 1, 1]
            context = self.avg_pool(x)

        return context
    def forward(self, x):
        # [N, C, 1, 1]
        context = self.spatial_pool(x)
        out = x

        if self.fusion_type == 'channel_mul':
            # [N, C, 1, 1]
            channel_mul_term = F.sigmoid(self.channel_mul_conv(context))
            out = out * channel_mul_term
        elif self.fusion_type == 'channel_add':
            # [N, C, 1, 1]
            channel_add_term = self.channel_add_conv(context)
            out = out + channel_add_term
        else:  # channel_concat
            # [N, C, 1, 1]
            channel_concat_term = self.channel_concat_conv(context)

            # broadcast the pooled context over the spatial dims, then fuse
            _, _, H, W = out.shape
            out = paddle.concat(
                [out, channel_concat_term.expand([-1, -1, H, W])], axis=1)
            out = self.cat_conv(out)
            out = F.layer_norm(out, [self.inplanes, H, W])
            out = F.relu(out)

        return out
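

if __name__ == '__main__':
    # Minimal smoke test; a sketch, not a released configuration. The layer
    # counts and gcb_config values below are illustrative assumptions, not
    # values taken from a shipped TableMaster config; they are chosen only
    # to exercise the context-block branches.
    gcb_config = {
        'ratio': 0.0625,
        'headers': 1,
        'att_scale': False,
        'fusion_type': 'channel_add',
        'layers': [False, True, True, True],
    }
    backbone = TableResNetExtra(
        layers=[1, 2, 5, 3], in_channels=3, gcb_config=gcb_config)
    feats = backbone(paddle.rand([1, 3, 480, 480]))
    # Expected: [1, 256, 240, 240], [1, 256, 120, 120], [1, 512, 60, 60]
    for feat in feats:
        print(feat.shape)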