# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| """ | |
| This code is refer from: | |
| https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py | |
| https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py | |
| """ | |
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn

from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible

M0_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M1_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M2_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
    [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
    [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
M3_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
    [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
    [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
    [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]

def get_micronet_config(mode):
    # Explicit lookup instead of eval() on the mode string.
    return {"M0": M0_cfgs, "M1": M1_cfgs, "M2": M2_cfgs, "M3": M3_cfgs}[mode]


class MaxGroupPooling(nn.Layer):
    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        if self.channel_per_group == 1:
            return x
        # max over groups of `channel_per_group` adjacent channels
        b, c, h, w = x.shape
        y = paddle.reshape(x, [b, c // self.channel_per_group, -1, h, w])
        out = paddle.max(y, axis=2)
        return out
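
# Illustrative example for MaxGroupPooling above: with channel_per_group=2, an
# input of shape [b, 8, h, w] is viewed as [b, 4, 2, h, w] and max-reduced over
# axis 2, i.e. a max over each adjacent pair of channels -> [b, 4, h, w].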


class SpatialSepConvSF(nn.Layer):
    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()

        oup1, oup2 = oups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                bias_attr=False,
                groups=1),
            nn.BatchNorm2D(oup1),
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                bias_attr=False,
                groups=oup1),
            nn.BatchNorm2D(oup1 * oup2),
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out
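
# Note on SpatialSepConvSF above (derived from the code): the stem convolution
# is spatially separable -- a (k x 1) conv to oup1 channels, then a grouped
# (1 x k) conv expanding to oup1 * oup2 channels, with a channel shuffle
# mixing information across the oup1 groups.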


class ChannelShuffle(nn.Layer):
    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        channels_per_group = c // self.groups
        # reshape, transpose, and flatten to interleave the groups
        x = paddle.reshape(x, [b, self.groups, channels_per_group, h, w])
        x = paddle.transpose(x, (0, 2, 1, 3, 4))
        out = paddle.reshape(x, [b, -1, h, w])
        return out
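
# Illustrative example for ChannelShuffle above: with groups=2 and c=6, the
# channels [0, 1, 2, 3, 4, 5] are regrouped as [[0, 1, 2], [3, 4, 5]],
# transposed, and flattened to [0, 3, 1, 4, 2, 5], so subsequent group
# convolutions see channels from both original groups.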


class StemLayer(nn.Layer):
    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()

        g1, g2 = groups
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride),
            MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6())

    def forward(self, x):
        out = self.stem(x)
        return out


class DepthSpatialSepConv(nn.Layer):
    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()

        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2

        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                hidden_dim, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(hidden_dim),
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size),
                1, (0, kernel_size // 2),
                bias_attr=False,
                groups=hidden_dim),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x
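
# Note on DepthSpatialSepConv above (as written in this port): the k x k
# depthwise convolution is factored into a (k x 1) pass and a (1 x k) pass,
# and only the vertical (k x 1) pass carries the stride. The block therefore
# downsamples height but not width, which suits recognition inputs that are
# much wider than they are tall.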


class GroupConv(nn.Layer):
    def __init__(self, inp, oup, groups=(2, 2)):
        # `groups` is a pair; only groups[0] is used by the 1x1 convolution,
        # so the default must be a sequence, not a bare int.
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class DepthConv(nn.Layer):
    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup,
                kernel_size,
                stride,
                kernel_size // 2,
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        out = self.conv(x)
        return out


class DYShiftMax(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(
            nn.ReLU() if relu_before_pool else nn.Sequential(),
            nn.AdaptiveAvgPool2D(1))
        # four coefficients (a1, b1, a2, b2) with relu, two (a1, b1) without
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze
        squeeze = make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4

        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        if g is None:
            g = (1, 1)  # must be a pair, since g[1] is indexed below
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g
        self.gc = inp // self.g

        # build a fixed channel permutation: rotate the groups by one and the
        # channels within each group by one
        index = paddle.reshape(paddle.arange(inp), [1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = paddle.reshape(indexs, [inp])
        self.expansion = expansion

    def forward(self, x):
        x_in = x
        x_out = x

        # dynamic coefficients from the globally pooled context
        b, c, _, _ = x_in.shape
        y = self.avg_pool(x_in)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        y = (y - 0.5) * self.act_max

        # group-shifted copy of the input; index_select keeps the op on-graph
        # and differentiable, unlike a numpy round-trip
        x2 = paddle.index_select(x_out, self.index, axis=1)

        if self.exp == 4:
            a1, b1, a2, b2 = paddle.split(y, 4, axis=1)

            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]

            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        elif self.exp == 2:
            a1, b1 = paddle.split(y, 2, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1

        return out
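
# Summary of DYShiftMax above (matching the forward pass): global average
# pooling produces per-channel coefficients (a_i, b_i); the output is
# max(a1*x + b1*x_shift, a2*x + b2*x_shift) when act_relu is True, or the
# single affine combination a1*x + b1*x_shift otherwise, where x_shift is x
# with its channels permuted by self.index (a circular shift across groups).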


class DYMicroBlock(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()

        self.identity = stride == 1 and inp == oup

        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=(y2 == 2),
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=(y1 == 2),
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=(y2 == 2),
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                # shuffle width depends on which dynamic activations are on
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else
                (nn.Sequential() if y1 == 0 and y2 == 0 else
                 ChannelShuffle(hidden_dim2 // 2)),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        identity = x
        out = self.layers(x)
        if self.identity:
            out = out + identity
        return out
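
# DYMicroBlock above selects one of three layouts from the config row:
# gs1[0] == 0 uses a depthwise spatial conv followed by a group 1x1 conv;
# g2 == 0 uses a single group 1x1 conv; otherwise the full micro-block runs
# group 1x1 -> depthwise spatial conv -> group 1x1, each stage optionally
# followed by DYShiftMax and a channel shuffle.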


class MicroNet(nn.Layer):
    """
    The MicroNet backbone network for the recognition module.
    Args:
        mode (str): one of {'M0', 'M1', 'M2', 'M3'}. The four variants target
            four computational budgets (4M, 6M, 12M, and 21M MAdds).
            Default: 'M3'.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()

        self.cfgs = get_micronet_config(mode)

        activation_cfg = {}
        if mode == 'M0':
            input_channel = 4
            stem_groups = 2, 2
            out_ch = 384
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M1':
            input_channel = 6
            stem_groups = 3, 2
            out_ch = 576
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M2':
            input_channel = 8
            stem_groups = 4, 2
            out_ch = 768
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M3':
            input_channel = 12
            stem_groups = 4, 3
            out_ch = 432
            activation_cfg['init_a'] = 1.0, 0.5
            activation_cfg['init_b'] = 0.0, 0.5
        else:
            raise NotImplementedError(
                "mode [{}] is not implemented!".format(mode))

        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]
        for idx, val in enumerate(self.cfgs):
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val
            t1 = (c1, c2)
            gs1 = (g1, g2)
            gs2 = (c3, g3, g4)
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r
            output_channel = c
            layers.append(
                DYMicroBlock(
                    input_channel,
                    output_channel,
                    kernel_size=ks,
                    stride=s,
                    ch_exp=t1,
                    ch_per_group=gs1,
                    groups_1x1=gs2,
                    depthsep=True,
                    shuffle=True,
                    activation_cfg=activation_cfg, ))
            input_channel = output_channel
            for i in range(1, n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        output_channel,
                        kernel_size=ks,
                        stride=1,
                        ch_exp=t1,
                        ch_per_group=gs1,
                        groups_1x1=gs2,
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = output_channel
        self.features = nn.Sequential(*layers)

        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return x
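

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the original module): run a
    # dummy recognition-style image through the M3 backbone. The 1x3x32x320
    # input size is an assumption typical of PaddleOCR recognition inputs,
    # not a requirement of this file.
    model = MicroNet(mode='M3')
    feats = model(paddle.rand([1, 3, 32, 320]))
    # Height is reduced by the stem, the three stride-2 blocks, and the final
    # pool (32x in total); width only by the stem and the pool (4x), so the
    # expected output shape here is [1, 432, 1, 80].
    print(feats.shape)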