import math
import shutil
from functools import partial
from typing import Callable, Optional, Tuple, Union

import cv2
import einops
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.utils.checkpoint import checkpoint
from torchvision.models import resnet34

from timm.layers import (
    trunc_normal_, AvgPool2dSame, DropPath, Mlp, GlobalResponseNormMlp,
    LayerNorm2d, LayerNorm, create_conv2d, get_act_layer, make_divisible, to_ntuple,
)

class Downsample(nn.Module):
    """2x2 average pool (when strided/dilated) followed by an optional 1x1 channel projection."""

    def __init__(self, in_chs, out_chs, stride=1, dilation=1):
        super().__init__()
        avg_stride = stride if dilation == 1 else 1
        if stride > 1 or dilation > 1:
            avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d
            self.pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False)
        else:
            self.pool = nn.Identity()

        if in_chs != out_chs:
            self.conv = create_conv2d(in_chs, out_chs, 1, stride=1)
        else:
            self.conv = nn.Identity()

    def forward(self, x):
        x = self.pool(x)
        x = self.conv(x)
        return x
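
# Minimal shape-check sketch (hypothetical sizes): with stride=2 the shortcut
# halves H/W via average pooling and projects channels with a 1x1 conv.
def _demo_downsample():
    ds = Downsample(in_chs=64, out_chs=128, stride=2)
    x = torch.randn(1, 64, 32, 32)
    assert ds(x).shape == (1, 128, 16, 16)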


class ConvNeXtBlock(nn.Module):
    """ ConvNeXt Block

    There are two equivalent implementations:
      (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
      (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back

    Unlike the official impl, this one allows a choice of 1 or 2. The 1x1 conv path can be faster with an
    appropriate choice of LayerNorm impl; however, as model size increases the tradeoffs appear to change
    and nn.Linear becomes the better choice. This was observed with PyTorch 1.10 on a 3090 GPU; it could
    change over time and with different hardware.
    """

    def __init__(
            self,
            in_chs: int,
            out_chs: Optional[int] = None,
            kernel_size: int = 7,
            stride: int = 1,
            dilation: Union[int, Tuple[int, int]] = (1, 1),
            mlp_ratio: float = 4,
            conv_mlp: bool = False,
            conv_bias: bool = True,
            use_grn: bool = False,
            ls_init_value: Optional[float] = 1e-6,
            act_layer: Union[str, Callable] = 'gelu',
            norm_layer: Optional[Callable] = None,
            drop_path: float = 0.,
    ):
        """
        Args:
            in_chs: Block input channels.
            out_chs: Block output channels (same as in_chs if None).
            kernel_size: Depthwise convolution kernel size.
            stride: Stride of depthwise convolution.
            dilation: Tuple specifying input and output dilation of block.
            mlp_ratio: MLP expansion ratio.
            conv_mlp: Use 1x1 convolutions for MLP and a NCHW-compatible norm layer if True.
            conv_bias: Apply bias for all convolution (linear) layers.
            use_grn: Use GlobalResponseNorm in MLP (from ConvNeXt-V2).
            ls_init_value: Layer-scale init value, layer-scale applied if not None.
            act_layer: Activation layer.
            norm_layer: Normalization layer (defaults to LN if not specified).
            drop_path: Stochastic depth probability.
        """
        super().__init__()
        out_chs = out_chs or in_chs
        dilation = to_ntuple(2)(dilation)
        act_layer = get_act_layer(act_layer)
        if not norm_layer:
            norm_layer = LayerNorm2d if conv_mlp else LayerNorm
        mlp_layer = partial(GlobalResponseNormMlp if use_grn else Mlp, use_conv=conv_mlp)
        self.use_conv_mlp = conv_mlp
        self.conv_dw = create_conv2d(
            in_chs,
            out_chs,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation[0],
            depthwise=out_chs >= in_chs,  # fall back to a dense conv when reducing channels
            bias=conv_bias,
        )
        self.norm = norm_layer(out_chs)
        self.mlp = mlp_layer(out_chs, int(mlp_ratio * out_chs), act_layer=act_layer)
        self.gamma = nn.Parameter(ls_init_value * torch.ones(out_chs)) if ls_init_value is not None else None
        if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
            self.shortcut = Downsample(in_chs, out_chs, stride=stride, dilation=dilation[0])
        else:
            self.shortcut = nn.Identity()
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        shortcut = x
        x = self.conv_dw(x)
        if self.use_conv_mlp:
            x = self.norm(x)
            x = self.mlp(x)
        else:
            # channels-last path: norm + MLP operate on (N, H, W, C)
            x = x.permute(0, 2, 3, 1)
            x = self.norm(x)
            x = self.mlp(x)
            x = x.permute(0, 3, 1, 2)
        if self.gamma is not None:
            x = x.mul(self.gamma.reshape(1, -1, 1, 1))

        x = self.drop_path(x) + self.shortcut(shortcut)
        return x
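
# Quick equivalence sketch (hypothetical sizes): the conv_mlp (NCHW) and
# channels-last (NHWC) variants of the block produce identically shaped outputs.
def _demo_convnext_block():
    x = torch.randn(1, 96, 28, 28)
    for conv_mlp in (False, True):
        block = ConvNeXtBlock(in_chs=96, conv_mlp=conv_mlp)
        assert block(x).shape == x.shape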


class ConvNeXtStage(nn.Module):
    """A ConvNeXt stage: optional norm + conv downsample, followed by a sequence of ConvNeXtBlocks."""

    def __init__(
            self,
            in_chs,
            out_chs,
            kernel_size=7,
            stride=2,
            depth=2,
            dilation=(1, 1),
            drop_path_rates=None,
            ls_init_value=1.0,
            conv_mlp=False,
            conv_bias=True,
            use_grn=False,
            act_layer='gelu',
            norm_layer=None,
            norm_layer_cl=None,
    ):
        super().__init__()
        self.grad_checkpointing = False

        if in_chs != out_chs or stride > 1 or dilation[0] != dilation[1]:
            ds_ks = 2 if stride > 1 or dilation[0] != dilation[1] else 1
            pad = 'same' if dilation[1] > 1 else 0  # handle asymmetric padding for the dilated case
            self.downsample = nn.Sequential(
                norm_layer(in_chs),
                create_conv2d(
                    in_chs,
                    out_chs,
                    kernel_size=ds_ks,
                    stride=stride,
                    dilation=dilation[0],
                    padding=pad,
                    bias=conv_bias,
                ),
            )
            in_chs = out_chs
        else:
            self.downsample = nn.Identity()

        drop_path_rates = drop_path_rates or [0.] * depth
        stage_blocks = []
        for i in range(depth):
            stage_blocks.append(ConvNeXtBlock(
                in_chs=in_chs,
                out_chs=out_chs,
                kernel_size=kernel_size,
                dilation=dilation[1],
                drop_path=drop_path_rates[i],
                ls_init_value=ls_init_value,
                conv_mlp=conv_mlp,
                conv_bias=conv_bias,
                use_grn=use_grn,
                act_layer=act_layer,
                norm_layer=norm_layer if conv_mlp else norm_layer_cl,
            ))
            in_chs = out_chs
        self.blocks = nn.Sequential(*stage_blocks)

    def forward(self, x):
        x = self.downsample(x)
        if self.grad_checkpointing and not torch.jit.is_scripting():
            # honor the flag set by set_grad_checkpointing(): trade compute for memory
            for block in self.blocks:
                x = checkpoint(block, x)
        else:
            x = self.blocks(x)
        return x
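
# Shape sketch (hypothetical sizes): a stride-2 stage halves resolution in its
# downsample layer, then the residual blocks keep the new resolution.
def _demo_convnext_stage():
    stage = ConvNeXtStage(96, 192, stride=2, depth=2, norm_layer=LayerNorm2d, norm_layer_cl=LayerNorm)
    x = torch.randn(1, 96, 56, 56)
    assert stage(x).shape == (1, 192, 28, 28)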


class ConvNeXt(nn.Module):
    r""" ConvNeXt

    A PyTorch impl of: `A ConvNet for the 2020s` - https://arxiv.org/pdf/2201.03545.pdf
    """

    def __init__(
            self,
            in_chans: int = 3,
            num_classes: int = 1000,
            global_pool: str = 'avg',
            output_stride: int = 32,
            depths: Tuple[int, ...] = (3, 3, 9, 3),
            dims: Tuple[int, ...] = (96, 192, 384, 768),
            kernel_sizes: Union[int, Tuple[int, ...]] = 7,
            ls_init_value: Optional[float] = 1e-6,
            stem_type: str = 'patch',
            patch_size: int = 4,
            head_init_scale: float = 1.,
            head_norm_first: bool = False,
            head_hidden_size: Optional[int] = None,
            conv_mlp: bool = False,
            conv_bias: bool = True,
            use_grn: bool = False,
            act_layer: Union[str, Callable] = 'gelu',
            norm_layer: Optional[Union[str, Callable]] = None,
            norm_eps: Optional[float] = None,
            drop_rate: float = 0.,
            drop_path_rate: float = 0.,
    ):
        """
        Args:
            in_chans: Number of input image channels.
            num_classes: Number of classes for classification head.
            global_pool: Global pooling type.
            output_stride: Output stride of network, one of (8, 16, 32).
            depths: Number of blocks at each stage.
            dims: Feature dimension at each stage.
            kernel_sizes: Depthwise convolution kernel sizes for each stage.
            ls_init_value: Init value for Layer Scale, disabled if None.
            stem_type: Type of stem.
            patch_size: Stem patch size for patch stem.
            head_init_scale: Init scaling value for classifier weights and biases.
            head_norm_first: Apply normalization before global pool + head.
            head_hidden_size: Size of MLP hidden layer in head if not None and head_norm_first is False.
            conv_mlp: Use 1x1 conv in MLP, improves speed for small networks w/ chan last.
            conv_bias: Use bias layers w/ all convolutions.
            use_grn: Use Global Response Norm (ConvNeXt-V2) in MLP.
            act_layer: Activation layer type.
            norm_layer: Normalization layer type.
            norm_eps: Override the norm layers' epsilon if not None.
            drop_rate: Head pre-classifier dropout rate.
            drop_path_rate: Stochastic depth drop rate.
        """
        super().__init__()
        assert output_stride in (8, 16, 32)
        kernel_sizes = to_ntuple(4)(kernel_sizes)
        if norm_layer is None:
            norm_layer = LayerNorm2d
            norm_layer_cl = norm_layer if conv_mlp else LayerNorm
            if norm_eps is not None:
                norm_layer = partial(norm_layer, eps=norm_eps)
                norm_layer_cl = partial(norm_layer_cl, eps=norm_eps)
        else:
            assert conv_mlp, \
                'If a norm_layer is specified, conv MLP must be used so all norms expect rank-4, channels-first input'
            norm_layer_cl = norm_layer
            if norm_eps is not None:
                norm_layer_cl = partial(norm_layer_cl, eps=norm_eps)

        self.num_classes = num_classes
        self.drop_rate = drop_rate
        self.feature_info = []

        assert stem_type in ('patch', 'overlap', 'overlap_tiered')
        if stem_type == 'patch':
            # non-overlapping "patchify" stem, as in the paper
            self.stem = nn.Sequential(
                nn.Conv2d(in_chans, dims[0], kernel_size=patch_size, stride=patch_size, bias=conv_bias),
                norm_layer(dims[0]),
            )
            stem_stride = patch_size
        else:
            mid_chs = make_divisible(dims[0] // 2) if 'tiered' in stem_type else dims[0]
            self.stem = nn.Sequential(
                nn.Conv2d(in_chans, mid_chs, kernel_size=3, stride=2, padding=1, bias=conv_bias),
                nn.Conv2d(mid_chs, dims[0], kernel_size=3, stride=2, padding=1, bias=conv_bias),
                norm_layer(dims[0]),
            )
            stem_stride = 4

        # per-block stochastic depth rates, increasing linearly across all blocks
        dp_rates = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)]
        stages = []
        prev_chs = dims[0]
        curr_stride = stem_stride
        dilation = 1

        for i in range(4):
            stride = 2 if curr_stride == 2 or i > 0 else 1
            if curr_stride >= output_stride and stride > 1:
                # swap stride for dilation once the requested output stride is reached
                dilation *= stride
                stride = 1
            curr_stride *= stride
            first_dilation = 1 if dilation in (1, 2) else 2
            out_chs = dims[i]
            stages.append(ConvNeXtStage(
                prev_chs,
                out_chs,
                kernel_size=kernel_sizes[i],
                stride=stride,
                dilation=(first_dilation, dilation),
                depth=depths[i],
                drop_path_rates=dp_rates[i],
                ls_init_value=ls_init_value,
                conv_mlp=conv_mlp,
                conv_bias=conv_bias,
                use_grn=use_grn,
                act_layer=act_layer,
                norm_layer=norm_layer,
                norm_layer_cl=norm_layer_cl,
            ))
            prev_chs = out_chs
            self.feature_info += [dict(num_chs=prev_chs, reduction=curr_stride, module=f'stages.{i}')]
        self.stages = nn.Sequential(*stages)
        self.num_features = prev_chs

    @torch.jit.ignore
    def group_matcher(self, coarse=False):
        return dict(
            stem=r'^stem',
            blocks=r'^stages\.(\d+)' if coarse else [
                (r'^stages\.(\d+)\.downsample', (0,)),
                (r'^stages\.(\d+)\.blocks\.(\d+)', None),
                (r'^norm_pre', (99999,)),
            ],
        )

    @torch.jit.ignore
    def set_grad_checkpointing(self, enable=True):
        for s in self.stages:
            s.grad_checkpointing = enable

    @torch.jit.ignore
    def get_classifier(self):
        # NOTE: kept for timm API compatibility; this trimmed backbone builds no classifier head
        return self.head.fc

    def forward_features(self, x):
        x = self.stem(x)
        x = self.stages(x)
        return x
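
# Feature-extraction sketch (hypothetical sizes): with a patch-4 stem and three
# stride-2 stages, forward_features reduces a 224px input by 32x overall.
def _demo_convnext_features():
    model = ConvNeXt(depths=(2, 2, 2, 2), dims=(32, 64, 128, 256))
    x = torch.randn(1, 3, 224, 224)
    assert model.forward_features(x).shape == (1, 256, 7, 7)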


def _init_weights(module, name=None, head_init_scale=1.0):
    if isinstance(module, nn.Conv2d):
        trunc_normal_(module.weight, std=.02)
        if module.bias is not None:
            nn.init.zeros_(module.bias)
    elif isinstance(module, nn.Linear):
        trunc_normal_(module.weight, std=.02)
        nn.init.zeros_(module.bias)
        if name and 'head.' in name:
            module.weight.data.mul_(head_init_scale)
            module.bias.data.mul_(head_init_scale)
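
# Usage sketch: named_modules() supplies the `name` argument so that only
# modules under a (hypothetical) 'head.' prefix receive the extra scaling.
def _demo_init_weights(model: nn.Module, head_init_scale: float = 1.0):
    for name, module in model.named_modules():
        _init_weights(module, name=name, head_init_scale=head_init_scale)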


class UpconvSkip(nn.Module):
    """Fuse a (concatenated) skip connection with a ConvNeXt block, then upsample 2x via transposed conv."""

    def __init__(self, ch1, ch2, out_ch) -> None:
        super().__init__()
        self.conv = ConvNeXtBlock(
            in_chs=ch1 + ch2,
            out_chs=out_ch,
            kernel_size=7,
            dilation=1,
            drop_path=0,
            ls_init_value=1.0,
            conv_mlp=False,
            conv_bias=True,
            use_grn=False,
            act_layer='gelu',
            norm_layer=LayerNorm,
        )
        self.upconv = nn.ConvTranspose2d(out_ch, out_ch, 2, 2, 0, 0)

    def forward(self, x):
        x = self.conv(x)
        x = self.upconv(x)
        return x
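
# Decoder-step sketch (hypothetical sizes): the kernel-2/stride-2 transposed
# conv exactly doubles H and W after the fused ConvNeXt block.
def _demo_upconv_skip():
    up = UpconvSkip(128, 256, 128)
    x = torch.randn(1, 384, 16, 16)  # concatenated skip: 128 + 256 channels
    assert up(x).shape == (1, 128, 32, 32)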


class DBHead(nn.Module):
    """DBNet head producing shrink (probability) and threshold maps; at train time it also
    emits the approximate binary map via the differentiable step function."""

    def __init__(self, in_channels, k=50):
        super().__init__()
        self.k = k
        self.binarize = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // 4, 3, padding=1),
            nn.SiLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, in_channels // 4, 4, 2, 1),
            nn.SiLU(inplace=True),
            nn.ConvTranspose2d(in_channels // 4, 1, 4, 2, 1),
        )
        self.binarize.apply(self.weights_init)

        self.thresh = self._init_thresh(in_channels)
        self.thresh.apply(self.weights_init)

    def forward(self, x):
        shrink_maps = self.binarize(x)
        threshold_maps = self.thresh(x)
        if self.training:
            binary_maps = self.step_function(shrink_maps.sigmoid(), threshold_maps)
            y = torch.cat((shrink_maps, threshold_maps, binary_maps), dim=1)
        else:
            y = torch.cat((shrink_maps, threshold_maps), dim=1)
        return y

    def weights_init(self, m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.kaiming_normal_(m.weight.data)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)

    def _init_thresh(self, inner_channels, serial=False, smooth=False, bias=False):
        in_channels = inner_channels
        if serial:
            in_channels += 1
        self.thresh = nn.Sequential(
            nn.Conv2d(in_channels, inner_channels // 4, 3, padding=1, bias=bias),
            nn.SiLU(inplace=True),
            self._init_upsample(inner_channels // 4, inner_channels // 4, smooth=smooth, bias=bias),
            nn.SiLU(inplace=True),
            self._init_upsample(inner_channels // 4, 1, smooth=smooth, bias=bias),
            nn.Sigmoid())
        return self.thresh

    def _init_upsample(self, in_channels, out_channels, smooth=False, bias=False):
        if smooth:
            inter_out_channels = out_channels
            if out_channels == 1:
                inter_out_channels = in_channels
            module_list = [
                nn.Upsample(scale_factor=2, mode='bilinear'),
                nn.Conv2d(in_channels, inter_out_channels, 3, 1, 1, bias=bias)]
            if out_channels == 1:
                module_list.append(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=1, bias=True))
            return nn.Sequential(*module_list)  # unpack: nn.Sequential takes modules, not a list
        else:
            return nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1)

    def step_function(self, x, y):
        # differentiable binarization: 1 / (1 + exp(-k (x - y))) == sigmoid(k (x - y))
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))
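
# Numeric sketch: the step function is a steep sigmoid centered on the
# threshold map, so it can be checked against torch.sigmoid directly.
def _demo_step_function():
    head = DBHead(in_channels=128)
    p = torch.rand(4)   # shrink-map probabilities
    t = torch.rand(4)   # per-pixel thresholds
    expected = torch.sigmoid(head.k * (p - t))
    assert torch.allclose(head.step_function(p, t), expected)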


class DBNetConvNext(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024])

        self.conv_mask = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.SiLU(inplace=True),
            nn.Conv2d(64, 32, kernel_size=3, padding=1), nn.SiLU(inplace=True),
            nn.Conv2d(32, 1, kernel_size=1),
            nn.Sigmoid()
        )

        # two extra stride-2 stages extend the pyramid to 1/64 and 1/128 scale
        self.down_conv1 = ConvNeXtStage(1024, 1024, depth=2, norm_layer=LayerNorm2d)
        self.down_conv2 = ConvNeXtStage(1024, 1024, depth=2, norm_layer=LayerNorm2d)

        self.upconv1 = UpconvSkip(0, 1024, 128)
        self.upconv2 = UpconvSkip(128, 1024, 128)
        self.upconv3 = UpconvSkip(128, 1024, 128)
        self.upconv4 = UpconvSkip(128, 512, 128)
        self.upconv5 = UpconvSkip(128, 256, 128)
        self.upconv6 = UpconvSkip(128, 128, 64)

        self.conv_db = DBHead(128)

    def forward(self, x):
        # encoder: ConvNeXt stages at 1/4 .. 1/32, extended down to 1/128
        x = self.backbone.stem(x)
        h4 = self.backbone.stages[0](x)
        h8 = self.backbone.stages[1](h4)
        h16 = self.backbone.stages[2](h8)
        h32 = self.backbone.stages[3](h16)
        h64 = self.down_conv1(h32)
        h128 = self.down_conv2(h64)

        # decoder: U-Net style upsampling with skip concatenation at each scale
        up128 = self.upconv1(h128)
        up64 = self.upconv2(torch.cat([up128, h64], dim=1))
        up32 = self.upconv3(torch.cat([up64, h32], dim=1))
        up16 = self.upconv4(torch.cat([up32, h16], dim=1))
        up8 = self.upconv5(torch.cat([up16, h8], dim=1))
        up4 = self.upconv6(torch.cat([up8, h4], dim=1))

        # DB head consumes the 1/4-scale features and upsamples 4x internally;
        # the mask head runs on the 1/2-scale features
        return self.conv_db(up8), self.conv_mask(up4)


import os

from .default_utils import imgproc, dbnet_utils, craft_utils
from .common import OfflineDetector
from ..utils import TextBlock, Quadrilateral, det_rearrange_forward

MODEL = None

def det_batch_forward_default(batch: np.ndarray, device: str):
    global MODEL
    if isinstance(batch, list):
        batch = np.array(batch)
    # normalize uint8 [0, 255] to [-1, 1] and reorder to NCHW
    batch = einops.rearrange(batch.astype(np.float32) / 127.5 - 1.0, 'n h w c -> n c h w')
    batch = torch.from_numpy(batch).to(device)
    with torch.no_grad():
        db, mask = MODEL(batch)
        db = db.sigmoid().cpu().numpy()
        mask = mask.cpu().numpy()
    return db, mask
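
# Normalization sketch: x / 127.5 - 1 maps uint8 pixel values onto [-1, 1],
# e.g. 0 -> -1.0 and 255 -> 1.0.
def _demo_normalization():
    px = np.array([0, 128, 255], dtype=np.float32)
    out = px / 127.5 - 1.0
    assert out[0] == -1.0 and out[2] == 1.0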


class DBConvNextDetector(OfflineDetector):
    _MODEL_MAPPING = {
        'model': {
            'url': '',
            'hash': '',
            'file': '.',
        }
    }

    def __init__(self, *args, **kwargs):
        os.makedirs(self.model_dir, exist_ok=True)
        # adopt a checkpoint dropped into the working directory, if present
        if os.path.exists('dbnet_convnext.ckpt'):
            shutil.move('dbnet_convnext.ckpt', self._get_file_path('dbnet_convnext.ckpt'))
        super().__init__(*args, **kwargs)

    async def _load(self, device: str):
        self.model = DBNetConvNext()
        sd = torch.load(self._get_file_path('dbnet_convnext.ckpt'), map_location='cpu')
        self.model.load_state_dict(sd['model'] if 'model' in sd else sd)
        self.model.eval()
        self.device = device
        if device in ('cuda', 'mps'):
            self.model = self.model.to(self.device)
        global MODEL
        MODEL = self.model

    async def _unload(self):
        del self.model

    async def _infer(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float,
                     unclip_ratio: float, verbose: bool = False):

        # try the rearranged (tiled) forward first; it returns None when the
        # image does not need rearranging
        db, mask = det_rearrange_forward(image, det_batch_forward_default, detect_size, 4, device=self.device, verbose=verbose)

        if db is None:
            img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
                cv2.bilateralFilter(image, 17, 80, 80), detect_size, cv2.INTER_LINEAR, mag_ratio=1)
            img_resized_h, img_resized_w = img_resized.shape[:2]
            ratio_h = ratio_w = 1 / target_ratio
            db, mask = det_batch_forward_default([img_resized], self.device)
        else:
            img_resized_h, img_resized_w = image.shape[:2]
            ratio_w = ratio_h = 1
            pad_h = pad_w = 0
        self.logger.info(f'Detection resolution: {img_resized_w}x{img_resized_h}')

        mask = mask[0, 0, :, :]
        det = dbnet_utils.SegDetectorRepresenter(text_threshold, box_threshold, unclip_ratio=unclip_ratio)

        boxes, scores = det({'shape': [(img_resized_h, img_resized_w)]}, db)
        boxes, scores = boxes[0], scores[0]
        if boxes.size == 0:
            polys = []
        else:
            # drop degenerate all-zero boxes, keeping boxes and scores aligned
            idx = boxes.reshape(boxes.shape[0], -1).sum(axis=1) > 0
            polys, scores = boxes[idx], scores[idx]
            polys = polys.astype(np.float64)
            polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=1)
            polys = polys.astype(np.int16)

        textlines = [Quadrilateral(pts.astype(int), '', score) for pts, score in zip(polys, scores)]
        textlines = list(filter(lambda q: q.area > 16, textlines))
        # the mask head runs at 1/2 scale, so upsample 2x then crop any padding
        mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2), interpolation=cv2.INTER_LINEAR)
        if pad_h > 0:
            mask_resized = mask_resized[:-pad_h, :]
        elif pad_w > 0:
            mask_resized = mask_resized[:, :-pad_w]
        raw_mask = np.clip(mask_resized * 255, 0, 255).astype(np.uint8)

        return textlines, raw_mask, None


if __name__ == '__main__':
    net = DBNetConvNext().cuda()
    img = torch.randn(2, 3, 1536, 1536).cuda()
    db, mask = net(img)
    print(db.shape)
    print(mask.shape)