jmliu committed · Commit 0742dfe · 1 Parent(s): dbd0af1
app.py ADDED
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+import torch
+from torchvision import transforms
+import os
+import cv2
+import time
+import numpy as np
+
+import pyclipper
+import gradio as gr
+
+from models import get_model
+from utils.util import show_img, draw_bbox
+
+class Pytorch_model:
+    def __init__(self, model_path, gpu_id=None):
+        '''
+        Initialize the PyTorch model.
+        :param model_path: path to the checkpoint (either bare weights, or weights saved together with the graph)
+        :param gpu_id: id of the GPU to run on, or None for CPU
+        '''
+        self.gpu_id = gpu_id
+
+        if self.gpu_id is not None and isinstance(self.gpu_id, int) and torch.cuda.is_available():
+            self.device = torch.device("cuda:%s" % self.gpu_id)
+            checkpoint = torch.load(model_path)
+        else:
+            self.device = torch.device("cpu")
+            checkpoint = torch.load(model_path, map_location='cpu')
+        print('device:', self.device)
+
+        config = checkpoint['config']
+        config['arch']['args']['pretrained'] = False
+        self.net = get_model(config)
+
+        self.img_channel = config['data_loader']['args']['dataset']['img_channel']
+        self.net.load_state_dict(checkpoint['state_dict'])  # load weights
+        self.net.to(self.device)
+        self.net.eval()
+
+    def predict(self, img, short_size: int = 736, min_area: int = 100):
+        '''
+        Run prediction on an image; accepts either a file path (read via OpenCV, which is slower) or an image already loaded as a NumPy array.
+        :param img: image path or np.ndarray
+        :param short_size: the shorter image side is scaled to this size
+        :param min_area: bounding boxes smaller than this area are ignored
+        :return: (dilated polygons, bounding boxes, elapsed time)
+        '''
+        if isinstance(img, str):  # a path on disk: OpenCV loads BGR, so convert
+            img = cv2.imread(img)
+            if self.img_channel == 3:
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        h, w = img.shape[:2]
+        scale = short_size / min(h, w)
+        img = cv2.resize(img, None, fx=scale, fy=scale)
+
+        tensor = transforms.ToTensor()(img)
+        tensor = tensor.unsqueeze_(0)
+
+        tensor = tensor.to(self.device)
+        with torch.no_grad():
+            if self.device.type == 'cuda':  # synchronizing is only valid (and needed) on GPU
+                torch.cuda.synchronize(self.device)
+            start = time.time()
+            preds = self.net(tensor)[0]
+            if self.device.type == 'cuda':
+                torch.cuda.synchronize(self.device)
+            scale = (preds.shape[2] / w, preds.shape[1] / h)
+            t = time.time() - start
+
+            '''inference'''
+            start = time.time()
+            prob_map, thres_map = preds[0], preds[1]
+
+            # Step 1: threshold the probability map to get the binary map
+            thr = 0.2
+            out = (prob_map > thr).float() * 255
+            out = out.data.cpu().numpy().astype(np.uint8)
+            # cv2.imwrite('c_bin_map.png', out)
+
+            # Step 2: find connected components via findContours
+            contours, hierarchy = cv2.findContours(out, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            contours = [(i / scale).astype(np.int32) for i in contours if len(i) >= 4]  # OpenCV expects 32-bit int points
+
+            # Step 3: dilate the shrunk regions (optional)
+            ratio_prime = 1.5
+            dilated_polys = []
+            for poly in contours:
+                poly = poly[:, 0, :]
+                D_prime = cv2.contourArea(poly) * ratio_prime / cv2.arcLength(poly, True)  # formula (10) in the paper
+                pco = pyclipper.PyclipperOffset()
+                pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+                dilated_poly = np.array(pco.Execute(D_prime))
+                if dilated_poly.size == 0 or dilated_poly.dtype != int or len(dilated_poly) != 1:  # ragged offsets have dtype=object
+                    continue
+                dilated_polys.append(dilated_poly.astype(np.int32))
+
+            boxes_list = []
+            for cnt in dilated_polys:
+                # print('=============')
+                # print(cnt)
+                # print(len(cnt))
+                if cv2.contourArea(cnt) < min_area:
+                    continue
+                rect = cv2.minAreaRect(cnt)
+                box = (cv2.boxPoints(rect)).astype(np.int32)
+                boxes_list.append(box)
+
+            t = time.time() - start + t
+
+            boxes_list = np.array(boxes_list)
+            return dilated_polys, boxes_list, t
+
+
+def run(img):
+    # os.environ["CUDA_VISIBLE_DEVICES"] = "5"
+    model_path = 'pre_trained/MobileNetv2_best_loss.pth'
+
+    # init model (reloaded on every call; hoist to module scope to avoid the overhead)
+    model = Pytorch_model(model_path, gpu_id=None)  # set a GPU id, or None if you only have a CPU
+    contours, boxes_list, t = model.predict(img)
+    print('Time: %.4f' % t)
+
+    imgc = img.copy()  # Gradio provides RGB, so no channel swap is needed before drawing and returning
+    cv2.drawContours(imgc, contours, -1, (22, 222, 22), 2, cv2.LINE_AA)
+    return imgc
+    # cv2.imwrite('contour.png', imgc)
+    # img = draw_bbox(img, boxes_list)
+    # cv2.imwrite('predict.jpg', img)
+
+if __name__ == '__main__':
+    iface = gr.Interface(fn=run,
+                         title="CJK Font Detection Using DBNet",
+                         description="Given an image containing CJK fonts, we will mark the text out!",
+                         inputs="image",
+                         outputs="image")
+    iface.launch()
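For reference, the post-processing in `predict` is DBNet's "unshrink" step: each contour found in the binary map is offset outwards by D' = A * r' / L (contour area times the unshrink ratio, divided by the perimeter). A minimal standalone sketch of just that step, assuming a hypothetical toy rectangle as the shrunk region (the pyclipper calls match the ones used above):

import cv2
import numpy as np
import pyclipper

poly = np.array([[0, 0], [100, 0], [100, 40], [0, 40]], dtype=np.int32)  # toy shrunk region
ratio_prime = 1.5
d_prime = cv2.contourArea(poly) * ratio_prime / cv2.arcLength(poly, True)  # D' = A * r' / L
pco = pyclipper.PyclipperOffset()
pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
dilated = pco.Execute(d_prime)  # list of offset polygons; usually exactly one
print(d_prime, np.array(dilated[0]).shape)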
models/__init__.py ADDED
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+from .model import Model
+from .loss import DBLoss
+
+
+def get_model(config):
+    model_config = config['arch']['args']
+    return Model(model_config)
+
+def get_loss(config):
+    alpha = config['loss']['args']['alpha']
+    beta = config['loss']['args']['beta']
+    ohem_ratio = config['loss']['args']['ohem_ratio']
+    return DBLoss(alpha=alpha, beta=beta, ohem_ratio=ohem_ratio)
models/loss.py ADDED
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+import itertools
+import torch
+from torch import nn
+import numpy as np
+import cv2
+import torchvision.transforms as transforms
+
+# import torchsnooper  # for debugging
+
+class DBLoss(nn.Module):
+    def __init__(self, alpha=1., beta=10., ohem_ratio=3):
+        """
+        Implement DB Loss.
+        :param alpha: weight of the binary-map loss
+        :param beta: weight of the threshold-map loss
+        :param ohem_ratio: negative-to-positive sampling ratio for OHEM
+        """
+        super().__init__()
+        self.alpha = alpha
+        self.beta = beta
+        self.ohem_ratio = ohem_ratio
+
+    def forward(self, outputs, labels, training_masks, G_d):
+        """
+        Implement DB Loss.
+        :param outputs: N 2 H W
+        :param labels: N 2 H W
+        :param training_masks:
+        """
+        prob_map = outputs[:, 0, :, :]
+        thres_map = outputs[:, 1, :, :]
+        gt_prob = labels[:, 0, :, :]
+        gt_thres = labels[:, 1, :, :]
+
+        G_d = G_d.to(dtype=torch.float32)
+        training_masks = training_masks.to(dtype=torch.float32)
+
+        # OHEM mask (todo)
+        # selected_masks = self.ohem_batch(prob_map, gt_prob)
+        # selected_masks = selected_masks.to(outputs.device)
+
+        # probability-map loss
+        loss_prob = self.dice_loss(prob_map, gt_prob, training_masks)
+        # loss_prob = self.bce_loss(prob_map, gt_prob, selected_masks)
+
+        # binary-map loss
+        bin_map = self.DB(prob_map, thres_map)
+        loss_bin = self.dice_loss(bin_map, gt_prob, training_masks)
+        # loss_prob = self.bce_loss(bin_map, gt_prob, selected_masks)
+
+        # threshold-map loss
+        loss_fn = torch.nn.L1Loss(reduction='mean')
+        L1_loss = loss_fn(thres_map, gt_thres)
+        loss_thres = L1_loss * G_d
+
+        loss_prob = loss_prob.mean()
+        loss_bin = loss_bin.mean()
+        loss_thres = loss_thres.mean()
+
+        loss_all = loss_prob + self.alpha * loss_bin + self.beta * loss_thres
+        return loss_all, loss_prob, loss_bin, loss_thres
+
+    def DB(self, prob_map, thres_map, k=50):
+        '''
+        Differentiable binarization
+        another form: torch.sigmoid(k * (prob_map - thres_map))
+        '''
+        return 1. / (torch.exp((-k * (prob_map - thres_map))) + 1)
+
+    def dice_loss(self, pred_cls, gt_cls, training_mask):
+        '''
+        dice loss
+        Both ground truth and prediction are assumed to be in NCHW format.
+        :param gt_cls:
+        :param pred_cls:
+        :param training_mask:
+        :return:
+        '''
+        eps = 1e-5
+
+        intersection = torch.sum(gt_cls * pred_cls * training_mask)
+        union = torch.sum(gt_cls * training_mask) + torch.sum(pred_cls * training_mask) + eps
+        loss = 1. - (2 * intersection / union)
+
+        return loss
+
+    def bce_loss(self, input, target, mask):
+        if mask.sum() == 0:
+            return torch.tensor(0.0, device=input.device, requires_grad=True)
+        target[target <= 0.5] = 0
+        target[target > 0.5] = 1
+        input = input[mask.bool()]
+        target = target[mask.bool()]
+        loss = nn.BCELoss(reduction='mean')(input, target)
+        return loss
+
+    def ohem_single(self, score, gt_text):
+        pos_num = int(np.sum(gt_text > 0.5))
+
+        if pos_num == 0:
+            selected_mask = np.zeros_like(score)
+            selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+            return selected_mask
+
+        neg_num = int(np.sum(gt_text <= 0.5))
+        neg_num = int(min(pos_num * self.ohem_ratio, neg_num))
+
+        if neg_num == 0:
+            selected_mask = np.zeros_like(score)
+            selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+            return selected_mask
+
+        neg_score = score[gt_text <= 0.5]
+        neg_score_sorted = np.sort(-neg_score)
+        threshold = -neg_score_sorted[neg_num - 1]
+        selected_mask = (score >= threshold) | (gt_text > 0.5)
+        selected_mask = selected_mask.reshape(1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
+        return selected_mask
+
+    def ohem_batch(self, scores, gt_texts):
+        scores = scores.data.cpu().numpy()
+        gt_texts = gt_texts.data.cpu().numpy()
+        selected_masks = []
+        for i in range(scores.shape[0]):
+            selected_masks.append(self.ohem_single(scores[i, :, :], gt_texts[i, :, :]))
+
+        selected_masks = np.concatenate(selected_masks, 0)
+        selected_masks = torch.from_numpy(selected_masks).float()
+
+        return selected_masks
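For intuition, the `DB` method above implements the differentiable binarization from the DBNet paper, B = 1 / (1 + e^(-k(P - T))), which is exactly a sigmoid with steepness k. A quick standalone check on toy tensors, assuming the default k = 50:

import torch

k = 50
prob_map, thres_map = torch.rand(1, 8, 8), torch.rand(1, 8, 8)
db = 1. / (torch.exp(-k * (prob_map - thres_map)) + 1)
print(torch.allclose(db, torch.sigmoid(k * (prob_map - thres_map))))  # True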
models/model.py ADDED
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+from models.modules import *
+# from modules import *
+
+backbone_dict = {'resnet18': {'models': resnet18, 'out': [64, 128, 256, 512]},
+                 'resnet34': {'models': resnet34, 'out': [64, 128, 256, 512]},
+                 'resnet50': {'models': resnet50, 'out': [256, 512, 1024, 2048]},
+                 'resnet101': {'models': resnet101, 'out': [256, 512, 1024, 2048]},
+                 'resnet152': {'models': resnet152, 'out': [256, 512, 1024, 2048]},
+                 'resnext50_32x4d': {'models': resnext50_32x4d, 'out': [256, 512, 1024, 2048]},
+                 'resnext101_32x8d': {'models': resnext101_32x8d, 'out': [256, 512, 1024, 2048]},
+                 'shufflenetv2': {'models': shufflenet_v2_x1_0, 'out': [24, 116, 232, 464]},
+                 'mobilenetv2': {'models': mobilenet_v2_x1_0, 'out': [24, 40, 160, 160]}
+                 }
+
+segmentation_head_dict = {'FPN': FPN, 'FPEM_FFM': FPEM_FFM}
+
+# 'MobileNetV3_Large': {'models': MobileNetV3_Large, 'out': [24, 40, 160, 160]},
+# 'MobileNetV3_Small': {'models': MobileNetV3_Small, 'out': [16, 24, 48, 96]},
+# 'shufflenetv2': {'models': shufflenet_v2_x1_0, 'out': [24, 116, 232, 464]}}
+
+class Model(nn.Module):
+    def __init__(self, model_config: dict):
+        """
+        PANnet
+        :param model_config: model configuration
+        """
+        super().__init__()
+        backbone = model_config['backbone']
+        pretrained = model_config['pretrained']
+        segmentation_head = model_config['segmentation_head']
+
+        assert backbone in backbone_dict, 'backbone must be in: {}'.format(backbone_dict)
+        assert segmentation_head in segmentation_head_dict, 'segmentation_head must be in: {}'.format(
+            segmentation_head_dict)
+
+        backbone_model, backbone_out = backbone_dict[backbone]['models'], backbone_dict[backbone]['out']
+        self.backbone = backbone_model(pretrained=pretrained)
+        self.segmentation_head = segmentation_head_dict[segmentation_head](backbone_out, **model_config)
+        self.name = '{}_{}'.format(backbone, segmentation_head)
+
+    def forward(self, x):
+        _, _, H, W = x.size()
+        backbone_out = self.backbone(x)
+        segmentation_head_out = self.segmentation_head(backbone_out)
+        y = segmentation_head_out
+        return y
+
+
+if __name__ == '__main__':
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    print(device)
+    x = torch.zeros(1, 3, 640, 640).to(device)
+
+    model_config = {
+        'backbone': 'mobilenetv2',
+        'fpem_repeat': 2,  # number of times the FPEM module is repeated
+        'pretrained': False,  # whether the backbone uses ImageNet pretrained weights
+        'segmentation_head': 'FPN'  # segmentation head: FPN or FPEM_FFM
+    }
+    model = Model(model_config=model_config).to(device)
+    y = model(x)
+
+    print(model)
+    # torch.save(model.state_dict(), 'PAN.pth')
models/modules/__init__.py ADDED
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+from .resnet import *
+from .shufflenetv2 import *
+from .mobilenetv2 import *
+from .segmentation_head import FPEM_FFM, FPN
models/modules/mobilenetv2.py ADDED
@@ -0,0 +1,178 @@
+import math
+import torch
+import torch.nn as nn
+from torch.hub import load_state_dict_from_url
+
+__all__ = ['mobilenet_v2_x1_0']
+
+model_urls = {
+    # no pretrained weights have been found for this variant yet
+    'mobilenet_v2_x1_0': None,
+}
+def _make_divisible(v, divisor, min_value=None):
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8.
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    :param v:
+    :param divisor:
+    :param min_value:
+    :return:
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+class InvertedResidual(nn.Module):
+    def __init__(self, inp, oup, stride, expand_ratio):
+        super(InvertedResidual, self).__init__()
+
+        if not (1 <= stride <= 2):
+            raise ValueError('illegal stride value')
+        self.stride = stride
+
+        self.exp_r = expand_ratio
+        hidden_dim = round(inp * self.exp_r)
+
+        if self.exp_r == 1:
+            self.branch = nn.Sequential(
+                # dw conv
+                self.depthwise_conv(hidden_dim, hidden_dim, 3, stride, padding=1, bias=False),
+                nn.BatchNorm2d(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # pw-linear
+                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+                nn.BatchNorm2d(oup),
+            )
+
+        else:
+            self.branch = nn.Sequential(
+                # pw
+                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
+                nn.BatchNorm2d(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # dw
+                self.depthwise_conv(hidden_dim, hidden_dim, 3, stride, padding=1, bias=False),
+                nn.BatchNorm2d(hidden_dim),
+                nn.ReLU6(inplace=True),
+                # pw-linear
+                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+                nn.BatchNorm2d(oup),
+            )
+
+        self.downsample = nn.Sequential(
+            nn.Conv2d(inp, oup, kernel_size=1, stride=stride, bias=False),
+            nn.BatchNorm2d(oup),
+        )
+
+        self.identity = stride == 1
+    @staticmethod
+    def depthwise_conv(i, o, kernel_size, stride, padding, bias=False):
+        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
+
+    def forward(self, x):
+        if self.identity:
+            downsampx = self.downsample(x)
+            return downsampx + self.branch(x)
+        else:
+            return self.branch(x)
+
+
+class MobileNetV2(nn.Module):
+    def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, width_mult=1.):
+        super(MobileNetV2, self).__init__()
+
+        if len(stages_repeats) != 5:
+            raise ValueError('expected stages_repeats as list of 5 positive ints')
+        if len(stages_out_channels) != 6:
+            raise ValueError('expected stages_out_channels as list of 6 positive ints')
+        self._stage_out_channels = stages_out_channels
+
+        self.tlist = [1, 6, 6, 6, 6]  # expansion ratio per stage
+        self.slist = [1, 2, 2, 2, 1]  # stride per stage
+
+        input_channels = 3
+        output_channels = self._stage_out_channels[0]  # 32
+        # output_channels = _make_divisible(output_channels * width_mult, 4 if width_mult == 0.1 else 8)
+
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.ReLU6(inplace=True),
+        )
+        input_channels = output_channels
+
+
+        stage_names = ['stage{}'.format(i) for i in [2, 3, 4, 5, 6]]
+        for name, repeats, output_channels, t, s in zip(
+                stage_names, stages_repeats, self._stage_out_channels[1:], self.tlist, self.slist):
+
+            # output_channels = _make_divisible(output_channels * width_mult, 4 if width_mult == 0.1 else 8)
+
+            seq = [InvertedResidual(input_channels, output_channels, s, t)]
+            for i in range(repeats - 1):
+
+                seq.append(InvertedResidual(output_channels, output_channels, 1, t))
+
+
+
+            setattr(self, name, nn.Sequential(*seq))
+            input_channels = output_channels
+
+
+        output_channels = self._stage_out_channels[-1]
+
+        self.conv9 = nn.Sequential(
+            nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.ReLU6(inplace=True),
+        )
+
+
+
+    def forward(self, x):
+        x = self.conv1(x)
+        c2 = self.stage2(x)
+        c3 = self.stage3(c2)
+        c4 = self.stage4(c3)
+        c5 = self.stage5(c4)
+        c6 = self.stage6(c5)
+        # c7 = self.stage7(c6)
+        # c8 = self.stage8(c7)
+        # c9 = self.conv9(c8)
+
+        return c3, c4, c5, c6
+
+
+
+def _mobilenetv2(arch, pretrained, progress, *args, **kwargs):
+    model = MobileNetV2(*args, **kwargs)
+
+    if pretrained:
+        model_url = model_urls[arch]
+        if model_url is None:
+            raise NotImplementedError('pretrained {} is not supported as of now'.format(arch))
+        else:
+            state_dict = load_state_dict_from_url(model_url, progress=progress)
+            model.load_state_dict(state_dict, strict=False)
+
+    return model
+
+
+def mobilenet_v2_x1_0(pretrained=False, progress=True, **kwargs):
+    """
+    Constructs a MobileNetV2 with 1.0x output channels, as described in
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
+    <https://arxiv.org/abs/1801.04381>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _mobilenetv2('mobilenet_v2_x1_0', pretrained, progress,
+                        [1, 2, 3, 4, 3], [32, 16, 24, 40, 160, 160], **kwargs)
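The four feature maps returned by `forward` (c3-c6) are what models/model.py registers as `'out': [24, 40, 160, 160]` for the `mobilenetv2` backbone. A short shape-check sketch, assuming a 640x640 input:

import torch
from models.modules.mobilenetv2 import mobilenet_v2_x1_0

net = mobilenet_v2_x1_0(pretrained=False)
feats = net(torch.zeros(1, 3, 640, 640))
print([f.shape[1] for f in feats])  # channel counts: [24, 40, 160, 160]
print([f.shape[2] for f in feats])  # spatial sizes: [160, 80, 40, 40] (strides 4, 8, 16, 16)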
models/modules/resnet.py ADDED
@@ -0,0 +1,300 @@
+# -*- coding: utf-8 -*-
+
+import torch.nn as nn
+from torch.hub import load_state_dict_from_url
+# from torchvision.models.utils import load_state_dict_from_url
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d']
+
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
+    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=dilation, groups=groups, bias=False, dilation=dilation)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if groups != 1 or base_width != 64:
+            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.relu(out)
+
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(Bottleneck, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        width = int(planes * (base_width / 64.)) * groups
+        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = conv1x1(inplanes, width)
+        self.bn1 = norm_layer(width)
+        self.conv2 = conv3x3(width, width, stride, groups, dilation)
+        self.bn2 = norm_layer(width)
+        self.conv3 = conv1x1(width, planes * self.expansion)
+        self.bn3 = norm_layer(planes * self.expansion)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Module):
+
+    def __init__(self, block, layers, zero_init_residual=False,
+                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
+                 norm_layer=None):
+        super(ResNet, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self._norm_layer = norm_layer
+
+        self.inplanes = 64
+        self.dilation = 1
+        if replace_stride_with_dilation is None:
+            # each element in the tuple indicates if we should replace
+            # the 2x2 stride with a dilated convolution instead
+            replace_stride_with_dilation = [False, False, False]
+        if len(replace_stride_with_dilation) != 3:
+            raise ValueError("replace_stride_with_dilation should be None "
+                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+        self.groups = groups
+        self.base_width = width_per_group
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+                                       dilate=replace_stride_with_dilation[0])
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                       dilate=replace_stride_with_dilation[1])
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+                                       dilate=replace_stride_with_dilation[2])
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        c2 = self.layer1(x)
+        c3 = self.layer2(c2)
+        c4 = self.layer3(c3)
+        c5 = self.layer4(c4)
+
+        return c2, c3, c4, c5
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+    model = ResNet(block, layers, **kwargs)
+    if pretrained:
+        state_dict = load_state_dict_from_url(model_urls[arch],
+                                              progress=progress)
+        model.load_state_dict(state_dict, strict=False)
+        print('loaded pretrained model from ImageNet')
+    return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNet-18 model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                   **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNet-34 model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNet-50 model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNet-101 model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNet-152 model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNeXt-50 32x4d model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 4
+    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
+                   pretrained, progress, **kwargs)
+
+
+def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
+    """Constructs a ResNeXt-101 32x8d model.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 8
+    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
+                   pretrained, progress, **kwargs)
+
+if __name__ == '__main__':
+    import torch
+    x = torch.zeros(1, 3, 640, 640)
+    net = resnext101_32x8d(pretrained=False)
+    y = net(x)
+    for u in y:
+        print(u.shape)
models/modules/segmentation_head.py ADDED
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class FPN(nn.Module):
+    def __init__(self, backbone_out_channels, **kwargs):
+        """
+        :param backbone_out_channels: channel counts of the backbone feature maps
+        :param kwargs:
+        """
+        super().__init__()
+        # result_num = kwargs.get('result_num', 6)
+        inplace = True
+        conv_out = 256
+        # reduce layers
+        self.reduce_conv_c2 = nn.Sequential(
+            nn.Conv2d(backbone_out_channels[0], conv_out, kernel_size=1, stride=1, padding=0),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        self.reduce_conv_c3 = nn.Sequential(
+            nn.Conv2d(backbone_out_channels[1], conv_out, kernel_size=1, stride=1, padding=0),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        self.reduce_conv_c4 = nn.Sequential(
+            nn.Conv2d(backbone_out_channels[2], conv_out, kernel_size=1, stride=1, padding=0),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+
+        self.reduce_conv_c5 = nn.Sequential(
+            nn.Conv2d(backbone_out_channels[3], conv_out, kernel_size=1, stride=1, padding=0),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        # Smooth layers
+        self.smooth_p4 = nn.Sequential(
+            nn.Conv2d(conv_out, conv_out, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        self.smooth_p3 = nn.Sequential(
+            nn.Conv2d(conv_out, conv_out, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        self.smooth_p2 = nn.Sequential(
+            nn.Conv2d(conv_out, conv_out, kernel_size=3, stride=1, padding=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+
+        self.conv = nn.Sequential(
+            nn.Conv2d(conv_out * 4, conv_out, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU(inplace=inplace)
+        )
+        # self.out_conv = nn.Conv2d(conv_out, result_num, kernel_size=1, stride=1)
+
+        self.pred_conv = nn.Sequential(
+            nn.Conv2d(conv_out, 2, kernel_size=1, stride=1, padding=0),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+        # Top-down
+        p5 = self.reduce_conv_c5(c5)
+        p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+        p4 = self.smooth_p4(p4)
+        p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+        p3 = self.smooth_p3(p3)
+        p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+        p2 = self.smooth_p2(p2)
+
+        x = self._upsample_cat(p2, p3, p4, p5)
+        x = self.conv(x)
+
+        # x = self.out_conv(x)
+
+        x = self.pred_conv(x)
+        return x
+
+    def _upsample_add(self, x, y):
+        return F.interpolate(x, size=y.size()[2:], mode='bilinear', align_corners=True) + y
+
+    def _upsample_cat(self, p2, p3, p4, p5):
+        h, w = p2.size()[2:]
+        p3 = F.interpolate(p3, size=(h, w), mode='bilinear', align_corners=True)
+        p4 = F.interpolate(p4, size=(h, w), mode='bilinear', align_corners=True)
+        p5 = F.interpolate(p5, size=(h, w), mode='bilinear', align_corners=True)
+        return torch.cat([p2, p3, p4, p5], dim=1)
+
+
+class FPEM_FFM(nn.Module):
+    def __init__(self, backbone_out_channels, **kwargs):
+        """
+        PANnet
+        :param backbone_out_channels: channel counts of the backbone feature maps
+        """
+        super().__init__()
+        fpem_repeat = kwargs.get('fpem_repeat', 2)
+        conv_out = 128
+        # reduce layers
+        self.reduce_conv_c2 = nn.Sequential(
+            nn.Conv2d(in_channels=backbone_out_channels[0], out_channels=conv_out, kernel_size=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU()
+        )
+        self.reduce_conv_c3 = nn.Sequential(
+            nn.Conv2d(in_channels=backbone_out_channels[1], out_channels=conv_out, kernel_size=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU()
+        )
+        self.reduce_conv_c4 = nn.Sequential(
+            nn.Conv2d(in_channels=backbone_out_channels[2], out_channels=conv_out, kernel_size=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU()
+        )
+        self.reduce_conv_c5 = nn.Sequential(
+            nn.Conv2d(in_channels=backbone_out_channels[3], out_channels=conv_out, kernel_size=1),
+            nn.BatchNorm2d(conv_out),
+            nn.ReLU()
+        )
+        self.fpems = nn.ModuleList()
+        for i in range(fpem_repeat):
+            self.fpems.append(FPEM(conv_out))
+        self.out_conv = nn.Conv2d(in_channels=conv_out * 4, out_channels=6, kernel_size=1)
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+        # reduce channel
+        c2 = self.reduce_conv_c2(c2)
+        c3 = self.reduce_conv_c3(c3)
+        c4 = self.reduce_conv_c4(c4)
+        c5 = self.reduce_conv_c5(c5)
+
+        # FPEM
+        for i, fpem in enumerate(self.fpems):
+            c2, c3, c4, c5 = fpem(c2, c3, c4, c5)
+            if i == 0:
+                c2_ffm = c2
+                c3_ffm = c3
+                c4_ffm = c4
+                c5_ffm = c5
+            else:
+                c2_ffm += c2
+                c3_ffm += c3
+                c4_ffm += c4
+                c5_ffm += c5
+
+        # FFM
+        c5 = F.interpolate(c5_ffm, c2_ffm.size()[-2:], mode='bilinear')
+        c4 = F.interpolate(c4_ffm, c2_ffm.size()[-2:], mode='bilinear')
+        c3 = F.interpolate(c3_ffm, c2_ffm.size()[-2:], mode='bilinear')
+        Fy = torch.cat([c2_ffm, c3, c4, c5], dim=1)
+        y = self.out_conv(Fy)
+        return y
+
+
+class FPEM(nn.Module):
+    def __init__(self, in_channels=128):
+        super().__init__()
+        self.up_add1 = SeparableConv2d(in_channels, in_channels, 1)
+        self.up_add2 = SeparableConv2d(in_channels, in_channels, 1)
+        self.up_add3 = SeparableConv2d(in_channels, in_channels, 1)
+        self.down_add1 = SeparableConv2d(in_channels, in_channels, 2)
+        self.down_add2 = SeparableConv2d(in_channels, in_channels, 2)
+        self.down_add3 = SeparableConv2d(in_channels, in_channels, 2)
+
+    def forward(self, c2, c3, c4, c5):
+        # up stage
+        c4 = self.up_add1(self._upsample_add(c5, c4))
+        c3 = self.up_add2(self._upsample_add(c4, c3))
+        c2 = self.up_add3(self._upsample_add(c3, c2))
+
+        # down stage
+        c3 = self.down_add1(self._upsample_add(c3, c2))
+        c4 = self.down_add2(self._upsample_add(c4, c3))
+        c5 = self.down_add3(self._upsample_add(c5, c4))
+        return c2, c3, c4, c5
+
+    def _upsample_add(self, x, y):
+        return F.interpolate(x, size=y.size()[2:], mode='bilinear') + y
+
+
+class SeparableConv2d(nn.Module):
+    def __init__(self, in_channels, out_channels, stride=1):
+        super(SeparableConv2d, self).__init__()
+
+        self.depthwise_conv = nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=3, padding=1,
+                                        stride=stride, groups=in_channels)
+        self.pointwise_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.depthwise_conv(x)
+        x = self.pointwise_conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
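Both heads take the four backbone feature maps and emit maps at c2 resolution — FPN ends with a 2-channel sigmoid output (probability map plus threshold map), FPEM_FFM with 6 channels. A minimal shape-check sketch for the FPN head in isolation, assuming mobilenetv2-sized inputs:

import torch
from models.modules.segmentation_head import FPN

head = FPN(backbone_out_channels=[24, 40, 160, 160])
feats = [torch.zeros(1, c, s, s) for c, s in zip([24, 40, 160, 160], [160, 80, 40, 40])]
print(head(feats).shape)  # torch.Size([1, 2, 160, 160])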
models/modules/shufflenetv2.py ADDED
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+
+import torch
+import torch.nn as nn
+from torch.hub import load_state_dict_from_url
+# from torchvision.models.utils import load_state_dict_from_url
+
+__all__ = [
+    'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0',
+    'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0'
+]
+
+model_urls = {
+    'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth',
+    'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth',
+    'shufflenetv2_x1.5': None,
+    'shufflenetv2_x2.0': None,
+}
+
+def channel_shuffle(x, groups):
+    batchsize, num_channels, height, width = x.data.size()
+    channels_per_group = num_channels // groups
+
+    # reshape
+    x = x.view(batchsize, groups,
+               channels_per_group, height, width)
+
+    x = torch.transpose(x, 1, 2).contiguous()
+
+    # flatten
+    x = x.view(batchsize, -1, height, width)
+
+    return x
+
+
+class InvertedResidual(nn.Module):
+    def __init__(self, inp, oup, stride):
+        super(InvertedResidual, self).__init__()
+
+        if not (1 <= stride <= 3):
+            raise ValueError('illegal stride value')
+        self.stride = stride
+
+        branch_features = oup // 2
+        assert (self.stride != 1) or (inp == branch_features << 1)
+
+        if self.stride > 1:
+            self.branch1 = nn.Sequential(
+                self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
+                nn.BatchNorm2d(inp),
+                nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+                nn.BatchNorm2d(branch_features),
+                nn.ReLU(inplace=True),
+            )
+
+        self.branch2 = nn.Sequential(
+            nn.Conv2d(inp if (self.stride > 1) else branch_features,
+                      branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+            nn.BatchNorm2d(branch_features),
+            nn.ReLU(inplace=True),
+            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
+            nn.BatchNorm2d(branch_features),
+            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+            nn.BatchNorm2d(branch_features),
+            nn.ReLU(inplace=True),
+        )
+
+    @staticmethod
+    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
+        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
+
+    def forward(self, x):
+        if self.stride == 1:
+            x1, x2 = x.chunk(2, dim=1)
+            out = torch.cat((x1, self.branch2(x2)), dim=1)
+        else:
+            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+
+        out = channel_shuffle(out, 2)
+
+        return out
+
+
+class ShuffleNetV2(nn.Module):
+    def __init__(self, stages_repeats, stages_out_channels, num_classes=1000):
+        super(ShuffleNetV2, self).__init__()
+
+        if len(stages_repeats) != 3:
+            raise ValueError('expected stages_repeats as list of 3 positive ints')
+        if len(stages_out_channels) != 5:
+            raise ValueError('expected stages_out_channels as list of 5 positive ints')
+        self._stage_out_channels = stages_out_channels
+
+        input_channels = 3
+        output_channels = self._stage_out_channels[0]
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.ReLU(inplace=True),
+        )
+        input_channels = output_channels
+
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]
+        for name, repeats, output_channels in zip(
+                stage_names, stages_repeats, self._stage_out_channels[1:]):
+            seq = [InvertedResidual(input_channels, output_channels, 2)]
+            for i in range(repeats - 1):
+                seq.append(InvertedResidual(output_channels, output_channels, 1))
+            setattr(self, name, nn.Sequential(*seq))
+            input_channels = output_channels
+
+        output_channels = self._stage_out_channels[-1]
+        self.conv5 = nn.Sequential(
+            nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(output_channels),
+            nn.ReLU(inplace=True),
+        )
+
+    def forward(self, x):
+        x = self.conv1(x)
+        c2 = self.maxpool(x)
+        c3 = self.stage2(c2)
+        c4 = self.stage3(c3)
+        c5 = self.stage4(c4)
+        # c5 = self.conv5(c5)
+        return c2, c3, c4, c5
+
+
+def _shufflenetv2(arch, pretrained, progress, *args, **kwargs):
+    model = ShuffleNetV2(*args, **kwargs)
+
+    if pretrained:
+        model_url = model_urls[arch]
+        if model_url is None:
+            raise NotImplementedError('pretrained {} is not supported as of now'.format(arch))
+        else:
+            state_dict = load_state_dict_from_url(model_url, progress=progress)
+            model.load_state_dict(state_dict, strict=False)
+
+    return model
+
+
+def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs):
+    """
+    Constructs a ShuffleNetV2 with 0.5x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress,
+                         [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs):
+    """
+    Constructs a ShuffleNetV2 with 1.0x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress,
+                         [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs):
+    """
+    Constructs a ShuffleNetV2 with 1.5x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress,
+                         [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs)
+
+
+def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs):
+    """
+    Constructs a ShuffleNetV2 with 2.0x output channels, as described in
+    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress,
+                         [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs)
requirements.txt ADDED
@@ -0,0 +1,7 @@
+torch
+torchvision
+opencv-python
+numpy
+pyclipper
+gradio
+matplotlib
utils/__init__.py ADDED
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+
+from .util import *
+from .metrics import *
+from .schedulers import *
+from .cal_recall.script import cal_recall_precison_f1
utils/cal_recall/__init__.py ADDED
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+
+from .script import cal_recall_precison_f1
+__all__ = ['cal_recall_precison_f1']
utils/cal_recall/rrc_evaluation_funcs.py ADDED
@@ -0,0 +1,394 @@
+#!/usr/bin/env python3
+# encoding: UTF-8
+import json
+import sys; sys.path.append('./')
+import zipfile
+import re
+import sys
+import os
+import codecs
+import traceback
+import numpy as np
+from utils import order_points_clockwise
+
+def print_help():
+    sys.stdout.write('Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]' % sys.argv[0])
+    sys.exit(2)
+
+
+def load_zip_file_keys(file, fileNameRegExp=''):
+    """
+    Returns an array with the entries of the ZIP file that match the regular expression.
+    The keys are the file names or the capturing group defined in fileNameRegExp.
+    """
+    try:
+        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except:
+        raise Exception('Error loading the ZIP archive.')
+
+    pairs = []
+
+    for name in archive.namelist():
+        addFile = True
+        keyName = name
+        if fileNameRegExp != "":
+            m = re.match(fileNameRegExp, name)
+            if m == None:
+                addFile = False
+            else:
+                if len(m.groups()) > 0:
+                    keyName = m.group(1)
+
+        if addFile:
+            pairs.append(keyName)
+
+    return pairs
+
+
+def load_zip_file(file, fileNameRegExp='', allEntries=False):
+    """
+    Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file.
+    The keys are the file names or the capturing group defined in fileNameRegExp.
+    allEntries validates that all entries in the ZIP file pass the fileNameRegExp.
+    """
+    try:
+        archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+    except:
+        raise Exception('Error loading the ZIP archive')
+
+    pairs = []
+    for name in archive.namelist():
+        addFile = True
+        keyName = name
+        if fileNameRegExp != "":
+            m = re.match(fileNameRegExp, name)
+            if m == None:
+                addFile = False
+            else:
+                if len(m.groups()) > 0:
+                    keyName = m.group(1)
+
+        if addFile:
+            pairs.append([keyName, archive.read(name)])
+        else:
+            if allEntries:
+                raise Exception('ZIP entry not valid: %s' % name)
+
+    return dict(pairs)
+
+
+def load_folder_file(file, fileNameRegExp='', allEntries=False):
+    """
+    Returns an array with the contents (filtered by fileNameRegExp) of a folder.
+    The keys are the file names or the capturing group defined in fileNameRegExp.
+    allEntries validates that all entries in the folder pass the fileNameRegExp.
+    """
+    pairs = []
+    for name in os.listdir(file):
+        addFile = True
+        keyName = name
+        print(name)
+        print(fileNameRegExp)
+        if fileNameRegExp != "":
+            m = re.match(fileNameRegExp, name)
+            print(m)
+            if m == None:
+                addFile = False
+            else:
+                if len(m.groups()) > 0:
+                    keyName = m.group(1)
+
+        if addFile:
+            pairs.append([keyName, open(os.path.join(file, name), encoding="utf-8").read()])
+        else:
+            if allEntries:
+                raise Exception('Folder entry not valid: %s' % name)
+
+    return dict(pairs)
+
+
+def decode_utf8(raw):
+    """
+    Returns a Unicode object on success, or None on failure
+    """
+    try:
+        raw = codecs.decode(raw, 'utf-8', 'replace')
+        # extract the BOM if it exists
+        raw = raw.encode('utf8')
+        if raw.startswith(codecs.BOM_UTF8):
+            raw = raw.replace(codecs.BOM_UTF8, b'', 1)  # bytes replacement (Python 3)
+        return raw.decode('utf-8')
+    except:
+        return None
+
+def validate_lines_in_file(fileName, file_contents, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
+    """
+    Validates all lines of the file by calling the line-validation function on each one.
+    """
+    utf8File = decode_utf8(file_contents)
+    if (utf8File is None):
+        raise Exception("The file %s is not UTF-8" % fileName)
+
+    lines = utf8File.split("\r\n" if CRLF else "\n")
+    for line in lines:
+        line = line.replace("\r", "").replace("\n", "")
+        if (line != ""):
+            try:
+                validate_tl_line(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
+            except Exception as e:
+                raise Exception(("Line in sample not valid. Sample: %s Line: %s Error: %s" % (fileName, line, str(e))).encode('utf-8', 'replace'))
+
+
+
+def validate_tl_line(line, LTRB=True, withTranscription=True, withConfidence=True, imWidth=0, imHeight=0):
+    """
+    Validate the format of the line. If the line is not valid an exception will be raised.
+    If maxWidth and maxHeight are specified, all points must be inside the image bounds.
+    Possible values are:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    """
+    get_tl_line_values(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
+
+
+def get_tl_line_values(line, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0):
+    """
+    Validate the format of the line. If the line is not valid an exception will be raised.
+    If maxWidth and maxHeight are specified, all points must be inside the image bounds.
+    Possible values are:
+    LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+    LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+    Returns values from a textline. Points, [Confidences], [Transcriptions]
+    """
+    confidence = 0.0
+    transcription = ""
+    points = []
+
+    numPoints = 4
+
+    if LTRB:
+
+        numPoints = 4
+
+        if withTranscription and withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$', line)
+            if m == None:
+                m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$', line)
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription")
+        elif withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence")
+        elif withTranscription:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription")
+        else:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: xmin,ymin,xmax,ymax")
+
+        xmin = int(m.group(1))
+        ymin = int(m.group(2))
+        xmax = int(m.group(3))
+        ymax = int(m.group(4))
+        if (xmax < xmin):
+            raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." % (xmax))
+        if (ymax < ymin):
+            raise Exception("Ymax value (%s) not valid (Ymax < Ymin)." % (ymax))
+
+        points = [float(m.group(i)) for i in range(1, (numPoints + 1))]
+
+        if (imWidth > 0 and imHeight > 0):
+            validate_point_inside_bounds(xmin, ymin, imWidth, imHeight)
+            validate_point_inside_bounds(xmax, ymax, imWidth, imHeight)
+
+    else:
+
+        numPoints = 8
+
+        if withTranscription and withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription")
+        elif withConfidence:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence")
+        elif withTranscription:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription")
+        else:
+            m = re.match(r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$', line)
+            if m == None:
+                raise Exception("Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4")
+
+        points = [float(m.group(i)) for i in range(1, (numPoints + 1))]
+
+        points = order_points_clockwise(np.array(points).reshape(-1, 2)).reshape(-1)
+        validate_clockwise_points(points)
+
+        if (imWidth > 0 and imHeight > 0):
+            validate_point_inside_bounds(points[0], points[1], imWidth, imHeight)
+            validate_point_inside_bounds(points[2], points[3], imWidth, imHeight)
+            validate_point_inside_bounds(points[4], points[5], imWidth, imHeight)
+            validate_point_inside_bounds(points[6], points[7], imWidth, imHeight)
+
+
+    if withConfidence:
+        try:
+            confidence = float(m.group(numPoints + 1))
+        except ValueError:
+            raise Exception("Confidence value must be a float")
+
+    if withTranscription:
+        posTranscription = numPoints + (2 if withConfidence else 1)
+        transcription = m.group(posTranscription)
+        m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
+        if m2 != None:  # transcription with double quotes: extract the value and replace escaped characters
+            transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"", "\"")
+
+    return points, confidence, transcription
+
+
+def validate_point_inside_bounds(x, y, imWidth, imHeight):
+    if (x < 0 or x > imWidth):
+        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" % (x, imWidth, imHeight))
+    if (y < 0 or y > imHeight):
+        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" % (y, imWidth, imHeight))
+
+def validate_clockwise_points(points):
+    """
+    Validates that the 4 points that delimit a polygon are in clockwise order.
+    """
+
+    if len(points) != 8:
+        raise Exception("Points list not valid." + str(len(points)))
+
+    point = [
+        [int(points[0]), int(points[1])],
+        [int(points[2]), int(points[3])],
+        [int(points[4]), int(points[5])],
+        [int(points[6]), int(points[7])]
+    ]
+    edge = [
+        (point[1][0] - point[0][0]) * (point[1][1] + point[0][1]),
+        (point[2][0] - point[1][0]) * (point[2][1] + point[1][1]),
+        (point[3][0] - point[2][0]) * (point[3][1] + point[2][1]),
+        (point[0][0] - point[3][0]) * (point[0][1] + point[3][1])
+    ]
+
+    summatory = edge[0] + edge[1] + edge[2] + edge[3]
+    if summatory > 0:
+        raise Exception("Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards.")
+
+def get_tl_line_values_from_file_contents(content, CRLF=True, LTRB=True, withTranscription=False, withConfidence=False, imWidth=0, imHeight=0, sort_by_confidences=True):
+    """
+    Returns all points, confidences and transcriptions of a file in lists. Valid line formats:
+    xmin,ymin,xmax,ymax,[confidence],[transcription]
+    x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
+    """
+    pointsList = []
+    transcriptionsList = []
+    confidencesList = []
+
+    lines = content.split("\r\n" if CRLF else "\n")
+    for line in lines:
+        line = line.replace("\r", "").replace("\n", "")
+        if (line != ""):
+            points, confidence, transcription = get_tl_line_values(line, LTRB, withTranscription, withConfidence, imWidth, imHeight)
+            pointsList.append(points)
+            transcriptionsList.append(transcription)
+            confidencesList.append(confidence)
+
+    if withConfidence and len(confidencesList) > 0 and sort_by_confidences:
+        import numpy as np
+        sorted_ind = np.argsort(-np.array(confidencesList))
+        confidencesList = [confidencesList[i] for i in sorted_ind]
+        pointsList = [pointsList[i] for i in sorted_ind]
+        transcriptionsList = [transcriptionsList[i] for i in sorted_ind]
+
+    return pointsList, confidencesList, transcriptionsList
+
+def main_evaluation(p, default_evaluation_params_fn, validate_data_fn, evaluate_method_fn, show_result=True, per_sample=True):
+    """
+    This process validates a method, evaluates it and, if it succeeds, generates a ZIP file with a JSON entry for each sample.
+    Params:
+    p: Dictionary of parameters with the GT/submission locations. If None is passed, the parameters sent by the system are used.
319
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
320
+ validate_data_fn: points to a method that validates the corrct format of the submission
321
+ evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results
322
+ """
323
+ evalParams = default_evaluation_params_fn()
324
+ if 'p' in p.keys():
325
+ evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
326
+
327
+ resDict={'calculated':True,'Message':'','method':'{}','per_sample':'{}'}
328
+ try:
329
+ # validate_data_fn(p['g'], p['s'], evalParams)
330
+ evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
331
+ resDict.update(evalData)
332
+
333
+ except Exception as e:
334
+ traceback.print_exc()
335
+ resDict['Message']= str(e)
336
+ resDict['calculated']=False
337
+
338
+ if 'o' in p:
339
+ if not os.path.exists(p['o']):
340
+ os.makedirs(p['o'])
341
+
342
+ resultsOutputname = p['o'] + '/results.zip'
343
+ outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)
344
+
345
+ del resDict['per_sample']
346
+ if 'output_items' in resDict.keys():
347
+ del resDict['output_items']
348
+
349
+ outZip.writestr('method.json',json.dumps(resDict))
350
+
351
+ if not resDict['calculated']:
352
+ if show_result:
353
+ sys.stderr.write('Error!\n'+ resDict['Message']+'\n\n')
354
+ if 'o' in p:
355
+ outZip.close()
356
+ return resDict
357
+
358
+ if 'o' in p:
359
+ if per_sample == True:
360
+ for k,v in evalData['per_sample'].iteritems():
361
+ outZip.writestr( k + '.json',json.dumps(v))
362
+
363
+ if 'output_items' in evalData.keys():
364
+ for k, v in evalData['output_items'].iteritems():
365
+ outZip.writestr( k,v)
366
+
367
+ outZip.close()
368
+
369
+ if show_result:
370
+ sys.stdout.write("Calculated!")
371
+ sys.stdout.write(json.dumps(resDict['method']))
372
+
373
+ return resDict
374
+
375
+
376
+ def main_validation(default_evaluation_params_fn,validate_data_fn):
377
+ """
378
+ This process validates a method
379
+ Params:
380
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
381
+ validate_data_fn: points to a method that validates the corrct format of the submission
382
+ """
383
+ try:
384
+ p = dict([s[1:].split('=') for s in sys.argv[1:]])
385
+ evalParams = default_evaluation_params_fn()
386
+ if 'p' in p.keys():
387
+ evalParams.update( p['p'] if isinstance(p['p'], dict) else json.loads(p['p'][1:-1]) )
388
+
389
+ validate_data_fn(p['g'], p['s'], evalParams)
390
+ print('SUCCESS')
391
+ sys.exit(0)
392
+ except Exception as e:
393
+ print(str(e))
394
+ sys.exit(101)
utils/cal_recall/script.py ADDED
@@ -0,0 +1,325 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ from collections import namedtuple
+ from . import rrc_evaluation_funcs
+ # import Polygon as plg
+ import shapely.geometry as plg
+ import numpy as np
+
+
+ def default_evaluation_params():
+     """
+     default_evaluation_params: Default parameters to use for the validation and evaluation.
+     """
+     return {
+         'IOU_CONSTRAINT': 0.5,
+         'AREA_PRECISION_CONSTRAINT': 0.5,
+         'GT_SAMPLE_NAME_2_ID': '.+_([0-9]+).txt',
+         'DET_SAMPLE_NAME_2_ID': 'res_.+_([0-9]+).txt',
+         'LTRB': False,  # LTRB: 2 points (left,top,right,bottom) or 4 points (x1,y1,x2,y2,x3,y3,x4,y4)
+         'CRLF': False,  # Lines are delimited by Windows CRLF format
+         'CONFIDENCES': False,  # Detections must include a confidence value. AP will be calculated
+         'PER_SAMPLE_RESULTS': True  # Generate per-sample results and produce data for visualization
+     }
+
+
+ def validate_data(gtFilePath, submFilePath, evaluationParams):
+     """
+     Method validate_data: validates that all files in the results folder are correct (have the correct name and contents).
+     Also validates that there are no missing files in the folder.
+     If an error is detected, the method raises it.
+     """
+     gt = rrc_evaluation_funcs.load_folder_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID'])
+
+     subm = rrc_evaluation_funcs.load_folder_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True)
+
+     # Validate format of GroundTruth
+     for k in gt:
+         rrc_evaluation_funcs.validate_lines_in_file(k, gt[k], evaluationParams['CRLF'], evaluationParams['LTRB'], True)
+
+     # Validate format of results
+     for k in subm:
+         if k not in gt:
+             raise Exception("The sample %s not present in GT" % k)
+
+         rrc_evaluation_funcs.validate_lines_in_file(k, subm[k], evaluationParams['CRLF'], evaluationParams['LTRB'],
+                                                     False, evaluationParams['CONFIDENCES'])
+
+
+ def evaluate_method(gtFilePath, submFilePath, evaluationParams):
+     """
+     Method evaluate_method: evaluates the method and returns the results.
+     Results: dictionary with the following values:
+     - method (required) Global method metrics. Ex: { 'Precision': 0.8, 'Recall': 0.9 }
+     - samples (optional) Per-sample metrics. Ex: { 'sample1': { 'Precision': 0.8, 'Recall': 0.9 }, 'sample2': { 'Precision': 0.8, 'Recall': 0.9 } }
+     """
+
+     def polygon_from_points(points):
+         """
+         Returns a polygon object from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4
+         """
+         resBoxes = np.empty([1, 8], dtype='int32')
+         resBoxes[0, 0] = int(points[0])
+         resBoxes[0, 4] = int(points[1])
+         resBoxes[0, 1] = int(points[2])
+         resBoxes[0, 5] = int(points[3])
+         resBoxes[0, 2] = int(points[4])
+         resBoxes[0, 6] = int(points[5])
+         resBoxes[0, 3] = int(points[6])
+         resBoxes[0, 7] = int(points[7])
+         pointMat = resBoxes[0].reshape([2, 4]).T
+         return plg.Polygon(pointMat)
+
+     def rectangle_to_polygon(rect):
+         resBoxes = np.empty([1, 8], dtype='int32')
+         resBoxes[0, 0] = int(rect.xmin)
+         resBoxes[0, 4] = int(rect.ymax)
+         resBoxes[0, 1] = int(rect.xmin)
+         resBoxes[0, 5] = int(rect.ymin)
+         resBoxes[0, 2] = int(rect.xmax)
+         resBoxes[0, 6] = int(rect.ymin)
+         resBoxes[0, 3] = int(rect.xmax)
+         resBoxes[0, 7] = int(rect.ymax)
+
+         pointMat = resBoxes[0].reshape([2, 4]).T
+
+         return plg.Polygon(pointMat)
+
+     def rectangle_to_points(rect):
+         points = [int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax), int(rect.xmax), int(rect.ymin),
+                   int(rect.xmin), int(rect.ymin)]
+         return points
+
+     def get_union(pD, pG):
+         areaA = pD.area
+         areaB = pG.area
+         return areaA + areaB - get_intersection(pD, pG)
+
+     def get_intersection_over_union(pD, pG):
+         try:
+             return get_intersection(pD, pG) / get_union(pD, pG)
+         except ZeroDivisionError:
+             return 0
+
+     def get_intersection(pD, pG):
+         pInt = pD & pG
+         if pInt.is_empty:
+             return 0
+         return pInt.area
+
+     def compute_ap(confList, matchList, numGtCare):
+         correct = 0
+         AP = 0
+         if len(confList) > 0:
+             confList = np.array(confList)
+             matchList = np.array(matchList)
+             sorted_ind = np.argsort(-confList)
+             confList = confList[sorted_ind]
+             matchList = matchList[sorted_ind]
+             for n in range(len(confList)):
+                 match = matchList[n]
+                 if match:
+                     correct += 1
+                     AP += float(correct) / (n + 1)
+
+             if numGtCare > 0:
+                 AP /= numGtCare
+
+         return AP
+
+     perSampleMetrics = {}
+
+     matchedSum = 0
+
+     Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
+
+     gt = rrc_evaluation_funcs.load_folder_file(gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID'])
+     subm = rrc_evaluation_funcs.load_folder_file(submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True)
+
+     numGlobalCareGt = 0
+     numGlobalCareDet = 0
+
+     arrGlobalConfidences = []
+     arrGlobalMatches = []
+
+     for resFile in gt:
+
+         gtFile = gt[resFile]  # rrc_evaluation_funcs.decode_utf8(gt[resFile])
+         recall = 0
+         precision = 0
+         hmean = 0
+
+         detMatched = 0
+
+         iouMat = np.empty([1, 1])
+
+         gtPols = []
+         detPols = []
+
+         gtPolPoints = []
+         detPolPoints = []
+
+         # Array of ground-truth polygons' keys marked as don't care
+         gtDontCarePolsNum = []
+         # Array of detected polygons matched with a don't-care GT
+         detDontCarePolsNum = []
+
+         pairs = []
+         detMatchedNums = []
+
+         arrSampleConfidences = []
+         arrSampleMatch = []
+         sampleAP = 0
+
+         evaluationLog = ""
+
+         pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(
+             gtFile, evaluationParams['CRLF'], evaluationParams['LTRB'], True, False)
+         for n in range(len(pointsList)):
+             points = pointsList[n]
+             transcription = transcriptionsList[n]
+             dontCare = transcription == "###"
+             if evaluationParams['LTRB']:
+                 gtRect = Rectangle(*points)
+                 gtPol = rectangle_to_polygon(gtRect)
+             else:
+                 gtPol = polygon_from_points(points)
+             gtPols.append(gtPol)
+             gtPolPoints.append(points)
+             if dontCare:
+                 gtDontCarePolsNum.append(len(gtPols) - 1)
+
+         evaluationLog += "GT polygons: " + str(len(gtPols)) + (
+             " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" if len(gtDontCarePolsNum) > 0 else "\n")
+
+         if resFile in subm:
+
+             detFile = subm[resFile]  # rrc_evaluation_funcs.decode_utf8(subm[resFile])
+
+             pointsList, confidencesList, _ = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(
+                 detFile, evaluationParams['CRLF'], evaluationParams['LTRB'], False, evaluationParams['CONFIDENCES'])
+             for n in range(len(pointsList)):
+                 points = pointsList[n]
+
+                 if evaluationParams['LTRB']:
+                     detRect = Rectangle(*points)
+                     detPol = rectangle_to_polygon(detRect)
+                 else:
+                     detPol = polygon_from_points(points)
+                 detPols.append(detPol)
+                 detPolPoints.append(points)
+                 if len(gtDontCarePolsNum) > 0:
+                     for dontCarePol in gtDontCarePolsNum:
+                         dontCarePol = gtPols[dontCarePol]
+                         intersected_area = get_intersection(dontCarePol, detPol)
+                         pdDimensions = detPol.area
+                         precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
+                         if precision > evaluationParams['AREA_PRECISION_CONSTRAINT']:
+                             detDontCarePolsNum.append(len(detPols) - 1)
+                             break
+
+             evaluationLog += "DET polygons: " + str(len(detPols)) + (
+                 " (" + str(len(detDontCarePolsNum)) + " don't care)\n" if len(detDontCarePolsNum) > 0 else "\n")
+
+             if len(gtPols) > 0 and len(detPols) > 0:
+                 # Calculate IoU and precision matrices
+                 outputShape = [len(gtPols), len(detPols)]
+                 iouMat = np.empty(outputShape)
+                 gtRectMat = np.zeros(len(gtPols), np.int8)
+                 detRectMat = np.zeros(len(detPols), np.int8)
+                 for gtNum in range(len(gtPols)):
+                     for detNum in range(len(detPols)):
+                         pG = gtPols[gtNum]
+                         pD = detPols[detNum]
+                         iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
+
+                 for gtNum in range(len(gtPols)):
+                     for detNum in range(len(detPols)):
+                         if gtRectMat[gtNum] == 0 and detRectMat[detNum] == 0 \
+                                 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
+                             if iouMat[gtNum, detNum] > evaluationParams['IOU_CONSTRAINT']:
+                                 gtRectMat[gtNum] = 1
+                                 detRectMat[detNum] = 1
+                                 detMatched += 1
+                                 pairs.append({'gt': gtNum, 'det': detNum})
+                                 detMatchedNums.append(detNum)
+                                 evaluationLog += "Match GT #" + str(gtNum) + " with Det #" + str(detNum) + "\n"
+
+             if evaluationParams['CONFIDENCES']:
+                 for detNum in range(len(detPols)):
+                     if detNum not in detDontCarePolsNum:
+                         # exclude the don't-care detections
+                         match = detNum in detMatchedNums
+
+                         arrSampleConfidences.append(confidencesList[detNum])
+                         arrSampleMatch.append(match)
+
+                         arrGlobalConfidences.append(confidencesList[detNum])
+                         arrGlobalMatches.append(match)
+
+         numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
+         numDetCare = (len(detPols) - len(detDontCarePolsNum))
+         if numGtCare == 0:
+             recall = float(1)
+             precision = float(0) if numDetCare > 0 else float(1)
+             sampleAP = precision
+         else:
+             recall = float(detMatched) / numGtCare
+             precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
+             if evaluationParams['CONFIDENCES'] and evaluationParams['PER_SAMPLE_RESULTS']:
+                 sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch, numGtCare)
+
+         hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (precision + recall)
+
+         matchedSum += detMatched
+         numGlobalCareGt += numGtCare
+         numGlobalCareDet += numDetCare
+
+         if evaluationParams['PER_SAMPLE_RESULTS']:
+             perSampleMetrics[resFile] = {
+                 'precision': precision,
+                 'recall': recall,
+                 'hmean': hmean,
+                 'pairs': pairs,
+                 'AP': sampleAP,
+                 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
+                 'gtPolPoints': gtPolPoints,
+                 'detPolPoints': detPolPoints,
+                 'gtDontCare': gtDontCarePolsNum,
+                 'detDontCare': detDontCarePolsNum,
+                 'evaluationParams': evaluationParams,
+                 'evaluationLog': evaluationLog
+             }
+
+     # Compute MAP and MAR
+     AP = 0
+     if evaluationParams['CONFIDENCES']:
+         AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt)
+
+     methodRecall = 0 if numGlobalCareGt == 0 else float(matchedSum) / numGlobalCareGt
+     methodPrecision = 0 if numGlobalCareDet == 0 else float(matchedSum) / numGlobalCareDet
+     methodHmean = 0 if methodRecall + methodPrecision == 0 else \
+         2 * methodRecall * methodPrecision / (methodRecall + methodPrecision)
+
+     methodMetrics = {'precision': methodPrecision, 'recall': methodRecall, 'hmean': methodHmean, 'AP': AP}
+
+     resDict = {'calculated': True, 'Message': '', 'method': methodMetrics, 'per_sample': perSampleMetrics}
+
+     return resDict
+
+
+ def cal_recall_precison_f1(gt_path, result_path, show_result=False):
+     p = {'g': gt_path, 's': result_path}
+     result = rrc_evaluation_funcs.main_evaluation(p, default_evaluation_params, validate_data, evaluate_method,
+                                                   show_result)
+     return result['method']
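cal_recall_precison_f1 is the intended entry point of this module. A minimal usage sketch, assuming the repository root is importable and that the folders hold ICDAR-style txt files whose names match GT_SAMPLE_NAME_2_ID (e.g. gt_img_1.txt) and DET_SAMPLE_NAME_2_ID (e.g. res_img_1.txt); folder names here are hypothetical:

    from utils.cal_recall.script import cal_recall_precison_f1  # import path assumed

    metrics = cal_recall_precison_f1(gt_path='./gt', result_path='./result')
    print(metrics)  # {'precision': ..., 'recall': ..., 'hmean': ..., 'AP': ...}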
utils/make_trainfile.py ADDED
@@ -0,0 +1,20 @@
+ # -*- coding: utf-8 -*-
+
+ import os
+ import glob
+ import pathlib
+
+ data_path = r'E:\zj\dataset\icdar2015\test'
+ # data_path/img holds the images
+ # data_path/gt holds the label files
+
+ f_w = open(os.path.join(data_path, 'test.txt'), 'w', encoding='utf8')
+ for img_path in glob.glob(data_path + '/img/*.jpg', recursive=True):
+     d = pathlib.Path(img_path)
+     label_path = os.path.join(data_path, 'gt', ('gt_' + str(d.stem) + '.txt'))
+     if os.path.exists(img_path) and os.path.exists(label_path):
+         print(img_path, label_path)
+     else:
+         print('missing', img_path, label_path)
+     f_w.write('{}\t{}\n'.format(img_path, label_path))
+ f_w.close()
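Each line of the generated test.txt pairs an image path with its label path, separated by a tab; an illustrative line (the sample name is invented):

    E:\zj\dataset\icdar2015\test\img\img_1.jpg\tE:\zj\dataset\icdar2015\test\gt\gt_img_1.txt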
utils/metrics.py ADDED
@@ -0,0 +1,53 @@
+ # Adapted from score written by wkentaro
+ # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py
+
+ import numpy as np
+
+
+ class runningScore(object):
+
+     def __init__(self, n_classes):
+         self.n_classes = n_classes
+         self.confusion_matrix = np.zeros((n_classes, n_classes))
+
+     def _fast_hist(self, label_true, label_pred, n_class):
+         mask = (label_true >= 0) & (label_true < n_class)
+
+         if np.sum(label_pred[mask] < 0) > 0:
+             print(label_pred[label_pred < 0])
+         hist = np.bincount(n_class * label_true[mask].astype(int) +
+                            label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
+         return hist
+
+     def update(self, label_trues, label_preds):
+         for lt, lp in zip(label_trues, label_preds):
+             try:
+                 self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes)
+             except Exception:
+                 # skip samples whose shapes or labels cannot be histogrammed
+                 pass
+
+     def get_scores(self):
+         """Returns accuracy score evaluation result.
+         - overall accuracy
+         - mean accuracy
+         - mean IU
+         - fwavacc
+         """
+         hist = self.confusion_matrix
+         acc = np.diag(hist).sum() / (hist.sum() + 0.0001)
+         acc_cls = np.diag(hist) / (hist.sum(axis=1) + 0.0001)
+         acc_cls = np.nanmean(acc_cls)
+         iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist) + 0.0001)
+         mean_iu = np.nanmean(iu)
+         freq = hist.sum(axis=1) / (hist.sum() + 0.0001)
+         fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
+         cls_iu = dict(zip(range(self.n_classes), iu))
+
+         return {'Overall Acc': acc,
+                 'Mean Acc': acc_cls,
+                 'FreqW Acc': fwavacc,
+                 'Mean IoU': mean_iu, }, cls_iu
+
+     def reset(self):
+         self.confusion_matrix = np.zeros((self.n_classes, self.n_classes))
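A toy check of runningScore on a two-class batch; the arrays are invented, and since update expects an iterable of label maps, a leading batch dimension is added:

    import numpy as np

    score = runningScore(2)
    gt = np.array([[0, 1], [1, 1]])
    pred = np.array([[0, 1], [0, 1]])
    score.update(gt[None, ...], pred[None, ...])  # one (2, 2) map each
    scores, cls_iu = score.get_scores()
    print(scores['Overall Acc'])  # ~0.75: three of four pixels agree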
utils/schedulers.py ADDED
@@ -0,0 +1,64 @@
+ from torch.optim.lr_scheduler import _LRScheduler
+
+
+ class ConstantLR(_LRScheduler):
+     def __init__(self, optimizer, last_epoch=-1):
+         super(ConstantLR, self).__init__(optimizer, last_epoch)
+
+     def get_lr(self):
+         return [base_lr for base_lr in self.base_lrs]
+
+
+ class PolynomialLR(_LRScheduler):
+     def __init__(self, optimizer, max_iter, power=0.9, last_epoch=-1):
+         self.max_iter = max_iter
+         self.power = power
+         super(PolynomialLR, self).__init__(optimizer, last_epoch)
+
+     def get_lr(self):
+         factor = (1 - self.last_epoch / float(self.max_iter)) ** self.power
+         return [base_lr * factor for base_lr in self.base_lrs]
+
+
+ class WarmUpLR(_LRScheduler):
+     def __init__(self, optimizer, scheduler, mode="linear", warmup_iters=100, gamma=0.2, last_epoch=-1):
+         self.mode = mode
+         self.scheduler = scheduler
+         self.warmup_iters = warmup_iters
+         self.gamma = gamma
+         super(WarmUpLR, self).__init__(optimizer, last_epoch)
+
+     def get_lr(self):
+         cold_lrs = self.scheduler.get_lr()
+
+         if self.last_epoch < self.warmup_iters:
+             if self.mode == "linear":
+                 alpha = self.last_epoch / float(self.warmup_iters)
+                 factor = self.gamma * (1 - alpha) + alpha
+             elif self.mode == "constant":
+                 factor = self.gamma
+             else:
+                 raise KeyError("WarmUp type {} not implemented".format(self.mode))
+
+             return [factor * base_lr for base_lr in cold_lrs]
+
+         return cold_lrs
+
+
+ if __name__ == '__main__':
+     import torch
+     from torchvision.models import resnet18
+     from matplotlib import pyplot as plt
+
+     max_iter = 600 * 125
+     model = resnet18()
+     op = torch.optim.SGD(model.parameters(), 0.001)
+     sc = PolynomialLR(op, max_iter)
+     lr = []
+     for i in range(max_iter):
+         sc.step()
+         print(i, sc.last_epoch, sc.get_lr()[0])
+         lr.append(sc.get_lr()[0])
+     plt.plot(list(range(max_iter)), lr)
+     plt.show()
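WarmUpLR scales the wrapped scheduler's rates for the first warmup_iters steps and then passes them through unchanged. Because it reads the inner scheduler's current values, one workable pattern is to step both schedulers; a hedged sketch (optimizer and iteration counts are invented):

    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    opt = torch.optim.SGD(params, lr=0.01)
    base = PolynomialLR(opt, max_iter=1000, power=0.9)
    sched = WarmUpLR(opt, base, mode='linear', warmup_iters=100, gamma=0.1)

    for step in range(1000):
        opt.step()
        base.step()   # advance the wrapped polynomial schedule
        sched.step()  # apply the warmup factor during the first 100 steps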
utils/util.py ADDED
@@ -0,0 +1,137 @@
+ # -*- coding: utf-8 -*-
+
+ import time
+ import json
+ import cv2
+ import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+
+ def setup_logger(log_file_path: str = None):
+     """Return a logger with a default ColoredFormatter."""
+     import logging
+     from colorlog import ColoredFormatter
+     logging.basicConfig(filename=log_file_path,
+                         format='%(asctime)s %(levelname)-8s %(filename)s[line:%(lineno)d]: %(message)s',
+                         datefmt='%Y-%m-%d %H:%M:%S')
+     formatter = ColoredFormatter(
+         "%(asctime)s %(log_color)s%(levelname)-8s %(reset)s %(filename)s[line:%(lineno)d]: %(message)s",
+         datefmt='%Y-%m-%d %H:%M:%S',
+         reset=True,
+         log_colors={
+             'DEBUG': 'blue',
+             'INFO': 'green',
+             'WARNING': 'yellow',
+             'ERROR': 'red',
+             'CRITICAL': 'red',
+         })
+
+     logger = logging.getLogger('PAN')
+     handler = logging.StreamHandler()
+     handler.setFormatter(formatter)
+     logger.addHandler(handler)
+     logger.setLevel(logging.DEBUG)
+     logger.info('logger init finished')
+     return logger
+
+
+ def exe_time(func):
+     """Decorator that prints how long the wrapped function took."""
+     def newFunc(*args, **kwargs):
+         t0 = time.time()
+         back = func(*args, **kwargs)
+         print("{} cost {:.3f}s".format(func.__name__, time.time() - t0))
+         return back
+
+     return newFunc
+
+
+ def save_json(data, json_path):
+     with open(json_path, mode='w', encoding='utf8') as f:
+         json.dump(data, f, indent=4)
+
+
+ def load_json(json_path):
+     with open(json_path, mode='r', encoding='utf8') as f:
+         data = json.load(f)
+     return data
+
+
+ def show_img(imgs: np.ndarray, color=False):
+     if (len(imgs.shape) == 3 and color) or (len(imgs.shape) == 2 and not color):
+         imgs = np.expand_dims(imgs, axis=0)
+     for img in imgs:
+         plt.figure()
+         plt.imshow(img, cmap=None if color else 'gray')
+
+
+ def draw_bbox(img_path, result, color=(128, 240, 128), thickness=3):
+     # img_path can be a file path or an already-loaded BGR image
+     if isinstance(img_path, str):
+         img_path = cv2.imread(img_path)
+         # img_path = cv2.cvtColor(img_path, cv2.COLOR_BGR2RGB)
+     img_path = img_path.copy()
+     for point in result:
+         point = point.astype(int)
+         cv2.line(img_path, tuple(point[0]), tuple(point[1]), color, thickness)
+         cv2.line(img_path, tuple(point[1]), tuple(point[2]), color, thickness)
+         cv2.line(img_path, tuple(point[2]), tuple(point[3]), color, thickness)
+         cv2.line(img_path, tuple(point[3]), tuple(point[0]), color, thickness)
+     return img_path
+
+
+ def cal_text_score(texts, gt_texts, training_masks, running_metric_text):
+     training_masks = training_masks.data.cpu().numpy()
+     pred_text = torch.sigmoid(texts).data.cpu().numpy() * training_masks
+     pred_text[pred_text <= 0.5] = 0
+     pred_text[pred_text > 0.5] = 1
+     pred_text = pred_text.astype(np.int32)
+     gt_text = gt_texts.data.cpu().numpy() * training_masks
+     gt_text = gt_text.astype(np.int32)
+     running_metric_text.update(gt_text, pred_text)
+     score_text, _ = running_metric_text.get_scores()
+     return score_text
+
+
+ def cal_kernel_score(kernel, gt_kernel, gt_texts, training_masks, running_metric_kernel):
+     mask = (gt_texts * training_masks.float()).data.cpu().numpy()
+     pred_kernel = torch.sigmoid(kernel).data.cpu().numpy()
+     pred_kernel[pred_kernel <= 0.5] = 0
+     pred_kernel[pred_kernel > 0.5] = 1
+     pred_kernel = (pred_kernel * mask).astype(np.int32)
+     gt_kernel = gt_kernel.data.cpu().numpy()
+     gt_kernel = (gt_kernel * mask).astype(np.int32)
+     running_metric_kernel.update(gt_kernel, pred_kernel)
+     score_kernel, _ = running_metric_kernel.get_scores()
+     return score_kernel
+
+
+ def order_points_clockwise(pts):
+     rect = np.zeros((4, 2), dtype="float32")
+     s = pts.sum(axis=1)
+     rect[0] = pts[np.argmin(s)]
+     rect[2] = pts[np.argmax(s)]
+     diff = np.diff(pts, axis=1)
+     rect[1] = pts[np.argmin(diff)]
+     rect[3] = pts[np.argmax(diff)]
+     return rect
+
+
+ def order_points_clockwise_list(pts):
+     pts = pts.tolist()
+     pts.sort(key=lambda x: (x[1], x[0]))
+     pts[:2] = sorted(pts[:2], key=lambda x: x[0])
+     pts[2:] = sorted(pts[2:], key=lambda x: -x[0])
+     pts = np.array(pts)
+     return pts
+
+
+ if __name__ == '__main__':
+     # box = np.array([382, 1080, 443, 999, 423, 1014, 362, 1095]).reshape(-1, 2)
+     box = np.array([0, 4, 2, 2, 0, 8, 4, 4]).reshape(-1, 2)
+     # box = np.array([0, 0, 2, 2, 0, 4, 4, 4]).reshape(-1, 2)
+     print(box)
+     print(order_points_clockwise(box))
+     print(order_points_clockwise_list(box))
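cal_text_score binarizes the sigmoid outputs at 0.5, masks them, and accumulates the result into a runningScore; a toy invocation (shapes and values invented, import path assumed):

    import torch
    from utils.metrics import runningScore  # import path assumed

    texts = torch.randn(1, 32, 32)                    # raw text-map logits
    gt_texts = (torch.rand(1, 32, 32) > 0.5).float()  # fake binary ground truth
    training_masks = torch.ones(1, 32, 32)
    score = cal_text_score(texts, gt_texts, training_masks, runningScore(2))
    print(score['Mean IoU'])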