# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import nms
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, InstanceList, MultiConfig, OptInstanceList
from .guided_anchor_head import GuidedAnchorHead


@MODELS.register_module()
class GARPNHead(GuidedAnchorHead):
"""Guided-Anchor-based RPN head."""

    def __init__(self,
in_channels: int,
num_classes: int = 1,
init_cfg: MultiConfig = dict(
type='Normal',
layer='Conv2d',
std=0.01,
override=dict(
type='Normal',
name='conv_loc',
std=0.01,
bias_prob=0.01)),
**kwargs) -> None:
super().__init__(
num_classes=num_classes,
in_channels=in_channels,
init_cfg=init_cfg,
**kwargs)

    def _init_layers(self) -> None:
"""Initialize layers of the head."""
self.rpn_conv = nn.Conv2d(
self.in_channels, self.feat_channels, 3, padding=1)
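        # the parent's _init_layers then adds the guided-anchor branches
        # (location, shape, feature adaption and cls/reg convs) on top of
        # the shared 3x3 RPN conv created above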
super(GARPNHead, self)._init_layers()

    def forward_single(self, x: Tensor) -> Tuple[Tensor]:
"""Forward feature of a single scale level."""
x = self.rpn_conv(x)
x = F.relu(x, inplace=True)
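        # the shared RPN feature is passed to the guided-anchor branches,
        # which return classification, bbox, anchor-shape and location preds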
(cls_score, bbox_pred, shape_pred,
loc_pred) = super().forward_single(x)
return cls_score, bbox_pred, shape_pred, loc_pred

    def loss_by_feat(
self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
shape_preds: List[Tensor],
loc_preds: List[Tensor],
batch_gt_instances: InstanceList,
batch_img_metas: List[dict],
batch_gt_instances_ignore: OptInstanceList = None) -> dict:
"""Calculate the loss based on the features extracted by the detection
head.
Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            shape_preds (list[Tensor]): Shape predictions for each scale
                level with shape (N, num_anchors * 2, H, W).
            loc_preds (list[Tensor]): Location predictions for each scale
                level with shape (N, 1, H, W).
batch_gt_instances (list[:obj:`InstanceData`]): Batch of
gt_instance. It usually includes ``bboxes`` and ``labels``
attributes.
batch_img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
Batch of gt_instances_ignore. It includes ``bboxes`` attribute
data that is ignored during training and testing.
Defaults to None.
Returns:
dict: A dictionary of loss components.
"""
losses = super().loss_by_feat(
cls_scores,
bbox_preds,
shape_preds,
loc_preds,
batch_gt_instances,
batch_img_metas,
batch_gt_instances_ignore=batch_gt_instances_ignore)
return dict(
loss_rpn_cls=losses['loss_cls'],
loss_rpn_bbox=losses['loss_bbox'],
loss_anchor_shape=losses['loss_shape'],
loss_anchor_loc=losses['loss_loc'])

    def _predict_by_feat_single(self,
cls_scores: List[Tensor],
bbox_preds: List[Tensor],
mlvl_anchors: List[Tensor],
mlvl_masks: List[Tensor],
img_meta: dict,
cfg: ConfigType,
rescale: bool = False) -> InstanceData:
"""Transform a single image's features extracted from the head into
bbox results.
Args:
cls_scores (list[Tensor]): Box scores from all scale
levels of a single image, each item has shape
(num_priors * num_classes, H, W).
bbox_preds (list[Tensor]): Box energies / deltas from
all scale levels of a single image, each item has shape
(num_priors * 4, H, W).
            mlvl_anchors (list[Tensor]): Each element in the list is
                the anchors of a single level in the feature pyramid.
                It has shape (num_priors, 4).
mlvl_masks (list[Tensor]): Each element in the list is location
masks of a single level.
img_meta (dict): Image meta info.
            cfg (:obj:`ConfigDict` or dict): Test / postprocessing
                configuration. If None, ``test_cfg`` is used.
rescale (bool): If True, return boxes in original image space.
Defaults to False.
Returns:
            :obj:`InstanceData`: Detection results of each image
                after the post process. It usually contains the following
                keys.

                - scores (Tensor): Classification scores, has a shape
                  (num_instances, ).
                - labels (Tensor): Labels of bboxes, has a shape
                  (num_instances, ).
                - bboxes (Tensor): Has a shape (num_instances, 4), the last
                  dimension 4 arranged as (x1, y1, x2, y2).
"""
cfg = self.test_cfg if cfg is None else cfg
cfg = copy.deepcopy(cfg)
        assert cfg.nms.get('type', 'nms') == 'nms', \
            'GARPNHead only supports naive nms.'
mlvl_proposals = []
for idx in range(len(cls_scores)):
rpn_cls_score = cls_scores[idx]
rpn_bbox_pred = bbox_preds[idx]
anchors = mlvl_anchors[idx]
mask = mlvl_masks[idx]
assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            # if no location is kept, skip this level.
if mask.sum() == 0:
continue
rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
if self.use_sigmoid_cls:
rpn_cls_score = rpn_cls_score.reshape(-1)
scores = rpn_cls_score.sigmoid()
else:
rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # Since mmdet v2.0, FG labels are [0, num_classes - 1] and
                # the BG cat_id is num_classes, so the last softmax column
                # is the background score and is dropped below.
scores = rpn_cls_score.softmax(dim=1)[:, :-1]
# filter scores, bbox_pred w.r.t. mask.
# anchors are filtered in get_anchors() beforehand.
scores = scores[mask]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            rpn_bbox_pred = rpn_bbox_pred[mask, :]
if scores.dim() == 0:
rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
anchors = anchors.unsqueeze(0)
scores = scores.unsqueeze(0)
# filter anchors, bbox_pred, scores w.r.t. scores
if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
_, topk_inds = scores.topk(cfg.nms_pre)
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
scores = scores[topk_inds]
# get proposals w.r.t. anchors and rpn_bbox_pred
proposals = self.bbox_coder.decode(
anchors, rpn_bbox_pred, max_shape=img_meta['img_shape'])
# filter out too small bboxes
if cfg.min_bbox_size >= 0:
w = proposals[:, 2] - proposals[:, 0]
h = proposals[:, 3] - proposals[:, 1]
valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
if not valid_mask.all():
proposals = proposals[valid_mask]
scores = scores[valid_mask]
# NMS in current level
proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)
proposals = proposals[:cfg.nms_post, :]
mlvl_proposals.append(proposals)
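        # each entry of `mlvl_proposals` comes from mmcv.ops.nms and has
        # shape (k, 5): (x1, y1, x2, y2, score)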
proposals = torch.cat(mlvl_proposals, 0)
if cfg.get('nms_across_levels', False):
# NMS across multi levels
proposals, _ = nms(proposals[:, :4], proposals[:, -1],
cfg.nms.iou_threshold)
proposals = proposals[:cfg.max_per_img, :]
else:
scores = proposals[:, 4]
num = min(cfg.max_per_img, proposals.shape[0])
_, topk_inds = scores.topk(num)
proposals = proposals[topk_inds, :]
bboxes = proposals[:, :-1]
scores = proposals[:, -1]
if rescale:
assert img_meta.get('scale_factor') is not None
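            # the 2-element `scale_factor` (w_scale, h_scale) is repeated
            # into a (1, 4) divisor aligned with (x1, y1, x2, y2)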
bboxes /= bboxes.new_tensor(img_meta['scale_factor']).repeat(
(1, 2))
results = InstanceData()
results.bboxes = bboxes
results.scores = scores
results.labels = scores.new_zeros(scores.size(0), dtype=torch.long)
return results
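

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the upstream module): GA-RPN heads
# are normally described in an mmdet config dict and built through the
# registry. The values below are assumptions loosely following the shipped
# GA-RPN configs; consult the actual config files for authoritative settings.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    example_rpn_head = dict(
        type='GARPNHead',
        in_channels=256,
        feat_channels=256,
        approx_anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=8,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        square_anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64]),
        loc_filter_thr=0.01)
    # in a full config this dict would typically appear as the `rpn_head`
    # field of a two-stage detector and be built by the detector builder
    print(example_rpn_head)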