"""
MIT License

Copyright (c) 2019 Microsoft

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
|
|
from detectron2.layers import ShapeSpec
|
|
from detectron2.modeling.backbone import BACKBONE_REGISTRY
|
|
from detectron2.modeling.backbone.backbone import Backbone
|
|
|
|
from .hrnet import build_pose_hrnet_backbone
|
|
|
|
|
|
class HRFPN(Backbone):
    """HRFPN (High Resolution Feature Pyramids).

    Converts the multi-resolution outputs of an HRNet backbone into a
    feature pyramid suitable for the ROI heads.

    arXiv: https://arxiv.org/abs/1904.04514
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py

    Args:
        bottom_up: the HRNet backbone whose outputs are transformed
        in_features (list): names of the input features (output of HRNet)
        n_out_features (int): number of output stages
        in_channels (list): number of channels for each branch
        out_channels (int): output channels of feature pyramids
        pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
        share_conv (bool): have one conv per output, or share one with all outputs
    """

    def __init__(
        self,
        bottom_up,
        in_features,
        n_out_features,
        in_channels,
        out_channels,
        pooling="AVG",
        share_conv=False,
    ):
        super().__init__()
        assert isinstance(in_channels, list)
        self.bottom_up = bottom_up
        self.in_features = in_features
        self.n_out_features = n_out_features
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.share_conv = share_conv

        # One 3x3 conv per pyramid level, or a single conv shared by every level.
        if self.share_conv:
            self.fpn_conv = nn.Conv2d(
                in_channels=out_channels,
                out_channels=out_channels,
                kernel_size=3,
                padding=1,
            )
        else:
            self.fpn_conv = nn.ModuleList(
                nn.Conv2d(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    padding=1,
                )
                for _ in range(self.n_out_features)
            )

        # Upsample every HRNet branch back to the finest resolution;
        # branch i has stride 2**i relative to branch 0.
        self.interp_conv = nn.ModuleList(
            nn.Sequential(
                nn.ConvTranspose2d(
                    in_channels=in_channels[branch],
                    out_channels=in_channels[branch],
                    kernel_size=4,
                    stride=2**branch,
                    padding=0,
                    output_padding=0,
                    bias=False,
                ),
                nn.BatchNorm2d(in_channels[branch], momentum=0.1),
                nn.ReLU(inplace=True),
            )
            for branch in range(len(self.in_features))
        )

        # Reduce the concatenated channels and pool down to each pyramid level
        # with a strided conv (kernel == stride == 2**level).
        self.reduction_pooling_conv = nn.ModuleList(
            nn.Sequential(
                nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**level, stride=2**level),
                nn.BatchNorm2d(out_channels, momentum=0.1),
                nn.ReLU(inplace=True),
            )
            for level in range(self.n_out_features)
        )

        self.pooling = F.max_pool2d if pooling == "MAX" else F.avg_pool2d

        # Backbone metadata: output names ("p1", "p2", ...), channel counts
        # and strides (finest output is at stride 4 relative to the image).
        self._out_features = ["p%d" % (level + 1) for level in range(self.n_out_features)]
        self._out_feature_channels = {name: self.out_channels for name in self._out_features}
        self._out_feature_strides = {
            name: 2 ** (level + 2) for level, name in enumerate(self._out_features)
        }

    def init_weights(self):
        """Kaiming-initialize every Conv2d weight and zero its bias."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, a=1)
                nn.init.constant_(module.bias, 0)

    def forward(self, inputs):
        """Run the HRNet backbone and assemble the feature pyramid.

        Returns:
            dict mapping output names ("p1", ...) to feature tensors.
        """
        bottom_up_features = self.bottom_up(inputs)
        assert len(bottom_up_features) == len(self.in_features)
        branch_maps = [bottom_up_features[name] for name in self.in_features]

        # Upsample every branch, then crop all of them to the smallest
        # spatial extent so they can be concatenated along channels.
        upsampled = [interp(x) for interp, x in zip(self.interp_conv, branch_maps)]
        min_h = min(t.shape[2] for t in upsampled)
        min_w = min(t.shape[3] for t in upsampled)
        merged = torch.cat([t[:, :, :min_h, :min_w] for t in upsampled], dim=1)

        # One map per pyramid level via the strided reduction convs.
        levels = [reduce(merged) for reduce in self.reduction_pooling_conv]
        # Crop each level so its size is an exact power-of-two multiple of
        # the coarsest level's size.
        for i in range(len(levels)):
            levels[-1 - i] = levels[-1 - i][
                :, :, : levels[-1].shape[2] * 2**i, : levels[-1].shape[3] * 2**i
            ]

        if self.share_conv:
            outputs = [self.fpn_conv(t) for t in levels]
        else:
            outputs = [conv(t) for conv, t in zip(self.fpn_conv, levels)]

        assert len(self._out_features) == len(outputs)
        return dict(zip(self._out_features, outputs))
|
|
|
|
|
|
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
    """Build an HRNet backbone wrapped in an HRFPN neck from the config.

    Args:
        cfg: detectron2 config; reads MODEL.HRNET.* and MODEL.ROI_HEADS.IN_FEATURES
        input_shape: shape of the backbone input (forwarded to the HRNet builder)

    Returns:
        HRFPN: the assembled backbone + feature-pyramid module.
    """
    stage4 = cfg.MODEL.HRNET.STAGE4
    # Input feature names follow the "p1", "p2", ... convention, one per branch.
    in_features = ["p%d" % (i + 1) for i in range(stage4.NUM_BRANCHES)]
    hrnet = build_pose_hrnet_backbone(cfg, input_shape)
    return HRFPN(
        hrnet,
        in_features,
        len(cfg.MODEL.ROI_HEADS.IN_FEATURES),
        stage4.NUM_CHANNELS,
        cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS,
        pooling="AVG",
        share_conv=False,
    )
|
|
|