|
from detectron2.config import LazyCall as L
|
|
from detectron2.layers import ShapeSpec
|
|
from detectron2.modeling.meta_arch import GeneralizedRCNN
|
|
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
|
|
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
|
|
from detectron2.modeling.box_regression import Box2BoxTransform
|
|
from detectron2.modeling.matcher import Matcher
|
|
from detectron2.modeling.poolers import ROIPooler
|
|
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
|
|
from detectron2.modeling.roi_heads import (
|
|
FastRCNNOutputLayers,
|
|
MaskRCNNConvUpsampleHead,
|
|
Res5ROIHeads,
|
|
)
|
|
|
|
from ..data.constants import constants
|
|
|
|
model = L(GeneralizedRCNN)(
|
|
backbone=L(ResNet)(
|
|
stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
|
|
stages=L(ResNet.make_default_stages)(
|
|
depth=50,
|
|
stride_in_1x1=True,
|
|
norm="FrozenBN",
|
|
),
|
|
out_features=["res4"],
|
|
),
|
|
proposal_generator=L(RPN)(
|
|
in_features=["res4"],
|
|
head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
|
|
anchor_generator=L(DefaultAnchorGenerator)(
|
|
sizes=[[32, 64, 128, 256, 512]],
|
|
aspect_ratios=[0.5, 1.0, 2.0],
|
|
strides=[16],
|
|
offset=0.0,
|
|
),
|
|
anchor_matcher=L(Matcher)(
|
|
thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
|
|
),
|
|
box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
|
|
batch_size_per_image=256,
|
|
positive_fraction=0.5,
|
|
pre_nms_topk=(12000, 6000),
|
|
post_nms_topk=(2000, 1000),
|
|
nms_thresh=0.7,
|
|
),
|
|
roi_heads=L(Res5ROIHeads)(
|
|
num_classes=80,
|
|
batch_size_per_image=512,
|
|
positive_fraction=0.25,
|
|
proposal_matcher=L(Matcher)(
|
|
thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
|
|
),
|
|
in_features=["res4"],
|
|
pooler=L(ROIPooler)(
|
|
output_size=14,
|
|
scales=(1.0 / 16,),
|
|
sampling_ratio=0,
|
|
pooler_type="ROIAlignV2",
|
|
),
|
|
res5=L(ResNet.make_stage)(
|
|
block_class=BottleneckBlock,
|
|
num_blocks=3,
|
|
stride_per_block=[2, 1, 1],
|
|
in_channels=1024,
|
|
bottleneck_channels=512,
|
|
out_channels=2048,
|
|
norm="FrozenBN",
|
|
stride_in_1x1=True,
|
|
),
|
|
box_predictor=L(FastRCNNOutputLayers)(
|
|
input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
|
|
test_score_thresh=0.05,
|
|
box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
|
|
num_classes="${..num_classes}",
|
|
),
|
|
mask_head=L(MaskRCNNConvUpsampleHead)(
|
|
input_shape=L(ShapeSpec)(
|
|
channels="${...res5.out_channels}",
|
|
width="${...pooler.output_size}",
|
|
height="${...pooler.output_size}",
|
|
),
|
|
num_classes="${..num_classes}",
|
|
conv_dims=[256],
|
|
),
|
|
),
|
|
pixel_mean=constants.imagenet_bgr256_mean,
|
|
pixel_std=constants.imagenet_bgr256_std,
|
|
input_format="BGR",
|
|
)
|
|
|