|
from functools import partial
|
|
import torch.nn as nn
|
|
from detectron2.config import LazyCall as L
|
|
from detectron2.modeling import ViT, SimpleFeaturePyramid
|
|
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
|
|
|
|
from .mask_rcnn_fpn import model
|
|
from ..data.constants import constants
|
|
|
|
model.pixel_mean = constants.imagenet_rgb256_mean
|
|
model.pixel_std = constants.imagenet_rgb256_std
|
|
model.input_format = "RGB"
|
|
|
|
|
|
embed_dim, depth, num_heads, dp = 768, 12, 12, 0.1
|
|
|
|
model.backbone = L(SimpleFeaturePyramid)(
|
|
net=L(ViT)(
|
|
img_size=1024,
|
|
patch_size=16,
|
|
embed_dim=embed_dim,
|
|
depth=depth,
|
|
num_heads=num_heads,
|
|
drop_path_rate=dp,
|
|
window_size=14,
|
|
mlp_ratio=4,
|
|
qkv_bias=True,
|
|
norm_layer=partial(nn.LayerNorm, eps=1e-6),
|
|
window_block_indexes=[
|
|
|
|
0,
|
|
1,
|
|
3,
|
|
4,
|
|
6,
|
|
7,
|
|
9,
|
|
10,
|
|
],
|
|
residual_block_indexes=[],
|
|
use_rel_pos=True,
|
|
out_feature="last_feat",
|
|
),
|
|
in_feature="${.net.out_feature}",
|
|
out_channels=256,
|
|
scale_factors=(4.0, 2.0, 1.0, 0.5),
|
|
top_block=L(LastLevelMaxPool)(),
|
|
norm="LN",
|
|
square_pad=1024,
|
|
)
|
|
|
|
model.roi_heads.box_head.conv_norm = model.roi_heads.mask_head.conv_norm = "LN"
|
|
|
|
|
|
model.proposal_generator.head.conv_dims = [-1, -1]
|
|
|
|
|
|
model.roi_heads.box_head.conv_dims = [256, 256, 256, 256]
|
|
model.roi_heads.box_head.fc_dims = [1024]
|
|
|