# Base configs this file builds on (MMEngine `_base_` inheritance): the COCO
# detection dataset settings, the 1x training schedule and the default
# runtime. Later assignments in this file override or extend what they define.
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]

# Checkpoint URL handed to the backbone through `init_cfg` below.
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa: E501

# Detector definition: SwinTransformer backbone -> FPN -> DyHead -> ATSSHead.
model = dict(
    type='ATSS',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        # 128 is also the largest anchor stride configured in `bbox_head`.
        pad_size_divisor=128),
    backbone=dict(
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=12,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        patch_norm=True,
        # Three outputs are exported, one per entry of the FPN `in_channels`
        # list below; keep the two in sync when changing either.
        out_indices=(1, 2, 3),
        with_cp=False,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    # The neck is a two-stage sequence: FPN first, then DyHead on its outputs.
    neck=[
        dict(
            type='FPN',
            # 2x, 4x and 8x `embed_dims` (192) -- presumably the channel
            # widths of the three exported backbone stages; verify against
            # the SwinTransformer implementation.
            in_channels=[384, 768, 1536],
            out_channels=256,
            start_level=0,
            add_extra_convs='on_output',
            # Five output levels, matching the five anchor strides below.
            num_outs=5),
        dict(
            type='DyHead',
            # Same width as the FPN `out_channels` above.
            in_channels=256,
            out_channels=256,
            num_blocks=6,
            zero_init_offset=False)
    ],
    bbox_head=dict(
        type='ATSSHead',
        num_classes=80,
        in_channels=256,
        pred_kernel_size=1,
        # No stacked convs in the head itself; the DyHead neck precedes it.
        stacked_convs=0,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128],
            center_offset=0.5),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # Training and testing settings.
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))

# Training pipeline: load image and boxes, multi-scale resize, random
# horizontal/vertical flip as configured by `RandomFlip`, then pack inputs.
# `{{_base_.backend_args}}` is an MMEngine placeholder that is substituted
# with the value defined in the base dataset config before this file is run.
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='RandomResize',
        # Two (w, h)-style scale tuples bounding the random resize range.
        scale=[(2000, 480), (2000, 1200)],
        keep_ratio=True,
        backend='pillow'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]
# Evaluation pipeline: a single fixed-scale resize, no flipping. Annotations
# are loaded after the resize step, in the order listed here.
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(2000, 1200), keep_ratio=True, backend='pillow'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
# Training dataloader override. `_delete_=True` tells MMEngine to discard the
# inherited `dataset` dict instead of merging into it, so the base dataset is
# replaced by a `RepeatDataset` wrapper (times=2) around the COCO train split.
train_dataloader = dict(
    dataset=dict(
        _delete_=True,
        type='RepeatDataset',
        times=2,
        dataset=dict(
            # Dataset type, root and backend args are taken from the base
            # dataset config via `{{_base_.*}}` placeholders.
            type={{_base_.dataset_type}},
            data_root={{_base_.data_root}},
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args={{_base_.backend_args}})))
# Validation only overrides the pipeline; every other dataloader setting is
# left to the base dataset config. Testing reuses the validation settings.
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader

# Optimizer settings. `_delete_=True` replaces the optimizer wrapper inherited
# from the base schedule rather than merging with it.
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(
        type='AdamW', lr=0.00005, betas=(0.9, 0.999), weight_decay=0.05),
    paramwise_cfg=dict(
        # decay_mult=0. is applied to parameters matched by these keys, i.e.
        # position embeddings / bias tables and normalization parameters.
        custom_keys={
            'absolute_pos_embed': dict(decay_mult=0.),
            'relative_position_bias_table': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.)
        }),
    # Gradient clipping is disabled.
    clip_grad=None)