_base_ = [ '../_base_/datasets/coco_detection.py', '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' ] pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa model = dict( type='ATSS', data_preprocessor=dict( type='DetDataPreprocessor', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], bgr_to_rgb=True, pad_size_divisor=128), backbone=dict( type='SwinTransformer', pretrain_img_size=384, embed_dims=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=12, mlp_ratio=4, qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, patch_norm=True, out_indices=(1, 2, 3), # Please only add indices that would be used # in FPN, otherwise some parameter will not be used with_cp=False, convert_weights=True, init_cfg=dict(type='Pretrained', checkpoint=pretrained)), neck=[ dict( type='FPN', in_channels=[384, 768, 1536], out_channels=256, start_level=0, add_extra_convs='on_output', num_outs=5), dict( type='DyHead', in_channels=256, out_channels=256, num_blocks=6, # disable zero_init_offset to follow official implementation zero_init_offset=False) ], bbox_head=dict( type='ATSSHead', num_classes=80, in_channels=256, pred_kernel_size=1, # follow DyHead official implementation stacked_convs=0, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', ratios=[1.0], octave_base_scale=8, scales_per_octave=1, strides=[8, 16, 32, 64, 128], center_offset=0.5), # follow DyHead official implementation bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[0.1, 0.1, 0.2, 0.2]), loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict(type='GIoULoss', loss_weight=2.0), loss_centerness=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), # training and testing settings train_cfg=dict( assigner=dict(type='ATSSAssigner', topk=9), allowed_border=-1, pos_weight=-1, debug=False), test_cfg=dict( nms_pre=1000, min_bbox_size=0, score_thr=0.05, nms=dict(type='nms', iou_threshold=0.6), max_per_img=100)) # dataset settings train_pipeline = [ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict( type='RandomResize', scale=[(2000, 480), (2000, 1200)], keep_ratio=True, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='PackDetInputs') ] test_pipeline = [ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='Resize', scale=(2000, 1200), keep_ratio=True, backend='pillow'), dict(type='LoadAnnotations', with_bbox=True), dict( type='PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] train_dataloader = dict( dataset=dict( _delete_=True, type='RepeatDataset', times=2, dataset=dict( type={{_base_.dataset_type}}, data_root={{_base_.data_root}}, ann_file='annotations/instances_train2017.json', data_prefix=dict(img='train2017/'), filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=train_pipeline, backend_args={{_base_.backend_args}}))) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader # optimizer optim_wrapper = dict( _delete_=True, type='OptimWrapper', optimizer=dict( type='AdamW', lr=0.00005, betas=(0.9, 0.999), weight_decay=0.05), paramwise_cfg=dict( custom_keys={ 'absolute_pos_embed': dict(decay_mult=0.), 'relative_position_bias_table': dict(decay_mult=0.), 'norm': dict(decay_mult=0.) }), clip_grad=None)