# Dataset-level constants of this config.
train_dataset_type = 'MultiViewCocoDataset'
test_dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Single class name; the same literal list is repeated as `classes` in every
# dataset entry of `data`.
classes = ['selective_search']
# Per-channel normalisation settings; the same values are repeated in every
# `Normalize` step of the pipelines in this file.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)
# Loading steps. The same two steps are inlined as
# data['train']['dataset']['pipeline'] further down in this file.
load_pipeline = [
    dict(type='LoadImageFromFile'),
    # Boxes only: with_bbox=True, with_mask=False.
    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
]
# Augmentation pipeline for the first training view. It contains no loading
# step of its own; loading is listed separately in `load_pipeline`.
train_pipeline1 = [
    # Multi-scale resize: multiscale_mode='range' between the two listed
    # scales, aspect ratio kept.
    dict(
        type='Resize',
        img_scale=[(1600, 400), (1600, 1400)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
    dict(type='Pad', size_divisor=32),
    dict(type='RandFlip', flip_ratio=0.5),
    # One colour/photometric transform out of the listed candidates
    # ('Identity' is one of them).
    dict(
        type='OneOf',
        transforms=[
            dict(type='Identity'),
            dict(type='AutoContrast'),
            dict(type='RandEqualize'),
            dict(type='RandSolarize'),
            dict(type='RandColor'),
            dict(type='RandContrast'),
            dict(type='RandBrightness'),
            dict(type='RandSharpness'),
            dict(type='RandPosterize'),
        ]),
    # Same mean/std/to_rgb values as `img_norm_cfg`.
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Augmentation pipeline for the second training view. In this dump its steps
# and parameters are the same as those listed for the first view.
train_pipeline2 = [
    # Multi-scale resize: multiscale_mode='range' between the two listed
    # scales, aspect ratio kept.
    dict(
        type='Resize',
        img_scale=[(1600, 400), (1600, 1400)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
    dict(type='Pad', size_divisor=32),
    dict(type='RandFlip', flip_ratio=0.5),
    # One colour/photometric transform out of the listed candidates
    # ('Identity' is one of them).
    dict(
        type='OneOf',
        transforms=[
            dict(type='Identity'),
            dict(type='AutoContrast'),
            dict(type='RandEqualize'),
            dict(type='RandSolarize'),
            dict(type='RandColor'),
            dict(type='RandContrast'),
            dict(type='RandBrightness'),
            dict(type='RandSharpness'),
            dict(type='RandPosterize'),
        ]),
    # Same mean/std/to_rgb values as `img_norm_cfg`.
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference pipeline: a single scale (1333, 800) with flip=False. The same
# steps are inlined for data['val'] and data['test'] in this file.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            # Listed in the transform chain although the wrapper above sets
            # flip=False.
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ]),
]
# Dataloader / dataset configuration. All pipelines are written out inline
# (this file is a fully expanded dump), so this statement does not depend on
# any other variable in the file.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    # Training: a 'MultiViewCocoDataset' wrapping a 'CocoDataset', with
    # num_views=2 and one augmentation pipeline per view.
    train=dict(
        type='MultiViewCocoDataset',
        dataset=dict(
            type='CocoDataset',
            classes=['selective_search'],
            # NOTE(review): the scraped text read
            # 'data/coco/filtered_proposals/[email protected]' - the file name
            # had been replaced by an e-mail-obfuscation placeholder. The
            # name below is the one assumed from the upstream project;
            # confirm against the proposal file actually on disk.
            ann_file='data/coco/filtered_proposals/train2017_ratio3size0008@0.5.json',
            img_prefix='data/coco/train2017/',
            pipeline=[
                dict(type='LoadImageFromFile'),
                dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
            ]),
        num_views=2,
        pipelines=[
            # View 1.
            [
                dict(
                    type='Resize',
                    img_scale=[(1600, 400), (1600, 1400)],
                    multiscale_mode='range',
                    keep_ratio=True),
                dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
                dict(type='Pad', size_divisor=32),
                dict(type='RandFlip', flip_ratio=0.5),
                dict(
                    type='OneOf',
                    transforms=[
                        dict(type='Identity'),
                        dict(type='AutoContrast'),
                        dict(type='RandEqualize'),
                        dict(type='RandSolarize'),
                        dict(type='RandColor'),
                        dict(type='RandContrast'),
                        dict(type='RandBrightness'),
                        dict(type='RandSharpness'),
                        dict(type='RandPosterize'),
                    ]),
                dict(
                    type='Normalize',
                    mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True),
                dict(type='DefaultFormatBundle'),
                dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
            ],
            # View 2: same steps and parameters as view 1.
            [
                dict(
                    type='Resize',
                    img_scale=[(1600, 400), (1600, 1400)],
                    multiscale_mode='range',
                    keep_ratio=True),
                dict(type='FilterAnnotations', min_gt_bbox_wh=(0.01, 0.01)),
                dict(type='Pad', size_divisor=32),
                dict(type='RandFlip', flip_ratio=0.5),
                dict(
                    type='OneOf',
                    transforms=[
                        dict(type='Identity'),
                        dict(type='AutoContrast'),
                        dict(type='RandEqualize'),
                        dict(type='RandSolarize'),
                        dict(type='RandColor'),
                        dict(type='RandContrast'),
                        dict(type='RandBrightness'),
                        dict(type='RandSharpness'),
                        dict(type='RandPosterize'),
                    ]),
                dict(
                    type='Normalize',
                    mean=[123.675, 116.28, 103.53],
                    std=[58.395, 57.12, 57.375],
                    to_rgb=True),
                dict(type='DefaultFormatBundle'),
                dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
            ],
        ]),
    # Validation: instances_val2017.json with the single-scale, no-flip
    # inference pipeline.
    val=dict(
        type='CocoDataset',
        classes=['selective_search'],
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img']),
                ]),
        ]),
    # Test: same annotation file, image prefix and pipeline as `val`.
    test=dict(
        type='CocoDataset',
        classes=['selective_search'],
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img']),
                ]),
        ]))
# Evaluation interval is 65535 while the runner below stops at max_epochs=12.
# NOTE(review): presumably this is meant to disable evaluation during this
# run - confirm.
evaluation = dict(interval=65535, gpu_collect=True)

# AdamW; parameters whose names match the `custom_keys` entries get
# decay_mult=0.0.
optimizer = dict(
    type='AdamW',
    lr=0.0001,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    paramwise_cfg=dict(
        custom_keys=dict(
            absolute_pos_embed=dict(decay_mult=0.0),
            relative_position_bias_table=dict(decay_mult=0.0),
            norm=dict(decay_mult=0.0))))
# No gradient clipping.
optimizer_config = dict(grad_clip=None)

# Step schedule with steps at [8, 11] and a 500-iteration linear warm-up.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)

checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
# Extra hooks: a momentum-update hook and a Weights & Biases logger
# (project 'I2B', group 'pretrain') with num_eval_images=0 and
# log_checkpoint=False.
custom_hooks = [
    dict(type='MomentumUpdateHook'),
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='I2B', group='pretrain'),
        interval=50,
        num_eval_images=0,
        log_checkpoint=False),
]

# Generic runtime settings.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=True, base_batch_size=16)
# Modules listed for import via `custom_imports`; with
# allow_failed_imports=False a failing import is not tolerated.
# One module per line, original order preserved.
custom_imports = dict(
    imports=[
        'mmselfsup.datasets.pipelines',
        'selfsup.core.hook.momentum_update_hook',
        'selfsup.datasets.pipelines.selfsup_pipelines',
        'selfsup.datasets.pipelines.rand_aug',
        'selfsup.datasets.single_view_coco',
        'selfsup.datasets.multi_view_coco',
        'selfsup.models.losses.contrastive_loss',
        'selfsup.models.dense_heads.fcos_head',
        'selfsup.models.dense_heads.retina_head',
        'selfsup.models.dense_heads.detr_head',
        'selfsup.models.dense_heads.deformable_detr_head',
        'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
        'selfsup.models.roi_heads.standard_roi_head',
        'selfsup.models.roi_heads.htc_roi_head',
        'selfsup.models.roi_heads.cbv2_roi_head',
        'selfsup.models.necks.cb_fpn',
        'selfsup.models.backbones.cbv2',
        'selfsup.models.backbones.swinv1',
        'selfsup.models.detectors.selfsup_detector',
        'selfsup.models.detectors.selfsup_fcos',
        'selfsup.models.detectors.selfsup_detr',
        'selfsup.models.detectors.selfsup_deformable_detr',
        'selfsup.models.detectors.selfsup_retinanet',
        'selfsup.models.detectors.selfsup_mask_rcnn',
        'selfsup.models.detectors.selfsup_htc',
        'selfsup.models.detectors.selfsup_cbv2',
        'selfsup.models.detectors.cbv2',
        'selfsup.core.bbox.assigners.hungarian_assigner',
        'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
        'selfsup.core.bbox.match_costs.match_cost',
    ],
    allow_failed_imports=False)
# Model definition. The top-level 'SelfSupDetector' has a single `backbone`
# entry of type 'SelfSupCBv2'; the actual backbone, neck, RPN head, RoI head
# and the train/test settings are all nested inside that entry.
model = dict(
    type='SelfSupDetector',
    backbone=dict(
        type='SelfSupCBv2',
        backbone=dict(
            type='CBSwinTransformer',
            embed_dim=192,
            depths=[2, 2, 18, 2],
            num_heads=[6, 12, 24, 48],
            window_size=7,
            mlp_ratio=4.0,
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.0,
            attn_drop_rate=0.0,
            drop_path_rate=0.2,
            ape=False,
            patch_norm=True,
            out_indices=(0, 1, 2, 3),
            # Weights are referenced by URL.
            pretrained='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth',
            use_checkpoint=False),
        neck=dict(
            type='CBFPN',
            # 192 * (1, 2, 4, 8); presumably the per-stage widths of the
            # embed_dim=192 backbone above - confirm.
            in_channels=[192, 384, 768, 1536],
            out_channels=256,
            num_outs=5),
        rpn_head=dict(
            type='RPNHead',
            in_channels=256,
            feat_channels=256,
            # Five strides, matching num_outs=5 of the neck.
            anchor_generator=dict(
                type='AnchorGenerator',
                scales=[8],
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64]),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[1.0, 1.0, 1.0, 1.0]),
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
            loss_bbox=dict(
                type='SmoothL1Loss',
                beta=0.1111111111111111,
                loss_weight=1.0)),
        roi_head=dict(
            type='SelfSupCBv2Head',
            interleaved=True,
            # NOTE(review): mask_info_flow=True although mask_roi_extractor
            # and mask_head below are both None - confirm it is ignored.
            mask_info_flow=True,
            # Three stages, one bbox head / rcnn train config per stage.
            num_stages=3,
            stage_loss_weights=[1, 0.5, 0.25],
            bbox_roi_extractor=dict(
                type='SingleRoIExtractor',
                roi_layer=dict(
                    type='RoIAlign', output_size=7, sampling_ratio=0),
                out_channels=256,
                featmap_strides=[4, 8, 16, 32]),
            # The three heads differ only in bbox_coder.target_stds, which
            # shrink from stage to stage. Each uses 'ContrastiveLoss'
            # (temperature=0.5) as loss_cls.
            bbox_head=[
                dict(
                    type='SelfSupShared4Conv1FCBBoxHead',
                    in_channels=256,
                    fc_out_channels=1024,
                    roi_feat_size=7,
                    num_classes=256,
                    bbox_coder=dict(
                        type='DeltaXYWHBBoxCoder',
                        target_means=[0.0, 0.0, 0.0, 0.0],
                        target_stds=[0.1, 0.1, 0.2, 0.2]),
                    reg_class_agnostic=True,
                    loss_cls=dict(
                        type='ContrastiveLoss',
                        loss_weight=1.0,
                        temperature=0.5),
                    loss_bbox=dict(
                        type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
                dict(
                    type='SelfSupShared4Conv1FCBBoxHead',
                    in_channels=256,
                    fc_out_channels=1024,
                    roi_feat_size=7,
                    num_classes=256,
                    bbox_coder=dict(
                        type='DeltaXYWHBBoxCoder',
                        target_means=[0.0, 0.0, 0.0, 0.0],
                        target_stds=[0.05, 0.05, 0.1, 0.1]),
                    reg_class_agnostic=True,
                    loss_cls=dict(
                        type='ContrastiveLoss',
                        loss_weight=1.0,
                        temperature=0.5),
                    loss_bbox=dict(
                        type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
                dict(
                    type='SelfSupShared4Conv1FCBBoxHead',
                    in_channels=256,
                    fc_out_channels=1024,
                    roi_feat_size=7,
                    num_classes=256,
                    bbox_coder=dict(
                        type='DeltaXYWHBBoxCoder',
                        target_means=[0.0, 0.0, 0.0, 0.0],
                        target_stds=[0.033, 0.033, 0.067, 0.067]),
                    reg_class_agnostic=True,
                    loss_cls=dict(
                        type='ContrastiveLoss',
                        loss_weight=1.0,
                        temperature=0.5),
                    loss_bbox=dict(
                        type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
            ],
            # No mask branch is configured.
            mask_roi_extractor=None,
            mask_head=None),
        train_cfg=dict(
            rpn=dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=256,
                    pos_fraction=0.5,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=False),
                allowed_border=0,
                pos_weight=-1,
                debug=False),
            rpn_proposal=dict(
                nms_pre=2000,
                max_per_img=2000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            # One entry per stage; the assigner IoU thresholds rise
            # 0.5 -> 0.6 -> 0.7, everything else is the same.
            rcnn=[
                dict(
                    assigner=dict(
                        type='MaxIoUAssigner',
                        pos_iou_thr=0.5,
                        neg_iou_thr=0.5,
                        min_pos_iou=0.5,
                        ignore_iof_thr=-1),
                    sampler=dict(
                        type='RandomSampler',
                        num=512,
                        pos_fraction=0.25,
                        neg_pos_ub=-1,
                        add_gt_as_proposals=True),
                    mask_size=28,
                    pos_weight=-1,
                    debug=False),
                dict(
                    assigner=dict(
                        type='MaxIoUAssigner',
                        pos_iou_thr=0.6,
                        neg_iou_thr=0.6,
                        min_pos_iou=0.6,
                        ignore_iof_thr=-1),
                    sampler=dict(
                        type='RandomSampler',
                        num=512,
                        pos_fraction=0.25,
                        neg_pos_ub=-1,
                        add_gt_as_proposals=True),
                    mask_size=28,
                    pos_weight=-1,
                    debug=False),
                dict(
                    assigner=dict(
                        type='MaxIoUAssigner',
                        pos_iou_thr=0.7,
                        neg_iou_thr=0.7,
                        min_pos_iou=0.7,
                        ignore_iof_thr=-1),
                    sampler=dict(
                        type='RandomSampler',
                        num=512,
                        pos_fraction=0.25,
                        neg_pos_ub=-1,
                        add_gt_as_proposals=True),
                    mask_size=28,
                    pos_weight=-1,
                    debug=False),
            ]),
        test_cfg=dict(
            rpn=dict(
                nms_pre=1000,
                max_per_img=1000,
                nms=dict(type='nms', iou_threshold=0.7),
                min_bbox_size=0),
            rcnn=dict(
                score_thr=0.001,
                nms=dict(type='nms', iou_threshold=0.5),
                max_per_img=100,
                mask_thr_binary=0.5))))
# Remaining run-level settings recorded in this dump.
find_unused_parameters = True
# Mixed precision with dynamic loss scaling.
fp16 = dict(loss_scale='dynamic')
work_dir = 'work_dirs/selfsup_cbv2_swin-L_1x_coco'
auto_resume = False
# Device indices 0..63.
gpu_ids = range(0, 64)