auto_scale_lr = dict(base_batch_size=16)
backend_args = None
batch_augments = [
    dict(size=(1024, 1024), type='BatchFixedSizePad'),
]
classes = 'license_plate'
custom_imports = dict(
    allow_failed_imports=False, imports=['projects.CO-DETR.codetr'])
data_root = '/home/worawit.tepsan/Project_AI/Detection/data'
dataset_type = 'CocoDataset'
default_hooks = dict(
    checkpoint=dict(
        _scope_='mmdet',
        by_epoch=True,
        interval=1,
        max_keep_ckpts=3,
        type='CheckpointHook'),
    logger=dict(_scope_='mmdet', interval=50, type='LoggerHook'),
    param_scheduler=dict(_scope_='mmdet', type='ParamSchedulerHook'),
    sampler_seed=dict(_scope_='mmdet', type='DistSamplerSeedHook'),
    timer=dict(_scope_='mmdet', type='IterTimerHook'),
    visualization=dict(
        _scope_='mmdet',
        draw=True,
        test_out_dir='/home/worawit.tepsan/Project_AI/Detection/data_testing_LPR',
        type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
image_size = (1024, 1024)
launcher = 'slurm'
load_from = '/home/worawit.tepsan/Project_AI/Detection/object_detection/workdir/epoch_13.pth'
load_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        keep_ratio=True,
        ratio_range=(0.1, 2.0),
        scale=(1024, 1024),
        type='RandomResize'),
    dict(
        allow_negative_crop=True,
        crop_size=(1024, 1024),
        crop_type='absolute_range',
        recompute_bbox=True,
        type='RandomCrop'),
    dict(min_gt_bbox_wh=(0.01, 0.01), type='FilterAnnotations'),
    dict(prob=0.5, type='RandomFlip'),
    dict(pad_val=dict(img=(114, 114, 114)), size=(1024, 1024), type='Pad'),
]
log_level = 'INFO'
log_processor = dict(
    _scope_='mmdet', by_epoch=True, type='LogProcessor', window_size=50)
loss_lambda = 2.0
max_epochs = 32
max_iters = 270000
metainfo = dict(classes='license_plate')
model = dict(
    backbone=dict(
        attn_drop_rate=0.0,
        convert_weights=True,
        depths=[2, 2, 18, 2],
        drop_path_rate=0.3,
        drop_rate=0.0,
        embed_dims=192,
        init_cfg=dict(
            checkpoint='/home/worawit.tepsan/Project_AI/Detection/pretrained_models/swin_large_patch4_window12_384_22k.pth',
            type='Pretrained'),
        mlp_ratio=4,
        num_heads=[6, 12, 24, 48],
        out_indices=(0, 1, 2, 3),
        patch_norm=True,
        pretrain_img_size=384,
        qk_scale=None,
        qkv_bias=True,
        type='SwinTransformer',
        window_size=12,
        with_cp=False),
    bbox_head=[
        dict(
            anchor_generator=dict(
                octave_base_scale=8,
                ratios=[1.0],
                scales_per_octave=1,
                strides=[4, 8, 16, 32, 64, 128],
                type='AnchorGenerator'),
            bbox_coder=dict(
                target_means=[0.0, 0.0, 0.0, 0.0],
                target_stds=[0.1, 0.1, 0.2, 0.2],
                type='DeltaXYWHBBoxCoder'),
            feat_channels=256,
            in_channels=256,
            loss_bbox=dict(loss_weight=24.0, type='GIoULoss'),
            loss_centerness=dict(
                loss_weight=12.0, type='CrossEntropyLoss', use_sigmoid=True),
            loss_cls=dict(
                alpha=0.25,
                gamma=2.0,
                loss_weight=12.0,
                type='FocalLoss',
                use_sigmoid=True),
            num_classes=1,
            stacked_convs=1,
            type='CoATSSHead'),
    ],
    data_preprocessor=dict(
        batch_augments=None,
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_mask=False,
        std=[58.395, 57.12, 57.375],
        type='DetDataPreprocessor'),
    eval_module='detr',
    neck=dict(
        act_cfg=None,
        in_channels=[192, 384, 768, 1536],
        kernel_size=1,
        norm_cfg=dict(num_groups=32, type='GN'),
        num_outs=5,
        out_channels=256,
        type='ChannelMapper'),
    query_head=dict(
        as_two_stage=True,
        dn_cfg=dict(
            box_noise_scale=1.0,
            group_cfg=dict(dynamic=True, num_dn_queries=100, num_groups=None),
            label_noise_scale=0.5),
        in_channels=2048,
        loss_bbox=dict(loss_weight=5.0, type='L1Loss'),
        loss_cls=dict(
            beta=2.0,
            loss_weight=1.0,
            type='QualityFocalLoss',
            use_sigmoid=True),
        loss_iou=dict(loss_weight=2.0, type='GIoULoss'),
        num_classes=1,
        num_query=900,
        positional_encoding=dict(
            normalize=True,
            num_feats=128,
            temperature=20,
            type='SinePositionalEncoding'),
        transformer=dict(
            decoder=dict(
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    attn_cfgs=[
                        dict(
                            dropout=0.0,
                            embed_dims=256,
                            num_heads=8,
                            type='MultiheadAttention'),
                        dict(
                            dropout=0.0,
                            embed_dims=256,
                            num_levels=5,
                            type='MultiScaleDeformableAttention'),
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm'),
                    type='DetrTransformerDecoderLayer'),
                type='DinoTransformerDecoder'),
            encoder=dict(
                num_layers=6,
                transformerlayers=dict(
                    attn_cfgs=dict(
                        dropout=0.0,
                        embed_dims=256,
                        num_levels=5,
                        type='MultiScaleDeformableAttention'),
                    feedforward_channels=2048,
                    ffn_dropout=0.0,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'),
                    type='BaseTransformerLayer'),
                type='DetrTransformerEncoder',
                with_cp=6),
            num_co_heads=2,
            num_feature_levels=5,
            type='CoDinoTransformer',
            with_coord_feat=False),
        type='CoDINOHead'),
    roi_head=[
        dict(
            bbox_head=dict(
                bbox_coder=dict(
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=[0.1, 0.1, 0.2, 0.2],
                    type='DeltaXYWHBBoxCoder'),
                fc_out_channels=1024,
                in_channels=256,
                loss_bbox=dict(loss_weight=120.0, type='GIoULoss'),
                loss_cls=dict(
                    loss_weight=12.0,
                    type='CrossEntropyLoss',
                    use_sigmoid=False),
                num_classes=1,
                reg_class_agnostic=False,
                reg_decoded_bbox=True,
                roi_feat_size=7,
                type='Shared2FCBBoxHead'),
            bbox_roi_extractor=dict(
                featmap_strides=[4, 8, 16, 32, 64],
                finest_scale=56,
                out_channels=256,
                roi_layer=dict(
                    output_size=7, sampling_ratio=0, type='RoIAlign'),
                type='SingleRoIExtractor'),
            type='CoStandardRoIHead'),
    ],
    rpn_head=dict(
        anchor_generator=dict(
            octave_base_scale=4,
            ratios=[0.5, 1.0, 2.0],
            scales_per_octave=3,
            strides=[4, 8, 16, 32, 64, 128],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(loss_weight=12.0, type='L1Loss'),
        loss_cls=dict(
            loss_weight=12.0, type='CrossEntropyLoss', use_sigmoid=True),
        type='RPNHead'),
    test_cfg=[
        dict(max_per_img=300, nms=dict(iou_threshold=0.8, type='soft_nms')),
        dict(
            rcnn=dict(
                max_per_img=100,
                nms=dict(iou_threshold=0.5, type='nms'),
                score_thr=0.0),
            rpn=dict(
                max_per_img=1000,
                min_bbox_size=0,
                nms=dict(iou_threshold=0.7, type='nms'),
                nms_pre=1000)),
        dict(
            max_per_img=100,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.6, type='nms'),
            nms_pre=1000,
            score_thr=0.0),
    ],
    train_cfg=[
        dict(
            assigner=dict(
                match_costs=[
                    dict(type='FocalLossCost', weight=2.0),
                    dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
                    dict(iou_mode='giou', type='IoUCost', weight=2.0),
                ],
                type='HungarianAssigner')),
        dict(
            rcnn=dict(
                assigner=dict(
                    ignore_iof_thr=-1,
                    match_low_quality=False,
                    min_pos_iou=0.5,
                    neg_iou_thr=0.5,
                    pos_iou_thr=0.5,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=True,
                    neg_pos_ub=-1,
                    num=512,
                    pos_fraction=0.25,
                    type='RandomSampler')),
            rpn=dict(
                allowed_border=-1,
                assigner=dict(
                    ignore_iof_thr=-1,
                    match_low_quality=True,
                    min_pos_iou=0.3,
                    neg_iou_thr=0.3,
                    pos_iou_thr=0.7,
                    type='MaxIoUAssigner'),
                debug=False,
                pos_weight=-1,
                sampler=dict(
                    add_gt_as_proposals=False,
                    neg_pos_ub=-1,
                    num=256,
                    pos_fraction=0.5,
                    type='RandomSampler')),
            rpn_proposal=dict(
                max_per_img=1000,
                min_bbox_size=0,
                nms=dict(iou_threshold=0.7, type='nms'),
                nms_pre=4000)),
        dict(
            allowed_border=-1,
            assigner=dict(topk=9, type='ATSSAssigner'),
            debug=False,
            pos_weight=-1),
    ],
    type='CoDETR',
    use_lsj=False)
num_classes = 1
num_dec_layer = 6
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2),
    optimizer=dict(lr=0.0002, type='AdamW', weight_decay=0.0001),
    paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1))),
    type='OptimWrapper')
param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        end=12,
        gamma=0.1,
        milestones=[11],
        type='MultiStepLR'),
]
pretrained = '/home/worawit.tepsan/Project_AI/Detection/pretrained_models/swin_large_patch4_window12_384_22k.pth'
resume = False
test_cfg = dict(_scope_='mmdet', type='TestLoop')
test_dataloader = dict(
    batch_size=2,
    dataset=dict(
        _scope_='mmdet',
        ann_file='annotations/instances_test.json',
        data_prefix=dict(img='test/'),
        data_root='/home/worawit.tepsan/Project_AI/Detection/data',
        metainfo=dict(classes='license_plate'),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(1333, 800), type='Resize'),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(_scope_='mmdet', shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    _scope_='mmdet',
    ann_file='annotations/instances_test.json',
    format_only=False,
    metric='bbox',
    outfile_prefix='/home/worawit.tepsan/Project_AI/Detection/object_detection/workdir/coco_detection/test',
    type='CocoMetric')
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(1333, 800), type='Resize'),
    dict(
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'),
        type='PackDetInputs'),
]
train_cfg = dict(max_epochs=32, type='EpochBasedTrainLoop', val_interval=1)
train_dataloader = dict(
    batch_size=2,
    dataset=dict(
        ann_file='annotations/instances_train.json',
        backend_args=None,
        data_prefix=dict(img='train/'),
        data_root='/home/worawit.tepsan/Project_AI/Detection/data',
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        metainfo=dict(classes='license_plate'),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(prob=0.5, type='RandomFlip'),
            dict(
                transforms=[
                    [
                        dict(
                            keep_ratio=True,
                            scales=[(480, 1333), (512, 1333), (544, 1333),
                                    (576, 1333), (608, 1333), (640, 1333),
                                    (672, 1333), (704, 1333), (736, 1333),
                                    (768, 1333), (800, 1333)],
                            type='RandomChoiceResize'),
                    ],
                    [
                        dict(
                            keep_ratio=True,
                            scales=[(400, 4200), (500, 4200), (600, 4200)],
                            type='RandomChoiceResize'),
                        dict(
                            allow_negative_crop=True,
                            crop_size=(384, 600),
                            crop_type='absolute_range',
                            type='RandomCrop'),
                        dict(
                            keep_ratio=True,
                            scales=[(480, 1333), (512, 1333), (544, 1333),
                                    (576, 1333), (608, 1333), (640, 1333),
                                    (672, 1333), (704, 1333), (736, 1333),
                                    (768, 1333), (800, 1333)],
                            type='RandomChoiceResize'),
                    ],
                ],
                type='RandomChoice'),
            dict(type='PackDetInputs'),
        ],
        type='CocoDataset'),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(_scope_='mmdet', shuffle=True, type='DefaultSampler'))
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(prob=0.5, type='RandomFlip'),
    dict(
        transforms=[
            [
                dict(
                    keep_ratio=True,
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)],
                    type='RandomChoiceResize'),
            ],
            [
                dict(
                    keep_ratio=True,
                    scales=[(400, 4200), (500, 4200), (600, 4200)],
                    type='RandomChoiceResize'),
                dict(
                    allow_negative_crop=True,
                    crop_size=(384, 600),
                    crop_type='absolute_range',
                    type='RandomCrop'),
                dict(
                    keep_ratio=True,
                    scales=[(480, 1333), (512, 1333), (544, 1333),
                            (576, 1333), (608, 1333), (640, 1333),
                            (672, 1333), (704, 1333), (736, 1333),
                            (768, 1333), (800, 1333)],
                    type='RandomChoiceResize'),
            ],
        ],
        type='RandomChoice'),
    dict(type='PackDetInputs'),
]
val_cfg = dict(_scope_='mmdet', type='ValLoop')
val_dataloader = dict(
    batch_size=2,
    dataset=dict(
        _scope_='mmdet',
        ann_file='annotations/instances_val.json',
        backend_args=None,
        data_prefix=dict(img='val/'),
        data_root='/home/worawit.tepsan/Project_AI/Detection/data',
        metainfo=dict(classes='license_plate'),
        pipeline=[
            dict(backend_args=None, type='LoadImageFromFile'),
            dict(keep_ratio=True, scale=(1333, 800), type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(_scope_='mmdet', shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    _scope_='mmdet',
    ann_file='/home/worawit.tepsan/Project_AI/Detection/data/annotations/instances_val.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
val_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(1333, 800), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'),
        type='PackDetInputs'),
]
vis_backends = [
    dict(_scope_='mmdet', type='LocalVisBackend'),
]
visualizer = dict(
    _scope_='mmdet',
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')])
work_dir = '/home/worawit.tepsan/Project_AI/Detection/object_detection/workdir'
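# ---------------------------------------------------------------------------
# Usage sketch (kept as comments so this file remains a plain MMEngine config).
# A minimal, hedged example of how a dumped MMDetection 3.x config such as this
# one is typically loaded and run. The file name
# 'co_dino_swin_l_license_plate.py' is hypothetical; the CO-DETR project
# modules must be importable (see custom_imports above), and paths/launcher
# should be adjusted to the local setup.
#
#     from mmengine.config import Config
#     from mmengine.runner import Runner
#
#     cfg = Config.fromfile('co_dino_swin_l_license_plate.py')
#     cfg.launcher = 'none'            # override 'slurm' for a single-GPU run
#     runner = Runner.from_cfg(cfg)    # builds model, dataloaders, and hooks
#     runner.train()                   # or runner.test() to evaluate load_from
# ---------------------------------------------------------------------------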