# Configuration for det_map's MapAgent: a MapTRv2Head on top of a ResNet-50
# image backbone and an FPN neck, plus the data pipelines that feed it.
#
# The `_target_` / `_convert_` keys follow the Hydra `instantiate` convention;
# presumably this file is loaded through Hydra/OmegaConf -- TODO confirm.
#
# Values written as `(a, b)` are plain YAML strings, not sequences. They are
# kept verbatim (one of them even contains the expression `-22.5 * 2`);
# presumably the consuming code parses them itself -- verify against caller.
_target_: det_map.map.map_agent.MapAgent
_convert_: 'all'

# ---------------------------------------------------------------------------
# Shared fragments.
# Each top-level key below wraps a one-entry mapping and anchors it, so that
# consumers can pull the inner key in with a `<<` merge.
# ---------------------------------------------------------------------------
is_train: &is_train
  is_train: True

point_cloud_range: &point_cloud_range
  pc_range: [-15.0, -30.0, -10.0, 15.0, 30.0, 10.0]

lidar_filter_ranges: &lidar_filter_ranges
  x_range: (-15.0, 15.0)
  y_range: (-30.0, 30.0)
  z_range: (-10.0, 10.0)

# Anchor is only referenced from the commented-out train_cfg further down.
voxel_size: &voxel_size
  voxel_size: [0.075, 0.075, 20.0]

# Anchor is not referenced anywhere in this file.
img_voxel_size: &img_voxel_size
  voxel_size: [0.3, 0.3, 20.0]

# Anchor is not referenced anywhere in this file.
dbound: &dbound
  dbound: [1.0, 35.0, 0.5]

grid_config: &grid_config
  grid_config:
    x: (-15.0, 15.0, 0.6)
    y: (-30.0, 30.0, 0.6)
    z: (-10.0, 10.0, 20.0)
    depth: (1.0, 60.0, 0.5)

# Anchor is not referenced anywhere in this file.
img_norm_cfg: &img_norm_cfg
  img_norm_cfg:
    mean: [123.675, 116.28, 103.53]
    std: [58.395, 57.12, 57.375]
    to_rgb: True

# Anchor is not referenced anywhere in this file.
map_classes: &map_classes
  map_classes: ['divider', 'ped_crossing', 'boundary', 'centerline']

#fixed_ptsnum_per_gt_line: &fixed_ptsnum_per_gt_line
#  fixed_ptsnum_per_gt_line: 20

#fixed_ptsnum_per_pred_line: &fixed_ptsnum_per_pred_line
#  fixed_ptsnum_per_pred_line: 20

# Anchor is not referenced anywhere in this file.
eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag
  eval_use_same_gt_sample_num_flag: True

#_pos_dim_: &_pos_dim_
#  _pos_dim_: 128

#_ffn_dim_: &_ffn_dim_
#  _ffn_dim_: 512

#_num_levels_: &_num_levels_
#  _num_levels_: 1

#bev_h_: &bev_h_
#  bev_h_: 100

#bev_w_: &bev_w_
#  bev_w_: 200

#queue_length: &queue_length
#  queue_length: 1

# All auxiliary segmentation switches are off in this configuration.
aux_seg: &aux_seg_cfg
  aux_seg:
    use_aux_seg: False
    bev_seg: False
    pv_seg: False
    seg_classes: 1
    feat_down_sample: 32
    pv_thickness: 1

#z_cfg : &z_cfg
#
#  pred_z_flag: True
#  gt_z_flag: True

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model:
  _target_: det_map.map.map_model.MapModel
  _convert_: 'all'
  use_grid_mask: True
  video_test_mode: False
  pretrained:
    img: ckpts/resnet50-19c8e357.pth

  img_backbone:
    type: ResNet
    depth: 50
    num_stages: 4
    out_indices: [3]
    frozen_stages: 1
    norm_cfg:
      type: BN
      requires_grad: False
    norm_eval: True
    style: pytorch

  img_neck:
    type: FPN
    in_channels: [2048]
    out_channels: 256
    start_level: 0
    add_extra_convs: on_output
    num_outs: 1
    relu_before_extra_convs: True

  pts_bbox_head:
    type: MapTRv2Head
    # Pulls in `pc_range` and `aux_seg`. Written as one merge sequence rather
    # than two separate `<<` keys, because a mapping must not repeat a key;
    # the two fragments share no keys, so the merged result is unchanged.
    <<: [*point_cloud_range, *aux_seg_cfg]
    bev_h: 100
    bev_w: 50
    num_query: 900
    num_vec_one2one: 20
    num_vec_one2many: 300
    k_one2many: 6
    num_pts_per_vec: 20
    num_pts_per_gt_vec: 20
    dir_interval: 1
    query_embed_type: 'instance_pts'
    transform_method: 'minmax'
    gt_shift_pts_pattern: 'v2'
    num_classes: 2
    in_channels: 256
    sync_cls_avg_factor: True
    with_box_refine: True
    as_two_stage: False
    code_size: 2
    # NOTE(review): YAML loads a bare `None` as the string 'None', not as
    # null. Left unchanged because the consumer's handling is not visible
    # here -- confirm, and switch to `null` if a real None is expected.
    code_weights: None
    # z_cfg: *z_cfg

    transformer:
      type: MapTRPerceptionTransformer
      bev_h: 100
      bev_w: 50
      # fuser:
      #   type: 'ConvFuser'
      #   in_channels: [256, 256]
      #   out_channels: 256
      num_cams: 2
      # z_cfg: *z_cfg
      rotate_prev_bev: False
      use_shift: True
      use_can_bus: False
      embed_dims: 256

      encoder:
        type: 'SpatialDecoder'
        num_layers: 1
        <<: *point_cloud_range
        # Same x/y/z triples as the shared `grid_config` fragment, but as
        # real sequences and without the `depth` entry.
        grid_config:
          x: [-15.0, 15.0, 0.6]
          y: [-30.0, 30.0, 0.6]
          z: [-10.0, 10.0, 20.0]
        data_config:
          input_size: [256, 704]
        transformerlayers:
          type: 'SpatialDecoderLayer'
          attn_cfgs:
            - type: 'SpatialCrossAttention'
              <<: *point_cloud_range
              num_cams: 2
              dropout: 0.0
              embed_dims: 256
              deformable_attention:
                type: 'MSDeformableAttention'
                embed_dims: 256
                num_points: 8
                num_levels: 1
          ffn_cfgs:
            type: 'FFN'
            embed_dims: 256
            feedforward_channels: 1024
            ffn_drop: 0.0
            act_cfg:
              type: 'ReLU'
              inplace: True
          feedforward_channels: 1024
          ffn_dropout: 0.0
          operation_order: ['cross_attn', 'norm', 'ffn', 'norm']

      decoder:
        type: MapTRDecoder
        num_layers: 6
        return_intermediate: True
        transformerlayers:
          type: DecoupledDetrTransformerDecoderLayer
          num_vec: 20
          num_pts_per_vec: 20
          # Three attention configs, matching the two 'self_attn' entries and
          # the one 'cross_attn' entry in `operation_order` below.
          attn_cfgs:
            - type: MultiheadAttention
              embed_dims: 256
              num_heads: 8
              dropout: 0.1
            - type: MultiheadAttention
              embed_dims: 256
              num_heads: 8
              dropout: 0.1
            - type: CustomMSDeformableAttention
              embed_dims: 256
              num_levels: 1
          feedforward_channels: 512
          ffn_dropout: 0.1
          operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']

    positional_encoding:
      type: LearnedPositionalEncoding
      num_feats: 128
      # Same values as bev_h / bev_w above.
      row_num_embed: 100
      col_num_embed: 50

    loss_cls:
      type: FocalLoss
      use_sigmoid: True
      gamma: 2.0
      alpha: 0.25
      loss_weight: 2.0
    # Box and IoU losses are configured with weight 0.0.
    loss_bbox:
      type: L1Loss
      loss_weight: 0.0
    loss_iou:
      type: GIoULoss
      loss_weight: 0.0
    loss_pts:
      type: PtsL1Loss
      loss_weight: 5.0
    loss_dir:
      type: PtsDirCosLoss
      loss_weight: 0.005
    loss_seg:
      type: SimpleLoss
      pos_weight: 4.0
      loss_weight: 1.0
    loss_pv_seg:
      type: SimpleLoss
      pos_weight: 1.0
      loss_weight: 2.0

  # train_cfg:
  #   pts:
  #     grid_size: [512, 512, 1]
  #     <<: *voxel_size
  #     point_cloud_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
  #     out_size_factor: 4
  #     assigner:
  #       type: MapTRAssigner
  #       cls_cost:
  #         type: FocalLossCost
  #         weight: 2.0
  #       reg_cost:
  #         type: BBoxL1Cost
  #         weight: 0.0
  #         box_format: 'xywh'
  #       iou_cost:
  #         type: IoUCost
  #         iou_mode: 'giou'
  #         weight: 0.0
  #       pts_cost:
  #         type: OrderedPtsL1Cost
  #         weight: 5
  #       pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]

# ---------------------------------------------------------------------------
# Data pipelines
# ---------------------------------------------------------------------------
pipelines:
  lidar_filter:
    _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
    _convert_: 'all'
    close_radius: 1.0
    <<: *lidar_filter_ranges

  # only include in training
  point_shuffle:
    _target_: det_map.data.pipelines.point_shuffle.PointShuffle
    <<: *is_train

  lidar_aug:
    _target_: det_map.data.pipelines.lidar_aug.LiDARAug
    bda_aug_conf:
      rot_lim: (-22.5 * 2, 22.5 * 2)
      scale_lim: (0.9, 1.1)
      flip_dx_ratio: 0.5
      flip_dy_ratio: 0.5
      tran_lim: (0.5, 0.5, 0.5)
    # If no augmentation is wanted for the map, set this is_train to False.
    # Single merge sequence instead of two `<<` keys in the same mapping; the
    # fragments share no keys, so the merged result is unchanged.
    <<: [*lidar_filter_ranges, *is_train]

  depth:
    _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
    <<: *grid_config

  img:
    _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
    _convert_: 'all'
    opencv_pp: True
    # Flag should be False in Eval!!!!
    <<: *is_train
    data_config:
      input_size: (256, 704)
      src_size: (900, 1600)
      # Augmentation
      resize: (-0.06, 0.44)
      rot: (-5.4, 5.4)
      flip: True
      crop_h: (0.0, 0.0)
      random_crop_height: True
      vflip: True
      resize_test: 0.04
      pmd:
        brightness_delta: 32
        contrast_lower: 0.5
        contrast_upper: 1.5
        saturation_lower: 0.5
        saturation_upper: 1.5
        hue_delta: 18
        rate: 0.5
        #<<: *is_train

# ---------------------------------------------------------------------------
# Agent-level settings
# ---------------------------------------------------------------------------
checkpoint_path: null
hidden_layer_dim: 512
# Written with an explicit mantissa point: YAML 1.1 resolvers (e.g. plain
# PyYAML) only recognise exponent floats that contain a '.', and would load a
# bare `1e-4` as a string.
lr: 1.0e-4