|
# Top-level instantiation entry: `_target_` names the class to build
# (det_map.map.map_agent.MapAgent); `_convert_` is set to 'all'.
# Presumably consumed by Hydra's instantiate mechanism -- confirm against the loader.
# NOTE(review): in this copy every line ends with " |" and nested keys carry no
# indentation; this looks like an extraction artifact -- verify against the original file.
_target_: det_map.map.map_agent.MapAgent |
|
_convert_: 'all' |
|
|
|
|
|
# Shared anchor `&is_train`: merged further down with `<<: *is_train`
# (entries under `pipelines`).
is_train: &is_train |
|
is_train: True |
|
|
|
# Shared anchor `&point_cloud_range`: merged in the model section with
# `<<: *point_cloud_range`.
# Six values; presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- TODO confirm ordering.
point_cloud_range: &point_cloud_range |
|
pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ] |
|
|
|
# Shared anchor `&lidar_filter_ranges`: merged under `pipelines` with
# `<<: *lidar_filter_ranges`.
# NOTE(review): parenthesised values are plain YAML scalars (strings), not sequences;
# presumably the consuming class parses them -- verify.
# The numeric bounds agree with the matching min/max pairs in `pc_range` above.
lidar_filter_ranges: &lidar_filter_ranges |
|
x_range: (-15.0, 15.0) |
|
y_range: (-30.0, 30.0) |
|
z_range: (-10.0, 10.0) |
|
|
|
# Anchor `&voxel_size`: no `*voxel_size` alias appears in the visible portion of this file.
voxel_size: &voxel_size |
|
voxel_size: [0.075, 0.075, 20.0] |
|
|
|
# Anchor `&img_voxel_size`: no alias to it appears in the visible portion of this file.
img_voxel_size: &img_voxel_size |
|
voxel_size: [0.3, 0.3, 20.0] |
|
|
|
|
|
# Anchor `&dbound`: no alias to it appears in the visible portion of this file.
# NOTE(review): the upper value here (35.0) differs from the `depth` entry
# of `grid_config` below (60.0) -- confirm which one is intended.
dbound: &dbound |
|
dbound: [1.0, 35.0, 0.5] |
|
|
|
# Anchor `&grid_config`: merged under `pipelines` with `<<: *grid_config`.
# The anchored mapping appears to wrap a key that is also called `grid_config`,
# so the merge would contribute a `grid_config` key -- nesting cannot be confirmed
# from this copy.
# NOTE(review): parenthesised triples are plain YAML scalars (strings); the model
# section writes the same x/y/z triples as bracketed lists.
grid_config: &grid_config |
|
grid_config: |
|
x: (-15.0, 15.0, 0.6) |
|
y: (-30.0, 30.0, 0.6) |
|
z: (-10.0, 10.0, 20.0) |
|
depth: (1.0, 60.0, 0.5) |
|
|
|
# Anchor `&img_norm_cfg`: image normalisation settings (per-channel mean/std and a
# `to_rgb` flag). No alias to it appears in the visible portion of this file.
img_norm_cfg : &img_norm_cfg |
|
img_norm_cfg: |
|
mean: [123.675, 116.28, 103.53] |
|
std: [58.395, 57.12, 57.375] |
|
to_rgb: True |
|
|
|
# Anchor `&map_classes`: four class names. No alias to it appears in the visible
# portion of this file.
# NOTE(review): the model section sets `num_classes: 2` while four names are listed
# here -- confirm whether that is intentional.
map_classes: &map_classes |
|
map_classes: ['divider', 'ped_crossing','boundary', 'centerline'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Anchor `&eval_use_same_gt_sample_num_flag`: a single boolean flag set to True.
# No alias to it appears in the visible portion of this file.
eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag |
|
eval_use_same_gt_sample_num_flag: True |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Anchor `&aux_seg_cfg`: auxiliary segmentation settings, merged in the model
# section with `<<: *aux_seg_cfg`.
# All three switches (`use_aux_seg`, `bev_seg`, `pv_seg`) are False in this config.
aux_seg : &aux_seg_cfg |
|
aux_seg: |
|
use_aux_seg: False |
|
bev_seg: False |
|
pv_seg: False |
|
seg_classes: 1 |
|
feat_down_sample: 32 |
|
pv_thickness: 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Model definition: `_target_` names det_map.map.map_model.MapModel.
model: |
|
_target_: det_map.map.map_model.MapModel |
|
_convert_: 'all' |
|
use_grid_mask: True |
|
video_test_mode: False |
|
# Pretrained weights for the image branch; the path is relative
# (presumably resolved against the working directory -- verify).
pretrained: |
|
img: ckpts/resnet50-19c8e357.pth |
|
|
|
# Image backbone: ResNet with depth 50; only stage index 3 is output (`out_indices`).
img_backbone: |
|
type: ResNet |
|
depth: 50 |
|
num_stages: 4 |
|
out_indices: [3] |
|
frozen_stages: 1 |
|
# Normalisation layers: BN with `requires_grad: False`.
norm_cfg: |
|
type: BN |
|
requires_grad: False |
|
norm_eval: True |
|
style: pytorch |
|
# Image neck: FPN with a single input level (2048 channels) and a single
# 256-channel output (`num_outs: 1`).
img_neck: |
|
type: FPN |
|
in_channels: [2048] |
|
out_channels: 256 |
|
start_level: 0 |
|
add_extra_convs: on_output |
|
num_outs: 1 |
|
relu_before_extra_convs: True |
|
# Head configuration (type MapTRv2Head). Two anchored mappings are merged in:
# `*point_cloud_range` and `*aux_seg_cfg`.
pts_bbox_head: |
|
type: MapTRv2Head |
|
<<: *point_cloud_range |
|
# BEV grid size; the same 100 / 50 values are repeated in the `transformer`
# entry and in `positional_encoding` (row_num_embed / col_num_embed).
bev_h: 100 |
|
bev_w: 50 |
|
num_query: 900 |
|
num_vec_one2one: 20 |
|
num_vec_one2many: 300 |
|
k_one2many: 6 |
|
num_pts_per_vec: 20 |
|
num_pts_per_gt_vec: 20 |
|
dir_interval: 1 |
|
query_embed_type: 'instance_pts' |
|
transform_method: 'minmax' |
|
gt_shift_pts_pattern: 'v2' |
|
# NOTE(review): the `map_classes` anchor near the top of the file lists four
# class names while `num_classes` is 2 -- confirm this is intentional.
num_classes: 2 |
|
in_channels: 256 |
|
sync_cls_avg_factor: True |
|
with_box_refine: True |
|
as_two_stage: False |
|
code_size: 2 |
|
# YAML has no `None` literal: a bare `None` is loaded as the string "None".
# `null` is the YAML null value, which is what an unset code_weights needs.
# NOTE(review): confirm the head accepts a null code_weights together with code_size 2.
code_weights: null |
|
<<: *aux_seg_cfg |
|
|
|
# Transformer configuration (type MapTRPerceptionTransformer).
# bev_h / bev_w repeat the 100 / 50 values set on `pts_bbox_head`.
transformer: |
|
type: MapTRPerceptionTransformer |
|
bev_h: 100 |
|
bev_w: 50 |
|
|
|
|
|
|
|
|
|
|
# Number of camera views; the same value is repeated in the
# SpatialCrossAttention entry below.
num_cams: 2 |
|
|
|
rotate_prev_bev: False |
|
use_shift: True |
|
use_can_bus: False |
|
embed_dims: 256 |
|
# Encoder: a single 'SpatialDecoder' layer; merges `*point_cloud_range`.
encoder: |
|
type: 'SpatialDecoder' |
|
num_layers: 1 |
|
<<: *point_cloud_range |
|
# Same x/y/z triples as the top-level `grid_config` anchor, written here as
# real YAML lists (the anchor writes them as parenthesised strings).
grid_config: |
|
x: [-15.0, 15.0, 0.6] |
|
y: [-30.0, 30.0, 0.6] |
|
z: [ -10.0, 10.0, 20.0 ] |
|
# Same 256 x 704 values as `input_size` in the `img` pipeline's data_config.
data_config: |
|
input_size: [256, 704] |
|
transformerlayers: |
|
type: 'SpatialDecoderLayer' |
|
# One attention entry, matching the single 'cross_attn' in operation_order below.
attn_cfgs: |
|
- type: 'SpatialCrossAttention' |
|
<<: *point_cloud_range |
|
num_cams: 2 |
|
dropout: 0.0 |
|
embed_dims: 256 |
|
deformable_attention: |
|
type: 'MSDeformableAttention' |
|
embed_dims: 256 |
|
num_points: 8 |
|
num_levels: 1 |
|
ffn_cfgs: |
|
type: 'FFN' |
|
embed_dims: 256 |
|
feedforward_channels: 1024 |
|
ffn_drop: 0.0 |
|
act_cfg: |
|
type: 'ReLU' |
|
inplace: True |
|
# NOTE(review): `feedforward_channels` appears a second time here, next to
# `ffn_dropout` (the FFN entry above uses `ffn_drop`). The nesting level of these
# two keys cannot be determined from this copy -- verify against the original file.
feedforward_channels: 1024 |
|
ffn_dropout: 0.0 |
|
operation_order: ['cross_attn', 'norm' ,'ffn', 'norm'] |
|
# Decoder: MapTRDecoder with 6 layers; `return_intermediate` is True.
decoder: |
|
type: MapTRDecoder |
|
num_layers: 6 |
|
return_intermediate: True |
|
transformerlayers: |
|
type: DecoupledDetrTransformerDecoderLayer |
|
# Same values as `num_vec_one2one` and `num_pts_per_vec` on `pts_bbox_head`.
num_vec: 20 |
|
num_pts_per_vec: 20 |
|
# Three attention entries; operation_order below lists two 'self_attn' steps
# and one 'cross_attn' step (presumably consumed in this order -- verify).
attn_cfgs: |
|
- type: MultiheadAttention |
|
embed_dims: 256 |
|
num_heads: 8 |
|
dropout: 0.1 |
|
- type: MultiheadAttention |
|
embed_dims: 256 |
|
num_heads: 8 |
|
dropout: 0.1 |
|
- type: CustomMSDeformableAttention |
|
embed_dims: 256 |
|
num_levels: 1 |
|
feedforward_channels: 512 |
|
ffn_dropout: 0.1 |
|
operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm'] |
|
|
|
# Positional encoding: row/col embedding counts repeat the bev_h / bev_w values
# (100 / 50); num_feats (128) is half of the 256 embed_dims used in this config.
positional_encoding: |
|
type: LearnedPositionalEncoding |
|
num_feats: 128 |
|
row_num_embed: 100 |
|
col_num_embed: 50 |
|
# Loss terms. Each entry names a loss type and a `loss_weight`.
loss_cls: |
|
type: FocalLoss |
|
use_sigmoid: True |
|
gamma: 2.0 |
|
alpha: 0.25 |
|
loss_weight: 2.0 |
|
# loss_bbox and loss_iou both carry loss_weight 0.0 in this config.
loss_bbox: |
|
type: L1Loss |
|
loss_weight: 0.0 |
|
loss_iou: |
|
type: GIoULoss |
|
loss_weight: 0.0 |
|
loss_pts: |
|
type: PtsL1Loss |
|
loss_weight: 5.0 |
|
loss_dir: |
|
type: PtsDirCosLoss |
|
loss_weight: 0.005 |
|
# Segmentation losses. NOTE(review): the `aux_seg` settings in this file set
# use_aux_seg / bev_seg / pv_seg to False, so these may be unused -- verify.
loss_seg: |
|
type: SimpleLoss |
|
pos_weight: 4.0 |
|
loss_weight: 1.0 |
|
loss_pv_seg: |
|
type: SimpleLoss |
|
pos_weight: 1.0 |
|
loss_weight: 2.0 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Data pipelines. Each entry names a class through `_target_` and merges shared
# anchors defined near the top of the file.
pipelines: |
|
# LiDAR filter: merges the x/y/z ranges from `*lidar_filter_ranges`.
lidar_filter: |
|
_target_: det_map.data.pipelines.filter_lidar.LiDARFilter |
|
_convert_: 'all' |
|
close_radius: 1.0 |
|
<<: *lidar_filter_ranges |
|
|
|
|
|
# Point shuffle: merges `*is_train`.
point_shuffle: |
|
_target_: det_map.data.pipelines.point_shuffle.PointShuffle |
|
<<: *is_train |
|
|
|
# LiDAR augmentation settings.
lidar_aug: |
|
_target_: det_map.data.pipelines.lidar_aug.LiDARAug |
|
bda_aug_conf: |
|
# NOTE(review): the parenthesised values below are plain YAML scalars (strings);
# `rot_lim` even contains arithmetic, so the consumer presumably evaluates these
# strings itself -- verify.
rot_lim: (-22.5 * 2, 22.5 * 2) |
|
scale_lim: (0.9, 1.1) |
|
flip_dx_ratio: 0.5 |
|
flip_dy_ratio: 0.5 |
|
tran_lim: (0.5, 0.5, 0.5) |
|
# NOTE(review): whether the two merges below belong to `bda_aug_conf` or to
# `lidar_aug` cannot be determined from this copy (indentation is missing).
<<: *lidar_filter_ranges |
|
|
|
<<: *is_train |
|
|
|
# Depth preparation: class LiDAR2Depth; merges `*grid_config`.
depth: |
|
_target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth |
|
<<: *grid_config |
|
|
|
# Image preparation: class PrepareImageInputs; merges `*is_train`.
img: |
|
_target_: det_map.data.pipelines.prepare_img.PrepareImageInputs |
|
_convert_: 'all' |
|
opencv_pp: True |
|
|
|
<<: *is_train |
|
# NOTE(review): the parenthesised values below are plain YAML scalars (strings),
# not sequences; presumably parsed by the consumer -- verify.
# `input_size` carries the same 256 x 704 values as the encoder's data_config
# in the model section (written there as a list).
data_config: |
|
input_size: (256, 704) |
|
src_size: (900, 1600) |
|
|
|
resize: (-0.06, 0.44) |
|
rot: (-5.4, 5.4) |
|
flip: True |
|
crop_h: (0.0, 0.0) |
|
random_crop_height: True |
|
vflip: True |
|
resize_test: 0.04 |
|
# `pmd` groups brightness / contrast / saturation / hue settings plus a `rate`
# (presumably a photometric distortion step -- verify).
pmd: |
|
brightness_delta: 32 |
|
contrast_lower: 0.5 |
|
contrast_upper: 1.5 |
|
saturation_lower: 0.5 |
|
saturation_upper: 1.5 |
|
hue_delta: 18 |
|
rate: 0.5 |
|
|
|
|
|
# Trailing scalar settings. `checkpoint_path` is null (no checkpoint specified).
checkpoint_path: null |
|
hidden_layer_dim: 512 |
|
# NOTE(review): `1e-4` has no decimal point. Plain PyYAML (YAML 1.1) resolves that
# form as a string, while OmegaConf's loader resolves it as a float -- confirm
# which loader reads this file.
lr: 1e-4 |
|
|