# Hydra-style (`_target_` / `_convert_`) configuration for the detection agent
# `det_map.det.det_agent.DetAgent`: shared anchors, the DAL model definition,
# the data pipelines, and the agent-level hyper-parameters.
#
# NOTE(review): values written like `(-54.0, 54.0)`, `img_feat_dim`,
# `data_config['input_size']` or `voxel_size[:2]` are plain YAML strings, not
# tuples or references -- presumably the consumer evaluates or substitutes
# them; confirm against the loading code.
_target_: det_map.det.det_agent.DetAgent
_convert_: 'all'

# ---------------------------------------------------------------------------
# Shared anchors. Each mapping below is anchored so that its keys can be
# spliced into other mappings with a merge key (`<<: *name`).
# ---------------------------------------------------------------------------
is_train: &is_train
  is_train: true

ranges: &ranges
  x_range: (-54.0, 54.0)
  y_range: (-54.0, 54.0)
  z_range: (-10.0, 10.0)

point_cloud_range: &point_cloud_range
  point_cloud_range: [-54.0, -54.0, -10.0, 54.0, 54.0, 10.0]

voxel_size: &voxel_size
  voxel_size: [0.075, 0.075, 0.2]

grid_config: &grid_config
  grid_config:
    x: (-54.0, 54.0, 0.6)
    y: (-54.0, 54.0, 0.6)
    z: (-10.0, 10.0, 20.0)
    depth: (1.0, 60.0, 0.5)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model:
  _target_: det_map.det.dal.dal.DAL
  _convert_: 'all'
  use_grid_mask: true

  pts_voxel_layer:
    # A mapping may hold only one `<<` key, so both anchors are merged through
    # the sequence form; their key sets (`voxel_size`, `point_cloud_range`)
    # do not overlap.
    <<: [*voxel_size, *point_cloud_range]
    max_num_points: 10
    max_voxels: [120000, 160000]

  pts_voxel_encoder:
    type: HardSimpleVFE
    num_features: 5

  pts_middle_encoder:
    type: SparseEncoder
    in_channels: 5
    base_channels: 24
    sparse_shape: [41, 1440, 1440]
    output_channels: 192
    order: ['conv', 'norm', 'act']
    encoder_channels: ((24, 24, 48), (48, 48, 96), (96, 96, 192), (192, 192))
    encoder_paddings: ((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0))
    block_type: basicblock

  pts_backbone:
    type: SECOND
    in_channels: 384
    out_channels: [192, 384]
    layer_nums: [8, 8]
    layer_strides: [1, 2]
    norm_cfg:
      type: BN
      # Written with a decimal point so YAML 1.1 loaders (e.g. plain PyYAML)
      # also resolve it as a float instead of the string "1e-3".
      eps: 1.0e-3
      momentum: 0.01
    conv_cfg:
      type: Conv2d
      bias: false

  pts_neck:
    type: SECONDFPN
    in_channels: [192, 384]
    out_channels: [256, 256]
    upsample_strides: [1, 2]
    norm_cfg:
      type: BN
      # Decimal point keeps this a float under YAML 1.1 loaders as well.
      eps: 1.0e-3
      momentum: 0.01
    upsample_cfg:
      type: deconv
      bias: false
    use_conv_for_no_stride: true

  img_backbone:
    pretrained: 'torchvision://resnet18'
    type: ResNet
    depth: 18
    num_stages: 4
    out_indices: [1, 2, 3]
    frozen_stages: -1
    norm_cfg:
      type: BN
      requires_grad: true
    norm_eval: false
    with_cp: false
    style: pytorch

  img_neck:
    type: CustomFPN
    in_channels: [128, 256, 512]
    out_channels: img_feat_dim
    num_outs: 1
    start_level: 0
    out_ids: [0]

  img_view_transformer:
    type: LSSViewTransformer
    # Splices in the shared `grid_config` mapping.
    <<: *grid_config
    input_size: data_config['input_size']
    in_channels: img_feat_dim
    out_channels: feat_bev_img_dim
    downsample: 8
    with_depth_from_lidar: true

  pts_bbox_head:
    type: DALHead
    feat_bev_img_dim: feat_bev_img_dim
    img_feat_dim: img_feat_dim
    sparse_fuse_layers: 2
    dense_fuse_layers: 2
    instance_attn: false
    num_proposals: 200
    in_channels: 512
    hidden_channel: 128
    num_classes: 10
    num_decoder_layers: 1
    num_heads: 8
    nms_kernel_size: 3
    ffn_channel: 256
    dropout: 0.1
    bn_momentum: 0.1
    activation: relu
    auxiliary: true
    common_heads:
      center: [2, 2]
      height: [1, 2]
      dim: [3, 2]
      rot: [2, 2]
      vel: [2, 2]
    bbox_coder:
      type: TransFusionBBoxCoder
      pc_range: point_cloud_range[:2]
      post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
      score_threshold: 0.0
      out_size_factor: 8
      voxel_size: voxel_size[:2]
      code_size: 10
    loss_cls:
      type: FocalLoss
      use_sigmoid: true
      gamma: 2.0
      alpha: 0.25
      reduction: mean
      loss_weight: 1.0
    loss_heatmap:
      type: GaussianFocalLoss
      reduction: mean

# ---------------------------------------------------------------------------
# Data pipelines
# ---------------------------------------------------------------------------
pipelines:
  lidar_filter:
    _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
    _convert_: 'all'
    close_radius: 1.0
    <<: *ranges

  # only include in training
  point_shuffle:
    _target_: det_map.data.pipelines.point_shuffle.PointShuffle
    <<: *is_train

  lidar_aug:
    _target_: det_map.data.pipelines.lidar_aug.LiDARAug
    bda_aug_conf:
      rot_lim: (-22.5 * 2, 22.5 * 2)
      scale_lim: (0.9, 1.1)
      flip_dx_ratio: 0.5
      flip_dy_ratio: 0.5
      tran_lim: (0.5, 0.5, 0.5)
    # if no aug for map, set this is_train to False
    # (an explicit `is_train: false` key in this mapping takes precedence over
    # the merged value). Both anchors go through one merge key because a
    # mapping may hold only one `<<` key.
    <<: [*ranges, *is_train]

  depth:
    _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
    <<: *grid_config

  img:
    _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
    _convert_: 'all'
    opencv_pp: true
    # Flag should be False in Eval!!!!
    <<: *is_train
    data_config:
      input_size: (256, 704)
      src_size: (900, 1600)
      # Augmentation
      resize: (-0.06, 0.44)
      rot: (-5.4, 5.4)
      flip: true
      crop_h: (0.0, 0.0)
      random_crop_height: true
      vflip: true
      resize_test: 0.04
      pmd:
        brightness_delta: 32
        contrast_lower: 0.5
        contrast_upper: 1.5
        saturation_lower: 0.5
        saturation_upper: 1.5
        hue_delta: 18
        rate: 0.5

# ---------------------------------------------------------------------------
# Agent-level settings
# ---------------------------------------------------------------------------
# NOTE(review): the root mapping already defines an explicit `is_train` key
# (the anchor wrapper near the top of this file). Explicit keys take
# precedence over merged ones, so this merge appears to have no effect at
# this level -- confirm what the agent is expected to receive.
<<: *is_train
checkpoint_path: null
hidden_layer_dim: 512
# Decimal point keeps this a float under YAML 1.1 loaders (plain PyYAML would
# read `1e-4` as a string).
lr: 1.0e-4