# Hydra-style instantiation config: `_target_` names the class to build and
# the other top-level keys of this file sit alongside it in the same mapping.
_target_: det_map.det.det_agent.DetAgent
# Presumably Hydra's `_convert_` option (nested configs are converted to plain
# containers before the target is called) — confirm for the Hydra version used.
_convert_: 'all'

# Shared train/eval switch. The anchor points at a one-key mapping so that
# `<<: *is_train` injects `is_train: true` into any mapping that merges it.
is_train: &is_train
  is_train: true

# Shared x/y/z extents, pulled into pipeline steps with `<<: *ranges`.
# NOTE(review): YAML has no tuple syntax, so each parenthesised value loads as
# a string; the quotes only make that explicit. Presumably the consumer parses
# them into (min, max) pairs — confirm.
ranges: &ranges
  x_range: '(-54.0, 54.0)'
  y_range: '(-54.0, 54.0)'
  z_range: '(-10.0, 10.0)'

# Six-value range merged into model.pts_voxel_layer. The numbers repeat the
# x/y/z extents used elsewhere in this file; the ordering appears to be
# [x_min, y_min, z_min, x_max, y_max, z_max] — confirm against the consumer.
point_cloud_range: &point_cloud_range
  point_cloud_range: [-54.0, -54.0, -10.0, 54.0, 54.0, 10.0]
# Three-value voxel size merged into model.pts_voxel_layer via `*voxel_size`.
# Presumably ordered (x, y, z) — confirm against the consumer.
voxel_size: &voxel_size
  voxel_size: [0.075, 0.075, 0.2]


# Grid definition merged into model.img_view_transformer and pipelines.depth.
# NOTE(review): each value loads as a string (YAML has no tuple syntax); the
# quotes make that explicit. The triples look like (lower, upper, step) —
# confirm against the consumer.
grid_config: &grid_config
  grid_config:
    x: '(-54.0, 54.0, 0.6)'
    y: '(-54.0, 54.0, 0.6)'
    z: '(-10.0, 10.0, 20.0)'
    depth: '(1.0, 60.0, 0.5)'

# Detector definition. `_target_` points at det_map.det.dal.dal.DAL; each
# nested sub-mapping below carries a `type` name for one component.
model:
  _target_: det_map.det.dal.dal.DAL
  _convert_: 'all'
  # Boolean flag passed through to the model; its effect is defined by DAL.
  use_grid_mask: true
  # Voxelisation settings. `voxel_size` and `point_cloud_range` come from the
  # shared anchors. A single merge key with a list is used because repeating
  # `<<` inside one mapping is a duplicate key, which strict YAML parsers
  # reject; the two merged mappings share no keys, so the result is unchanged.
  pts_voxel_layer:
    <<: [*voxel_size, *point_cloud_range]
    max_num_points: 10
    max_voxels: [120000, 160000]
  # Voxel feature encoder. `num_features` equals `in_channels` of
  # pts_middle_encoder (both 5).
  pts_voxel_encoder:
    type: HardSimpleVFE
    num_features: 5
  # Sparse encoder that consumes the voxel features (in_channels 5).
  # NOTE(review): 1440 equals the x/y extent of 108 divided by the 0.075 voxel
  # size, but 41 does not obviously follow from the z extent of 20 at voxel
  # size 0.2 (which gives 100) — confirm sparse_shape against
  # point_cloud_range.
  pts_middle_encoder:
    type: SparseEncoder
    in_channels: 5
    base_channels: 24
    sparse_shape: [41, 1440, 1440]
    output_channels: 192
    order: ['conv', 'norm', 'act']
    # The nested-tuple literals load as strings (YAML has no tuple syntax);
    # the quotes make that explicit. Presumably parsed by the consumer —
    # confirm.
    encoder_channels: '((24, 24, 48), (48, 48, 96), (96, 96, 192), (192, 192))'
    encoder_paddings: '((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0))'
    block_type: basicblock
  # Point-branch backbone (type SECOND) built from Conv2d layers without bias.
  # `out_channels` here matches `in_channels` of pts_neck ([192, 384]).
  pts_backbone:
    type: SECOND
    in_channels: 384
    out_channels: [192, 384]
    layer_nums: [8, 8]
    layer_strides: [1, 2]
    norm_cfg:
      type: BN
      # Written with a mantissa dot: YAML 1.1 resolvers (e.g. plain PyYAML)
      # read a bare `1e-3` as a string instead of a float.
      eps: 1.0e-3
      momentum: 0.01
    conv_cfg:
      type: Conv2d
      bias: false
  # Point-branch neck (type SECONDFPN). `in_channels` matches the
  # `out_channels` of pts_backbone ([192, 384]).
  pts_neck:
    type: SECONDFPN
    in_channels: [192, 384]
    out_channels: [256, 256]
    upsample_strides: [1, 2]
    norm_cfg:
      type: BN
      # Written with a mantissa dot: YAML 1.1 resolvers (e.g. plain PyYAML)
      # read a bare `1e-3` as a string instead of a float.
      eps: 1.0e-3
      momentum: 0.01
    upsample_cfg:
      type: deconv
      bias: false
    use_conv_for_no_stride: true
  img_backbone:
    pretrained: 'torchvision://resnet18'
    type: ResNet
    depth: 18
    num_stages: 4
    out_indices: [ 1, 2, 3 ]
    frozen_stages: -1
    norm_cfg:
      type: BN
      requires_grad: true
    norm_eval: false
    with_cp: false
    style: pytorch
  img_neck:
    type: CustomFPN
    in_channels: [ 128, 256, 512 ]
    out_channels: img_feat_dim
    num_outs: 1
    start_level: 0
    out_ids: [ 0 ]
  img_view_transformer:
    type: LSSViewTransformer
    <<: *grid_config
    input_size: data_config['input_size']
    in_channels: img_feat_dim
    out_channels: feat_bev_img_dim
    downsample: 8
    with_depth_from_lidar: true
  # Detection head (type DALHead) with its box coder and two loss terms.
  pts_bbox_head:
    type: DALHead
    # NOTE(review): literal placeholder strings, not numbers; presumably
    # substituted by the consumer — confirm.
    feat_bev_img_dim: 'feat_bev_img_dim'
    img_feat_dim: 'img_feat_dim'
    sparse_fuse_layers: 2
    dense_fuse_layers: 2
    instance_attn: false
    num_proposals: 200
    in_channels: 512
    hidden_channel: 128
    num_classes: 10
    num_decoder_layers: 1
    num_heads: 8
    nms_kernel_size: 3
    ffn_channel: 256
    dropout: 0.1
    bn_momentum: 0.1
    activation: relu
    auxiliary: true
    # One two-element list per regression branch; the meaning of the two
    # numbers is defined by the head implementation.
    common_heads:
      center: [2, 2]
      height: [1, 2]
      dim: [3, 2]
      rot: [2, 2]
      vel: [2, 2]
    bbox_coder:
      type: TransFusionBBoxCoder
      # NOTE(review): `pc_range` and `voxel_size` are literal strings that
      # look like Python slice expressions; YAML does not evaluate them.
      # Presumably resolved by the consumer — confirm.
      pc_range: 'point_cloud_range[:2]'
      post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
      score_threshold: 0.0
      out_size_factor: 8
      voxel_size: 'voxel_size[:2]'
      code_size: 10
    loss_cls:
      type: FocalLoss
      use_sigmoid: true
      gamma: 2.0
      alpha: 0.25
      reduction: mean
      loss_weight: 1.0
    loss_heatmap:
      type: GaussianFocalLoss
      reduction: mean

# Data-pipeline steps; each entry names its own `_target_` class.
pipelines:
  # LiDAR filter step. x_range / y_range / z_range are merged in from the
  # shared `ranges` anchor.
  lidar_filter:
    _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
    _convert_: 'all'
    close_radius: 1.0
    <<: *ranges

  # Only include this step in training.
  point_shuffle:
    _target_: det_map.data.pipelines.point_shuffle.PointShuffle
    # Merges `is_train` in from the shared anchor.
    <<: *is_train

  # LiDAR augmentation step.
  lidar_aug:
    _target_: det_map.data.pipelines.lidar_aug.LiDARAug
    # Both shared mappings are merged through a single `<<` key: repeating
    # `<<` inside one mapping is a duplicate key, which strict YAML parsers
    # reject. The two anchors share no keys, so the result is unchanged.
    # To disable augmentation (e.g. no aug for map), add an explicit
    # `is_train: false` here; explicit keys override merged ones.
    <<: [*ranges, *is_train]
    bda_aug_conf:
      # NOTE(review): the parenthesised values load as strings, and rot_lim
      # contains unevaluated arithmetic; presumably parsed by the consumer —
      # confirm.
      rot_lim: '(-22.5 * 2, 22.5 * 2)'
      scale_lim: '(0.9, 1.1)'
      flip_dx_ratio: 0.5
      flip_dy_ratio: 0.5
      tran_lim: '(0.5, 0.5, 0.5)'

  # LiDAR-to-depth step; receives `grid_config` from the shared anchor.
  depth:
    _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
    <<: *grid_config

  # Image preparation step.
  img:
    _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
    _convert_: 'all'
    opencv_pp: true
    # The merged `is_train` flag should be false for evaluation.
    <<: *is_train
    data_config:
      # NOTE(review): parenthesised values load as strings (YAML has no tuple
      # syntax); presumably parsed by the consumer — confirm.
      input_size: '(256, 704)'
      src_size: '(900, 1600)'
      # Augmentation
      resize: '(-0.06, 0.44)'
      rot: '(-5.4, 5.4)'
      flip: true
      crop_h: '(0.0, 0.0)'
      random_crop_height: true
      vflip: true
      resize_test: 0.04
      pmd:
        brightness_delta: 32
        contrast_lower: 0.5
        contrast_upper: 1.5
        saturation_lower: 0.5
        saturation_upper: 1.5
        hue_delta: 18
        rate: 0.5


# Remaining top-level settings, siblings of `_target_`.
# NOTE(review): this mapping already has an explicit top-level `is_train` key
# (the anchor definition, whose value is a one-key mapping). Explicit keys take
# precedence over merged ones, so this merge likely has no effect and
# `is_train` stays a mapping rather than a boolean — confirm what the target
# actually receives.
<<: *is_train
checkpoint_path: null
hidden_layer_dim: 512
# Written with a mantissa dot: YAML 1.1 resolvers (e.g. plain PyYAML) read a
# bare `1e-4` as a string instead of a float.
lr: 1.0e-4