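# Hydra-style configuration for det_map.map.map_agent.MapAgent.
# YAML anchors (&name) defined near the top are reused further down via merge keys (<<: *name).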
_target_: det_map.map.map_agent.MapAgent
_convert_: 'all'
is_train: &is_train
is_train: True
point_cloud_range: &point_cloud_range
pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
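# pc_range follows the usual [x_min, y_min, z_min, x_max, y_max, z_max] layout in meters,
# i.e. a 30 m x 60 m x 20 m volume (assumed to be centered on the ego vehicle).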
lidar_filter_ranges: &lidar_filter_ranges
x_range: (-15.0, 15.0)
y_range: (-30.0, 30.0)
z_range: (-10.0, 10.0)
voxel_size: &voxel_size
voxel_size: [0.075, 0.075, 20.0]
img_voxel_size: &img_voxel_size
voxel_size: [0.3, 0.3, 20.0]
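# LiDAR voxels are 0.075 m in x/y, while the image-branch BEV grid is 4x coarser (0.3 m);
# both use a single 20 m voxel along z, spanning the full [-10, 10] m height range.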
dbound: &dbound
dbound: [1.0, 35.0, 0.5]
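# Depth bins for the view transform, presumably [min, max, step] in meters
# (68 bins from 1 m to 35 m at 0.5 m resolution).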
grid_config: &grid_config
grid_config:
x: (-15.0, 15.0, 0.6)
y: (-30.0, 30.0, 0.6)
z: (-10.0, 10.0, 20.0)
depth: (1.0, 60.0, 0.5)
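# BEV grid as [lower, upper, interval] per axis: 0.6 m cells give a 50 x 100 grid,
# matching bev_w/bev_h in the head below; depth spans 1-60 m in 0.5 m steps.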
img_norm_cfg: &img_norm_cfg
img_norm_cfg:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
to_rgb: True
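# Standard ImageNet mean/std; to_rgb: True converts BGR inputs to RGB before
# normalization (assuming the usual mmcv convention).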
map_classes: &map_classes
map_classes: ['divider', 'ped_crossing','boundary', 'centerline']
#fixed_ptsnum_per_gt_line: &fixed_ptsnum_per_gt_line
# fixed_ptsnum_per_gt_line: 20
#fixed_ptsnum_per_pred_line: &fixed_ptsnum_per_pred_line
# fixed_ptsnum_per_pred_line: 20
eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag
eval_use_same_gt_sample_num_flag: True
#_pos_dim_: &_pos_dim_
# _pos_dim_: 128
#_ffn_dim_: &_ffn_dim_
# _ffn_dim_: 512
#_num_levels_: &_num_levels_
# _num_levels_: 1
#bev_h_: &bev_h_
# bev_h_: 100
#bev_w_: &bev_w_
# bev_w_: 200
#queue_length: &queue_length
# queue_length: 1
aux_seg: &aux_seg_cfg
aux_seg:
use_aux_seg: False
bev_seg: False
pv_seg: False
seg_classes: 1
feat_down_sample: 32
pv_thickness: 1
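# Auxiliary BEV / perspective-view segmentation (a MapTRv2 option) is disabled here;
# seg_classes, feat_down_sample and pv_thickness should only take effect when use_aux_seg is True.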
#z_cfg : &z_cfg
#
# pred_z_flag: True
# gt_z_flag: True
model:
_target_: det_map.map.map_model.MapModel
_convert_: 'all'
use_grid_mask: True
video_test_mode: False
pretrained:
img: ckpts/resnet50-19c8e357.pth
img_backbone:
type: ResNet
depth: 50
num_stages: 4
out_indices: [3]
frozen_stages: 1
norm_cfg:
type: BN
requires_grad: False
norm_eval: True
style: pytorch
img_neck:
type: FPN
in_channels: [2048]
out_channels: 256
start_level: 0
add_extra_convs: on_output
num_outs: 1
relu_before_extra_convs: True
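# Image branch: ImageNet-pretrained ResNet-50 exposing only its last stage (2048 channels),
# with the early stage and BN statistics frozen, followed by a single-level FPN producing
# 256-dim features.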
pts_bbox_head:
type: MapTRv2Head
<<: *point_cloud_range
bev_h: 100
bev_w: 50
num_query: 900
num_vec_one2one: 20
num_vec_one2many: 300
k_one2many: 6
num_pts_per_vec: 20
num_pts_per_gt_vec: 20
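# MapTRv2-style hybrid matching (assumed): 20 one-to-one instance queries are used at
# inference, while 300 extra one-to-many queries (with ground truth repeated k_one2many
# times) provide auxiliary supervision; each map instance is modelled by 20 points.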
dir_interval: 1
query_embed_type: 'instance_pts'
transform_method: 'minmax'
gt_shift_pts_pattern: 'v2'
num_classes: 2
in_channels: 256
sync_cls_avg_factor: True
with_box_refine: True
as_two_stage: False
code_size: 2
code_weights: None
<<: *aux_seg_cfg
# z_cfg: *z_cfg
transformer:
type: MapTRPerceptionTransformer
bev_h: 100
bev_w: 50
# fuser:
# type: 'ConvFuser'
# in_channels: [256, 256]
# out_channels: 256
num_cams: 2
# z_cfg: *z_cfg
rotate_prev_bev: False
use_shift: True
use_can_bus: False
embed_dims: 256
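# Encoder: a single spatial-decoder layer that presumably lifts the two camera feature maps
# into the 100x50 BEV grid via deformable spatial cross-attention; previous-BEV rotation and
# CAN-bus signals are disabled.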
encoder:
type: 'SpatialDecoder'
num_layers: 1
<<: *point_cloud_range
grid_config:
x: [-15.0, 15.0, 0.6]
y: [-30.0, 30.0, 0.6]
z: [ -10.0, 10.0, 20.0 ]
data_config:
input_size: [256, 704]
transformerlayers:
type: 'SpatialDecoderLayer'
attn_cfgs:
- type: 'SpatialCrossAttention'
<<: *point_cloud_range
num_cams: 2
dropout: 0.0
embed_dims: 256
deformable_attention:
type: 'MSDeformableAttention'
embed_dims: 256
num_points: 8
num_levels: 1
ffn_cfgs:
type: 'FFN'
embed_dims: 256
feedforward_channels: 1024
ffn_drop: 0.0
act_cfg:
type: 'ReLU'
inplace: True
feedforward_channels: 1024
ffn_dropout: 0.0
operation_order: ['cross_attn', 'norm' ,'ffn', 'norm']
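# Decoder: 6 decoupled DETR layers (MapTRv2-style), presumably instance-level and point-level
# self-attention followed by deformable cross-attention over the BEV features.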
decoder:
type: MapTRDecoder
num_layers: 6
return_intermediate: True
transformerlayers:
type: DecoupledDetrTransformerDecoderLayer
num_vec: 20
num_pts_per_vec: 20
attn_cfgs:
- type: MultiheadAttention
embed_dims: 256
num_heads: 8
dropout: 0.1
- type: MultiheadAttention
embed_dims: 256
num_heads: 8
dropout: 0.1
- type: CustomMSDeformableAttention
embed_dims: 256
num_levels: 1
feedforward_channels: 512
ffn_dropout: 0.1
operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']
positional_encoding:
type: LearnedPositionalEncoding
num_feats: 128
row_num_embed: 100
col_num_embed: 50
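# Learned BEV positional encoding: row/col table sizes match bev_h x bev_w (100 x 50),
# and 2 * num_feats equals the 256-dim embedding used throughout.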
loss_cls:
type: FocalLoss
use_sigmoid: True
gamma: 2.0
alpha: 0.25
loss_weight: 2.0
loss_bbox:
type: L1Loss
loss_weight: 0.0
loss_iou:
type: GIoULoss
loss_weight: 0.0
loss_pts:
type: PtsL1Loss
loss_weight: 5.0
loss_dir:
type: PtsDirCosLoss
loss_weight: 0.005
loss_seg:
type: SimpleLoss
pos_weight: 4.0
loss_weight: 1.0
loss_pv_seg:
type: SimpleLoss
pos_weight: 1.0
loss_weight: 2.0
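# With loss_bbox and loss_iou weighted 0, supervision comes from the focal classification,
# point-wise L1 (loss_pts) and direction-cosine (loss_dir) terms; the two segmentation
# losses are configured but should stay inactive while use_aux_seg is False.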
# train_cfg:
# pts:
# grid_size: [512, 512, 1]
# <<: *voxel_size
# point_cloud_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
# out_size_factor: 4
# assigner:
# type: MapTRAssigner
# cls_cost:
# type: FocalLossCost
# weight: 2.0
# reg_cost:
# type: BBoxL1Cost
# weight: 0.0
# box_format: 'xywh'
# iou_cost:
# type: IoUCost
# iou_mode: 'giou'
# weight: 0.0
# pts_cost:
# type: OrderedPtsL1Cost
# weight: 5
# pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
pipelines:
lidar_filter:
_target_: det_map.data.pipelines.filter_lidar.LiDARFilter
_convert_: 'all'
close_radius: 1.0
<<: *lidar_filter_ranges
# this pipeline is only applied during training
point_shuffle:
_target_: det_map.data.pipelines.point_shuffle.PointShuffle
<<: *is_train
lidar_aug:
_target_: det_map.data.pipelines.lidar_aug.LiDARAug
bda_aug_conf:
rot_lim: (-22.5 * 2, 22.5 * 2)
scale_lim: (0.9, 1.1)
flip_dx_ratio: 0.5
flip_dy_ratio: 0.5
tran_lim: (0.5, 0.5, 0.5)
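# BEV-space augmentation: rotation within +/-45 deg, scaling in [0.9, 1.1], x/y flips with
# probability 0.5 and up to 0.5 m translation per axis (assuming BEVDet-style bda_aug_conf).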
<<: *lidar_filter_ranges
# to disable augmentation for the map branch, set this is_train to False
<<: *is_train
depth:
_target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
<<: *grid_config
img:
_target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
_convert_: 'all'
opencv_pp: True
# This flag must be set to False during evaluation.
<<: *is_train
data_config:
input_size: (256, 704)
src_size: (900, 1600)
# Augmentation
resize: (-0.06, 0.44)
rot: (-5.4, 5.4)
flip: True
crop_h: (0.0, 0.0)
random_crop_height: True
vflip: True
resize_test: 0.04
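# BEVDet-style image augmentation (assumed): the resize/rot/crop_h ranges are offsets
# applied on top of the base 1600x900 -> 704x256 scaling, with random flips and height
# crops at train time; resize_test adds a fixed 0.04 resize offset at evaluation.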
pmd:
brightness_delta: 32
contrast_lower: 0.5
contrast_upper: 1.5
saturation_lower: 0.5
saturation_upper: 1.5
hue_delta: 18
rate: 0.5
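# Photometric distortion (pmd): brightness/contrast/saturation jitter and hue shifts,
# each applied with probability `rate` (0.5), assuming the usual mmdet-style semantics.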
#<<: *is_train
checkpoint_path: null
hidden_layer_dim: 512
lr: 1e-4
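# Trainer-level settings read by MapAgent: no checkpoint to resume from, a 512-dim hidden
# layer (presumably for the agent's MLP head) and a learning rate of 1e-4.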