mins committed on Aug 13, 2024

Commit

c501468

1 Parent(s): 86755c6

eva_base_tiny

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

iter_21096.pth +3 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json +0 -0
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin +0 -0
projects/configs/OmniDrive/eva_base_tinyllama.py +294 -0
projects/configs/OmniDrive/eva_large_llama7b.py +296 -0
projects/mmdet3d_plugin/__init__.py +11 -0
projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__init__.py +3 -0
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/apis/mmdet_train.py +204 -0
projects/mmdet3d_plugin/core/apis/test.py +164 -0
projects/mmdet3d_plugin/core/apis/train.py +70 -0
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__init__.py +4 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py +158 -0
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py +91 -0
projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py +63 -0
projects/mmdet3d_plugin/core/bbox/coders/__init__.py +2 -0
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py +111 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py +4 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc +0 -0
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc +0 -0

iter_21096.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3119e1ca3d54933c48df1409537879079a492895c7c36f4f7ae47c223ceb8de7
+size 14575027161

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json RENAMED Viewed

File without changes

{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin RENAMED Viewed

File without changes

projects/configs/OmniDrive/eva_base_tinyllama.py ADDED Viewed

	@@ -0,0 +1,294 @@

+_base_ = [
+    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
+    '../../../mmdetection3d/configs/_base_/default_runtime.py'
+]
+backbone_norm_cfg = dict(type='LN', requires_grad=True)
+plugin=True
+plugin_dir='projects/mmdet3d_plugin/'
+# If point cloud range is changed, the models should also change their point
+# cloud range accordingly
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+voxel_size = [0.2, 0.2, 8]
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# For nuScenes we usually do 10-class detection
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+num_gpus = 8
+batch_size = 2
+num_iters_per_epoch = 28130 // (num_gpus * batch_size)
+num_epochs = 12
+llm_path = 'ckpts/pretrain_tiny'
+collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
+input_modality = dict(
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=True)
+model = dict(
+    type='Petr3D',
+    save_path='./results_planning_tiny/',  #save path for vlm models.
+    use_grid_mask=True,
+    frozen=False,
+    use_lora=False,
+    tokenizer=llm_path,
+    lm_head=llm_path, # set to None if the LLM head is not used
+    img_backbone=dict(
+        type='EVAViT',
+        img_size=640,
+        patch_size=16,
+        window_size=16,
+        in_chans=3,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4*2/3,
+        window_block_indexes=(0, 1, 3, 4, 6, 7, 9, 10),
+        qkv_bias=True,
+        drop_path_rate=0.1,
+        flash_attn=True,
+        with_cp=True,
+        frozen=False),
+    map_head=dict(
+        type='PETRHeadM',
+        num_classes=1,
+        in_channels=768,
+        out_dims=2048,
+        memory_len=600,
+        with_mask=True, # map query can't see vlm tokens
+        topk_proposals=300,
+        num_lane=1800,   # 300+1500
+        num_lanes_one2one=300,
+        k_one2many=5,
+        lambda_one2many=1.0,
+        num_extra=256,
+        n_control=11,
+        pc_range=point_cloud_range,
+        code_weights = [1.0, 1.0],
+        transformer=dict(
+            type='PETRTemporalTransformer',
+                 input_dimension=256,
+                 output_dimension=256,
+                 num_layers=6,
+                 embed_dims=256,
+                 num_heads=8,
+                 feedforward_dims=2048,
+                 dropout=0.1,
+                 with_cp=True,
+                 flash_attn=True,),
+        train_cfg=dict(
+                assigner=dict(
+                    type='LaneHungarianAssigner',
+                    cls_cost=dict(type='FocalLossCost', weight=1.5),
+                    reg_cost=dict(type='LaneL1Cost', weight=0.02),
+                    iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.5),
+        loss_bbox=dict(type='L1Loss', loss_weight=0.02),
+        loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
+    pts_bbox_head=dict(
+        type='StreamPETRHead',
+        num_classes=10,
+        in_channels=768,
+        out_dims=2048,
+        num_query=600,
+        with_mask=True,
+        memory_len=600,
+        topk_proposals=300,
+        num_propagated=300,
+        num_extra=256,
+        n_control=11, # align with centerline query definition
+        match_with_velo=False,
+        scalar=10, ##noise groups
+        noise_scale = 1.0,
+        dn_weight= 1.0, ##dn loss weight
+        split = 0.75, ###positive rate
+        code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+        transformer=dict(
+            type='PETRTemporalTransformer',
+                 input_dimension=256,
+                 output_dimension=256,
+                 num_layers=6,
+                 embed_dims=256,
+                 num_heads=8,
+                 feedforward_dims=2048,
+                 dropout=0.1,
+                 with_cp=True,
+                 flash_attn=True,
+            ),
+        bbox_coder=dict(
+            type='NMSFreeCoder',
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+            pc_range=point_cloud_range,
+            max_num=300,
+            voxel_size=voxel_size,
+            num_classes=10),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=2.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
+        loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
+        # model training and testing settings
+    train_cfg=dict(pts=dict(
+        grid_size=[512, 512, 1],
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range,
+        out_size_factor=4,
+        assigner=dict(
+            type='HungarianAssigner3D',
+            cls_cost=dict(type='FocalLossCost', weight=2.0),
+            reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
+            iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
+            pc_range=point_cloud_range),)
+            )
+            )
+dataset_type = 'CustomNuScenesDataset'
+data_root = './data/nuscenes/'
+file_client_args = dict(backend='disk')
+ida_aug_conf = {
+        "resize_lim": (0.37, 0.45),
+        "final_dim": (320, 640),
+        "bot_pct_lim": (0.0, 0.0),
+        "rot_lim": (0.0, 0.0),
+        "H": 900,
+        "W": 1600,
+        "rand_flip": False,
+    }
+train_pipeline = [
+    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
+        with_label=True, with_bbox_depth=True),
+    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='ObjectNameFilter', classes=class_names),
+    dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
+    dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
+    dict(type='LoadAnnoatationVQA',
+         base_vqa_path='./data/nuscenes/vqa/train/',
+         base_desc_path='./data/nuscenes/desc/train/',
+         base_conv_path='./data/nuscenes/conv/train/',
+         base_key_path='./data/nuscenes/keywords/train/',
+         tokenizer=llm_path,
+         max_length=2048,
+         ignore_type=[],
+         lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
+    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
+    dict(type='PadMultiViewImage', size_divisor=32),
+    dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
+    dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
+             meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
+]
+test_pipeline = [
+    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
+    dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
+    dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
+    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
+    dict(type='PadMultiViewImage', size_divisor=32),
+    dict(type='LoadAnnoatationVQATest',
+         base_vqa_path='./data/nuscenes/vqa/val/',
+         base_conv_path='./data/nuscenes/conv/val/',
+         base_counter_path='./data/nuscenes/eval_cf/',
+         load_type=["planning"], # please don't test all the questions in a single test; it takes quite a long time
+         tokenizer=llm_path,
+         max_length=2048,),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='PETRFormatBundle3D',
+                collect_keys=collect_keys,
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
+            meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
+        ])
+]
+data = dict(
+    samples_per_gpu=batch_size,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
+        seq_split_num=1, # streaming video training
+        seq_mode=True, # streaming video training
+        pipeline=train_pipeline,
+        classes=class_names,
+        modality=input_modality,
+        test_mode=False,
+        use_valid_flag=True,
+        filter_empty_gt=False,
+        box_type_3d='LiDAR'),
+    val=dict(
+        type=dataset_type,
+        eval_mode=['lane', 'det'],
+        pipeline=test_pipeline,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
+        classes=class_names,
+        modality=input_modality),
+    test=dict(
+        type=dataset_type,
+        eval_mode=['lane', 'det'],
+        pipeline=test_pipeline,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
+        classes=class_names,
+        modality=input_modality),
+    shuffler_sampler=dict(
+        type='InfiniteGroupEachSampleInBatchSampler',
+        seq_split_num=2,
+        warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
+        num_iters_to_seq=num_iters_per_epoch,
+    ),
+    nonshuffler_sampler=dict(type='DistributedSampler')
+    )
+optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
+                 lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
+                 paramwise_cfg={'decay_rate': 0.9,
+                                'head_decay_rate': 4.0,
+                                'lm_head_decay_rate': 0.1,
+                                'decay_type': 'vit_wise',
+                                'num_layers': 24,
+                                })
+optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='CosineAnnealing',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    min_lr_ratio=1e-3,
+    )
+evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
+find_unused_parameters=False #### when using checkpoint, find_unused_parameters must be False
+checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
+runner = dict(
+    type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
+load_from=None
+resume_from=None

projects/configs/OmniDrive/eva_large_llama7b.py ADDED Viewed

	@@ -0,0 +1,296 @@

+_base_ = [
+    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
+    '../../../mmdetection3d/configs/_base_/default_runtime.py'
+]
+backbone_norm_cfg = dict(type='LN', requires_grad=True)
+plugin=True
+plugin_dir='projects/mmdet3d_plugin/'
+# If point cloud range is changed, the models should also change their point
+# cloud range accordingly
+point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
+voxel_size = [0.2, 0.2, 8]
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+# For nuScenes we usually do 10-class detection
+class_names = [
+    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
+    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
+]
+num_gpus = 8
+batch_size = 2
+num_iters_per_epoch = 28130 // (num_gpus * batch_size)
+num_epochs = 6
+llm_path = 'ckpts/final/'
+collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
+input_modality = dict(
+    use_lidar=False,
+    use_camera=True,
+    use_radar=False,
+    use_map=False,
+    use_external=True)
+model = dict(
+    type='Petr3D',
+    save_path='./results_planning_only/',  #save path for vlm models.
+    use_grid_mask=True,
+    frozen=False,
+    use_lora=True,
+    tokenizer=llm_path,
+    lm_head=llm_path, # set to None if the LLM head is not used
+    img_backbone=dict(
+        type='EVAViT',
+        img_size=640,
+        patch_size=16,
+        window_size=16,
+        in_chans=3,
+        embed_dim=1024,
+        depth=24,
+        num_heads=16,
+        mlp_ratio=4*2/3,
+        window_block_indexes = (
+        list(range(0, 2)) + list(range(3, 5)) + list(range(6, 8)) + list(range(9, 11)) + list(range(12, 14)) + list(range(15, 17)) + list(range(18, 20)) + list(range(21, 23))
+        ),
+        qkv_bias=True,
+        drop_path_rate=0.3,
+        flash_attn=True,
+        with_cp=True,
+        frozen=False,),
+    map_head=dict(
+        type='PETRHeadM',
+        num_classes=1,
+        in_channels=1024,
+        out_dims=4096,
+        memory_len=600,
+        with_mask=True, # map query can't see vlm tokens
+        topk_proposals=300,
+        num_lane=1800,   # 300+1500
+        num_lanes_one2one=300,
+        k_one2many=5,
+        lambda_one2many=1.0,
+        num_extra=256,
+        n_control=11,
+        pc_range=point_cloud_range,
+        code_weights = [1.0, 1.0],
+        transformer=dict(
+            type='PETRTemporalTransformer',
+                 input_dimension=256,
+                 output_dimension=256,
+                 num_layers=6,
+                 embed_dims=256,
+                 num_heads=8,
+                 feedforward_dims=2048,
+                 dropout=0.1,
+                 with_cp=True,
+                 flash_attn=True,),
+        train_cfg=dict(
+                assigner=dict(
+                    type='LaneHungarianAssigner',
+                    cls_cost=dict(type='FocalLossCost', weight=1.5),
+                    reg_cost=dict(type='LaneL1Cost', weight=0.02),
+                    iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=1.5),
+        loss_bbox=dict(type='L1Loss', loss_weight=0.02),
+        loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
+    pts_bbox_head=dict(
+        type='StreamPETRHead',
+        num_classes=10,
+        in_channels=1024,
+        out_dims=4096,
+        num_query=600,
+        with_mask=True,
+        memory_len=600,
+        topk_proposals=300,
+        num_propagated=300,
+        num_extra=256,
+        n_control=11, # align with centerline query definition
+        match_with_velo=False,
+        scalar=10, ##noise groups
+        noise_scale = 1.0,
+        dn_weight= 1.0, ##dn loss weight
+        split = 0.75, ###positive rate
+        code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+        transformer=dict(
+            type='PETRTemporalTransformer',
+                 input_dimension=256,
+                 output_dimension=256,
+                 num_layers=6,
+                 embed_dims=256,
+                 num_heads=8,
+                 feedforward_dims=2048,
+                 dropout=0.1,
+                 with_cp=True,
+                 flash_attn=True,
+            ),
+        bbox_coder=dict(
+            type='NMSFreeCoder',
+            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+            pc_range=point_cloud_range,
+            max_num=300,
+            voxel_size=voxel_size,
+            num_classes=10),
+        loss_cls=dict(
+            type='FocalLoss',
+            use_sigmoid=True,
+            gamma=2.0,
+            alpha=0.25,
+            loss_weight=2.0),
+        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
+        loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
+        # model training and testing settings
+    train_cfg=dict(pts=dict(
+        grid_size=[512, 512, 1],
+        voxel_size=voxel_size,
+        point_cloud_range=point_cloud_range,
+        out_size_factor=4,
+        assigner=dict(
+            type='HungarianAssigner3D',
+            cls_cost=dict(type='FocalLossCost', weight=2.0),
+            reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
+            iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
+            pc_range=point_cloud_range),)
+            )
+            )
+dataset_type = 'CustomNuScenesDataset'
+data_root = './data/nuscenes/'
+file_client_args = dict(backend='disk')
+ida_aug_conf = {
+        "resize_lim": (0.37, 0.45),
+        "final_dim": (320, 640),
+        "bot_pct_lim": (0.0, 0.0),
+        "rot_lim": (0.0, 0.0),
+        "H": 900,
+        "W": 1600,
+        "rand_flip": False,
+    }
+train_pipeline = [
+    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
+    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
+        with_label=True, with_bbox_depth=True),
+    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='ObjectNameFilter', classes=class_names),
+    dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
+    dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
+    dict(type='LoadAnnoatationVQA',
+         base_vqa_path='./data/nuscenes/vqa/train/',
+         base_desc_path='./data/nuscenes/desc/train/',
+         base_conv_path='./data/nuscenes/conv/train/',
+         base_key_path='./data/nuscenes/keywords/train/',
+         tokenizer=llm_path,
+         max_length=2048,
+         ignore_type=[],
+         lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
+    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
+    dict(type='PadMultiViewImage', size_divisor=32),
+    dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
+    dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
+             meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
+]
+test_pipeline = [
+    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
+    dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
+    dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
+    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
+    dict(type='PadMultiViewImage', size_divisor=32),
+    dict(type='LoadAnnoatationVQATest',
+         base_vqa_path='./data/nuscenes/vqa/val/',
+         base_conv_path='./data/nuscenes/conv/val/',
+         base_counter_path='./data/nuscenes/eval_cf/',
+         load_type=["planning"], # please don't test all the questions in a single test; it takes quite a long time
+         tokenizer=llm_path,
+         max_length=2048,),
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='PETRFormatBundle3D',
+                collect_keys=collect_keys,
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
+            meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
+        ])
+]
+data = dict(
+    samples_per_gpu=batch_size,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
+        seq_split_num=1, # streaming video training
+        seq_mode=True, # streaming video training
+        pipeline=train_pipeline,
+        classes=class_names,
+        modality=input_modality,
+        test_mode=False,
+        use_valid_flag=True,
+        filter_empty_gt=False,
+        box_type_3d='LiDAR'),
+    val=dict(
+        type=dataset_type,
+        eval_mode=['lane', 'det'],
+        pipeline=test_pipeline,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
+        classes=class_names,
+        modality=input_modality),
+    test=dict(
+        type=dataset_type,
+        eval_mode=['lane', 'det'],
+        pipeline=test_pipeline,
+        ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
+        classes=class_names,
+        modality=input_modality),
+    shuffler_sampler=dict(
+        type='InfiniteGroupEachSampleInBatchSampler',
+        seq_split_num=2,
+        warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
+        num_iters_to_seq=num_iters_per_epoch,
+    ),
+    nonshuffler_sampler=dict(type='DistributedSampler')
+    )
+optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
+                 lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
+                 paramwise_cfg={'decay_rate': 0.9,
+                                'head_decay_rate': 4.0,
+                                'lm_head_decay_rate': 0.1,
+                                'decay_type': 'vit_wise',
+                                'num_layers': 24,
+                                })
+optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
+# learning policy
+lr_config = dict(
+    policy='CosineAnnealing',
+    warmup='linear',
+    warmup_iters=500,
+    warmup_ratio=1.0 / 3,
+    min_lr_ratio=1e-3,
+    )
+evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
+find_unused_parameters=False #### when using checkpoint, find_unused_parameters must be False
+checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
+runner = dict(
+    type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
+load_from=None
+resume_from=None

projects/mmdet3d_plugin/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
+from .core.bbox.coders.nms_free_coder import NMSFreeCoder
+from .core.bbox.match_costs import BBox3DL1Cost
+from .core.hook import *
+from .datasets import CustomNuScenesDataset
+from .datasets.pipelines import *
+from .models.losses import *
+from .models.dense_heads import  *
+from .models.detectors import *
+from .models.necks import *
+from .models.backbones import *

projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (656 Bytes). View file

projects/mmdet3d_plugin/core/apis/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+from .train import custom_train_model
+from .mmdet_train import custom_train_detector
+from .test import custom_multi_gpu_test

projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (365 Bytes). View file

projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (312 Bytes). View file

projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc ADDED Viewed

Binary file (4.57 kB). View file

projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc ADDED Viewed

Binary file (4.53 kB). View file

projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc ADDED Viewed

Binary file (4.05 kB). View file

projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc ADDED Viewed

Binary file (4.01 kB). View file

projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc ADDED Viewed

Binary file (1.18 kB). View file

projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc ADDED Viewed

Binary file (1.11 kB). View file

projects/mmdet3d_plugin/core/apis/mmdet_train.py ADDED Viewed

	@@ -0,0 +1,204 @@

+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+# ---------------------------------------------
+#  Modified by Shihao Wang
+# ---------------------------------------------
+import random
+import warnings
+import numpy as np
+import torch
+import torch.distributed as dist
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
+                         Fp16OptimizerHook, OptimizerHook, build_optimizer,
+                         build_runner, get_dist_info)
+from mmcv.utils import build_from_cfg
+from mmdet.core import EvalHook
+from mmdet.datasets import (build_dataset,
+                            replace_ImageToTensor)
+from mmdet.utils import get_root_logger
+import time
+import os.path as osp
+from projects.mmdet3d_plugin.datasets.builder import build_dataloader
+from projects.mmdet3d_plugin.core.evaluation.eval_hooks import CustomDistEvalHook
+from projects.mmdet3d_plugin.datasets import custom_build_dataset
def custom_train_detector(model,
                          dataset,
                          cfg,
                          distributed=False,
                          validate=False,
                          timestamp=None,
                          eval_model=None,
                          meta=None):
    """Build data loaders, runner and hooks from ``cfg`` and start training.

    Args:
        model: Model to train. It is moved to GPU and wrapped in
            ``MMDistributedDataParallel`` (distributed) or ``MMDataParallel``.
        dataset: A dataset, or a list/tuple of datasets; one data loader is
            built per dataset.
        cfg: Training config. Fields read here include ``log_level``,
            ``data``, ``gpu_ids``, ``seed``, ``runner``, ``optimizer``,
            ``optimizer_config``, ``lr_config``, ``checkpoint_config``,
            ``log_config``, ``work_dir``, ``workflow``, ``resume_from`` and
            ``load_from``, plus the optional ``fp16``, ``momentum_config``,
            ``evaluation``, ``custom_hooks``, ``find_unused_parameters``,
            ``resume_optimizer`` and ``total_epochs``.
        distributed (bool): Whether training runs in distributed mode.
        validate (bool): Whether to register an evaluation hook built from
            ``cfg.data.val``.
        timestamp: Stored on the runner so that the ``.log`` and
            ``.log.json`` file names agree.
        eval_model: Optional second model. When given it is wrapped like
            ``model`` and forwarded to the runner as ``eval_model``.
        meta: Forwarded to the runner unchanged.

    Raises:
        NotImplementedError: If ``validate`` is set and
            ``cfg.data.val.samples_per_gpu`` is greater than 1.
    """
    logger = get_root_logger(cfg.log_level)

    # Normalise the runner section before anything reads ``cfg.runner``: the
    # data loaders below consume it too, so doing this later would make the
    # legacy ``total_epochs`` fallback unreachable.
    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    elif 'total_epochs' in cfg:
        assert cfg.total_epochs == cfg.runner.max_epochs

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            # e.g. dict(type='DistributedGroupSampler')
            shuffler_sampler=cfg.data.shuffler_sampler,
            # e.g. dict(type='DistributedSampler')
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
            runner_type=cfg.runner,
        ) for ds in dataset
    ]

    # put model(s) on gpus
    if distributed:
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
        if eval_model is not None:
            eval_model = MMDistributedDataParallel(
                eval_model.cuda(),
                device_ids=[torch.cuda.current_device()],
                broadcast_buffers=False,
                find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
        if eval_model is not None:
            eval_model = MMDataParallel(
                eval_model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner; ``eval_model`` is only passed when one was supplied
    optimizer = build_optimizer(model, cfg.optimizer)
    runner_args = dict(
        model=model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta)
    if eval_model is not None:
        runner_args['eval_model'] = eval_model
    runner = build_runner(cfg.runner, default_args=runner_args)

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed and isinstance(runner, EpochBasedRunner):
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Raised explicitly (not asserted) so the restriction still holds
            # when Python runs with -O.
            raise NotImplementedError(
                'Validation with samples_per_gpu > 1 is not supported, got '
                f'samples_per_gpu={val_samples_per_gpu}')
        val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
            # e.g. dict(type='DistributedGroupSampler')
            shuffler_sampler=cfg.data.shuffler_sampler,
            # e.g. dict(type='DistributedSampler')
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
        )
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_cfg['jsonfile_prefix'] = osp.join('val', cfg.work_dir, time.ctime().replace(' ','_').replace(':','_'))
        eval_hook = CustomDistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            # copy so that popping ``priority`` does not mutate the config
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    # resuming takes precedence over loading plain weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from, resume_optimizer=cfg.get('resume_optimizer', True))
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)

projects/mmdet3d_plugin/core/apis/test.py ADDED Viewed

	@@ -0,0 +1,164 @@

+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+import time
+import mmcv
+import torch
+import torch.distributed as dist
+from mmcv.image import tensor2imgs
+from mmcv.runner import get_dist_info
+from mmdet.core import encode_mask_results
+import mmcv
+import numpy as np
+import pycocotools.mask as mask_util
def custom_encode_mask_results(mask_results):
    """Encode per-class bitmap masks to RLE. Semantic masks only.

    Args:
        mask_results (list | tuple): One bitmap mask per class; each item is
            indexed as ``mask[:, :, np.newaxis]``, so it is presumably a 2D
            array (verify against the caller).

    Returns:
        list: A single-element list wrapping the list of per-class RLE
        encodings.
    """
    rle_per_class = []
    for class_mask in mask_results:
        # Add a trailing axis and convert to a Fortran-ordered uint8 array
        # before handing the bitmap to the RLE encoder.
        bitmap = np.array(
            class_mask[:, :, np.newaxis], order='F', dtype='uint8')
        # ``encode`` returns one entry per mask in the trailing axis; there
        # is exactly one here.
        rle_per_class.append(mask_util.encode(bitmap)[0])
    return [rle_per_class]
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.

    Each rank runs inference on its share of ``data_loader`` and the partial
    results are then gathered. With ``gpu_collect=True`` the results are
    gathered through ``collect_results_gpu``; otherwise each rank dumps its
    part under ``tmpdir`` and rank 0 loads them (``collect_results_cpu``).

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode. Mask results use ``tmpdir`` with a
            ``'_mask'`` suffix.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.

    Returns:
        list | dict: The collected bbox results when no mask results were
        produced, otherwise ``{'bbox_results': ..., 'mask_results': ...}``.
    """
    model.eval()
    bbox_results = []
    mask_results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    have_mask = False
    for data in data_loader:
        # Reset per iteration so a result without bbox entries can neither
        # hit an unbound name nor reuse the previous batch's size.
        batch_size = 0
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            if isinstance(result, dict):
                has_bbox = 'bbox_results' in result
                if has_bbox:
                    bbox_result = result['bbox_results']
                    batch_size = len(bbox_result)
                    bbox_results.extend(bbox_result)
                if result.get('mask_results') is not None:
                    # encode mask results
                    mask_result = custom_encode_mask_results(
                        result['mask_results'])
                    mask_results.extend(mask_result)
                    have_mask = True
                    if not has_bbox:
                        # mask-only output: advance the progress bar by the
                        # number of mask entries instead
                        batch_size = len(mask_result)
            else:
                batch_size = len(result)
                bbox_results.extend(result)
        if rank == 0:
            # every rank is assumed to process the same number of samples
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        bbox_results = collect_results_gpu(bbox_results, len(dataset))
        if have_mask:
            mask_results = collect_results_gpu(mask_results, len(dataset))
        else:
            mask_results = None
    else:
        bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)
        # keep mask parts in a separate directory from the bbox parts
        tmpdir = tmpdir+'_mask' if tmpdir is not None else None
        if have_mask:
            mask_results = collect_results_cpu(mask_results, len(dataset), tmpdir)
        else:
            mask_results = None

    if mask_results is None:
        return bbox_results
    return {'bbox_results': bbox_results, 'mask_results': mask_results}
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank results on rank 0 through pickle files in a directory.

    Args:
        result_part (list): Results produced by the current rank.
        size (int): Number of results to keep; anything beyond it is dropped.
        tmpdir (str | None): Directory used to exchange the part files. When
            ``None``, rank 0 creates one under ``.dist_test`` and broadcasts
            its path to the other ranks.

    Returns:
        list | None: The concatenated results on rank 0, ``None`` elsewhere.
    """
    rank, world_size = get_dist_info()

    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        # Broadcast the directory name as a fixed-size byte tensor.
        MAX_LEN = 512
        # 32 is whitespace; the padding is stripped again after decoding
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            created_dir = tempfile.mkdtemp(dir='.dist_test')
            encoded_dir = torch.tensor(
                bytearray(created_dir.encode()),
                dtype=torch.uint8,
                device='cuda')
            dir_tensor[:len(encoded_dir)] = encoded_dir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # dump this rank's part, then wait until every rank has written its file
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()

    # only rank 0 assembles the final list
    if rank != 0:
        return None

    part_list = [
        mmcv.load(osp.join(tmpdir, f'part_{i}.pkl'))
        for i in range(world_size)
    ]
    # Parts are concatenated rank by rank rather than interleaved, because
    # the evaluation sampler was changed so that each gpu handles a
    # continuous run of samples.
    ordered_results = [item for part in part_list for item in part]
    # the dataloader may pad some samples
    ordered_results = ordered_results[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return ordered_results
def collect_results_gpu(result_part, size):
    """Gather per-rank results; currently delegates to the CPU collector.

    Args:
        result_part (list): Results produced by the current rank.
        size (int): Number of results to keep.

    Returns:
        list | None: Whatever ``collect_results_cpu`` returns for this rank
        (the gathered list on rank 0, ``None`` elsewhere).
    """
    # The result must be returned, otherwise callers using gpu_collect=True
    # always receive None.
    return collect_results_cpu(result_part, size)

projects/mmdet3d_plugin/core/apis/train.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# ---------------------------------------------
+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Zhiqi Li
+# ---------------------------------------------
+# ---------------------------------------------
+#  Modified by Shihao Wang
+# ---------------------------------------------
+from .mmdet_train import custom_train_detector
+from mmseg.apis import train_segmentor
+from mmdet.apis import train_detector
def custom_train_model(model,
                       dataset,
                       cfg,
                       distributed=False,
                       validate=False,
                       timestamp=None,
                       eval_model=None,
                       meta=None):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.

    All arguments are forwarded unchanged to ``custom_train_detector``.

    Raises:
        NotImplementedError: If ``cfg.model.type`` is ``'EncoderDecoder3D'``,
            which this custom training path does not support.
    """
    if cfg.model.type in ['EncoderDecoder3D']:
        # Raised explicitly (not asserted) so that running Python with -O
        # cannot turn this into a silent no-op.
        raise NotImplementedError(
            f'custom_train_model does not support model type '
            f'{cfg.model.type!r}')
    custom_train_detector(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        eval_model=eval_model,
        meta=meta)
def train_model(model,
                dataset,
                cfg,
                distributed=False,
                validate=False,
                timestamp=None,
                meta=None):
    """Dispatch training to the segmentor or detector launcher based on cfg.

    ``cfg.model.type == 'EncoderDecoder3D'`` selects ``train_segmentor``;
    every other model type goes to ``train_detector``. All arguments are
    forwarded unchanged. Kept because a different eval_hook is needed in the
    runner; should be deprecated in the future.
    """
    is_segmentor = cfg.model.type in ['EncoderDecoder3D']
    launch = train_segmentor if is_segmentor else train_detector
    launch(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta)

projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc ADDED Viewed

Binary file (1.39 kB). View file

projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc ADDED Viewed

Binary file (1.33 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .hungarian_assigner_3d import HungarianAssigner3D
+from .hungarian_assigner_2d import HungarianAssigner2D
+from .map_assigner import  LaneHungarianAssigner
# Export every assigner imported above, including the lane assigner.
__all__ = [
    'HungarianAssigner3D', 'HungarianAssigner2D', 'LaneHungarianAssigner'
]

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (437 Bytes). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (384 Bytes). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc ADDED Viewed

Binary file (5.6 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc ADDED Viewed

Binary file (5.52 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc ADDED Viewed

Binary file (2.4 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc ADDED Viewed

Binary file (2.31 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc ADDED Viewed

Binary file (1.6 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc ADDED Viewed

Binary file (1.52 kB). View file

projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# Copyright (c) OpenMMLab. All rights reserved.
+# ---------------------------------------------
+#  Modified by Shihao Wang
+# ---------------------------------------------
+import torch
+from mmdet.core.bbox.builder import BBOX_ASSIGNERS
+from mmdet.core.bbox.assigners import AssignResult
+from mmdet.core.bbox.assigners import BaseAssigner
+from mmdet.core.bbox.match_costs import build_match_cost
+from mmdet.core import bbox_cxcywh_to_xyxy
+try:
+    from scipy.optimize import linear_sum_assignment
+except ImportError:
+    linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner2D(BaseAssigner):
    """One-to-one Hungarian matching between 2D predictions and ground truth.

    The matching cost is the sum of four terms: classification cost, box
    regression cost, box IoU cost and 2D-center regression cost. Targets do
    not include a "no object" entry, so there are generally more predictions
    than targets; predictions left unmatched are treated as background. Each
    query prediction ends up with:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cls_cost (dict): Config of the classification match cost.
            Default ``dict(type='ClassificationCost', weight=1.)``.
        reg_cost (dict): Config of the box regression match cost.
            Default ``dict(type='BBoxL1Cost', weight=1.0)``.
        iou_cost (dict): Config of the box IoU match cost.
            Default ``dict(type='IoUCost', iou_mode='giou', weight=1.0)``.
        centers2d_cost (dict): Config of the 2D-center match cost.
            Default ``dict(type='BBox3DL1Cost', weight=1.0)``.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0),
                 centers2d_cost=dict(type='BBox3DL1Cost', weight=1.0)):
        # Each config dict is turned into a callable cost object.
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.centers2d_cost = build_match_cost(centers2d_cost)

    def assign(self,
               bbox_pred,
               cls_pred,
               pred_centers2d,
               gt_bboxes,
               gt_labels,
               centers2d,
               img_meta,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Match every query prediction to a ground truth or to background.

        In the returned ``assigned_gt_inds``, -1 means "don't care", 0 means
        negative sample and a positive number is the 1-based index of the
        assigned gt. Steps, in order:

        1. mark every prediction as -1
        2. compute the summed cost matrix
        3. run Hungarian matching on CPU
        4. reset everything to 0 (background), then write ``gt index + 1``
           and the gt label for every matched prediction

        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            pred_centers2d (Tensor): Predicted 2D centers, compared against
                ``centers2d`` after the latter is divided by the padded
                image width/height.
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
            centers2d (Tensor): Ground truth 2D centers in unnormalized
                (x, y) coordinates.
            img_meta (dict): Meta information for current image; only
                ``'pad_shape'`` is read here.
            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are
                labelled as `ignored`. Must be None.
            eps (int | float, optional): Unused by this method.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
        assigned_labels = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

        nothing_to_match = num_gts == 0 or num_bboxes == 0
        if nothing_to_match:
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # scale factor (w, h, w, h) taken from the padded image shape
        img_h, img_w, _ = img_meta['pad_shape']
        factor = gt_bboxes.new_tensor(
            [img_w, img_h, img_w, img_h]).unsqueeze(0)

        # 2. compute the individual costs
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        # regression cost is evaluated in normalized coordinates
        reg_cost = self.reg_cost(bbox_pred, gt_bboxes / factor)
        # iou cost is evaluated in pixel (x1, y1, x2, y2) coordinates
        pred_xyxy = bbox_cxcywh_to_xyxy(bbox_pred) * factor
        iou_cost = self.iou_cost(pred_xyxy, gt_bboxes)
        # 2D centers are normalized by (w, h) only
        centers2d_cost = self.centers2d_cost(
            pred_centers2d, centers2d / factor[:, 0:2])

        cost = cls_cost + reg_cost + iou_cost + centers2d_cost
        # replace non-finite entries so the solver always gets valid input
        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        row_inds, col_inds = linear_sum_assignment(cost)
        device = bbox_pred.device
        matched_row_inds = torch.from_numpy(row_inds).to(device)
        matched_col_inds = torch.from_numpy(col_inds).to(device)

        # 4. everything is background unless it was matched
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)

projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py ADDED Viewed

	@@ -0,0 +1,91 @@

+# ------------------------------------------------------------------------
+# Modified from DETR3D (https://github.com/WangYueFt/detr3d)
+# Copyright (c) 2021 Wang, Yue
+# ------------------------------------------------------------------------
+import torch
+from mmdet.core.bbox.builder import BBOX_ASSIGNERS
+from mmdet.core.bbox.assigners import AssignResult
+from mmdet.core.bbox.assigners import BaseAssigner
+from mmdet.core.bbox.match_costs import build_match_cost
+from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
+try:
+    from scipy.optimize import linear_sum_assignment
+except ImportError:
+    linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    """One-to-one Hungarian matching between 3D predictions and ground truth.

    Only the classification and regression costs enter the matching cost;
    the IoU cost object is built but not used by ``assign``.

    Args:
        cls_cost (dict): Config of the classification match cost.
        reg_cost (dict): Config of the box regression match cost.
        iou_cost (dict): Config of the IoU match cost.
        pc_range: Passed to ``normalize_bbox`` together with the gt boxes.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pc_range=None):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.pc_range = pc_range

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               gt_bboxes_ignore=None,
               code_weights=None,
               with_velo=False,
               eps=1e-7):
        """Match every query prediction to a ground truth or to background.

        Args:
            bbox_pred (Tensor): Predicted boxes, one row per query.
            cls_pred (Tensor): Predicted classification scores per query.
            gt_bboxes (Tensor): Ground truth boxes, one row per gt; they are
                normalized with ``normalize_bbox`` before the regression cost.
            gt_labels (Tensor): Labels of ``gt_bboxes``.
            gt_bboxes_ignore: Must be None.
            code_weights (Tensor, optional): When given, multiplies both the
                predictions and the normalized gt boxes before the
                regression cost.
            with_velo (bool): When False only the first 8 box dimensions
                enter the regression cost; when True all of them do.
            eps (float): Unused by this method.

        Returns:
            :obj:`AssignResult`: ``gt_inds`` holds 0 for background and the
            1-based gt index for matched queries (or -1 everywhere when there
            are gts but no predictions); ``labels`` holds the matched gt
            label or -1.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
        assigned_labels = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

        if num_gts == 0 or num_bboxes == 0:
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the costs
        cls_cost = self.cls_cost(cls_pred, gt_labels)

        normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
        if code_weights is not None:
            bbox_pred = bbox_pred * code_weights
            normalized_gt_bboxes = normalized_gt_bboxes * code_weights

        if not with_velo:
            # leave out everything past the first 8 dimensions
            reg_cost = self.reg_cost(bbox_pred[:, :8],
                                     normalized_gt_bboxes[:, :8])
        else:
            reg_cost = self.reg_cost(bbox_pred, normalized_gt_bboxes)

        cost = cls_cost + reg_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        # replace non-finite entries so the solver always gets valid input
        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)
        row_inds, col_inds = linear_sum_assignment(cost)
        device = bbox_pred.device
        matched_row_inds = torch.from_numpy(row_inds).to(device)
        matched_col_inds = torch.from_numpy(col_inds).to(device)

        # 4. everything is background unless it was matched
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)

projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from mmdet.core.bbox.builder import BBOX_ASSIGNERS
+from mmdet.core.bbox.assigners import HungarianAssigner, AssignResult
+try:
+    from scipy.optimize import linear_sum_assignment
+except ImportError:
+    linear_sum_assignment = None
@BBOX_ASSIGNERS.register_module()
class LaneHungarianAssigner(HungarianAssigner):
    """One-to-one Hungarian matching between lane predictions and gt lanes.

    Uses ``self.cls_cost`` and ``self.reg_cost``, which are presumably set up
    by the ``HungarianAssigner`` constructor (verify against mmdet).
    """

    def assign(self,
               lane_pred,
               cls_pred,
               gt_lanes,
               gt_labels,
               img_meta,
               gt_lanes_ignore=None,
               eps=1e-7):
        """Match every lane prediction to a ground truth lane or background.

        Args:
            lane_pred (Tensor): Predicted lanes, one row per query.
            cls_pred (Tensor): Predicted classification scores per query.
            gt_lanes (Tensor): Ground truth lanes, one row per gt.
            gt_labels (Tensor): Labels of ``gt_lanes``.
            img_meta: Unused by this method.
            gt_lanes_ignore: Must be None.
            eps (float): Unused by this method.

        Returns:
            :obj:`AssignResult`: ``gt_inds`` holds 0 for background and the
            1-based gt index for matched queries (or -1 everywhere when there
            are gts but no predictions); ``labels`` holds the matched gt
            label or -1.

        Raises:
            ImportError: If scipy is not installed.
        """
        assert gt_lanes_ignore is None, \
            'Only case when gt_lanes_ignore is None is supported.'
        num_gts, num_lanes = gt_lanes.size(0), lane_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = lane_pred.new_full((num_lanes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = lane_pred.new_full((num_lanes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_lanes == 0:
            # No ground truth or predictions, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the costs: classification plus lane regression
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        reg_cost = self.reg_cost(lane_pred, gt_lanes)
        cost = cls_cost + reg_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = torch.nan_to_num(cost)
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            # Same guard as the sibling assigners: without it a missing scipy
            # surfaces as an opaque "'NoneType' object is not callable".
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            lane_pred.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            lane_pred.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)

projects/mmdet3d_plugin/core/bbox/coders/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .nms_free_coder import NMSFreeCoder
2	+ __all__ = ['NMSFreeCoder']

projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (290 Bytes). View file

projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (237 Bytes). View file

projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc ADDED Viewed

Binary file (3.74 kB). View file

projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc ADDED Viewed

Binary file (3.68 kB). View file

projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import torch
+from mmdet.core.bbox import BaseBBoxCoder
+from mmdet.core.bbox.builder import BBOX_CODERS
+from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
+@BBOX_CODERS.register_module()
+class NMSFreeCoder(BaseBBoxCoder):
+    """Bbox coder for NMS-free detector.
+    Args:
+        pc_range (list[float]): Range of point cloud.
+        post_center_range (list[float]): Limit of the center.
+            Default: None.
+        max_num (int): Max number to be kept. Default: 100.
+        score_threshold (float): Threshold to filter boxes based on score.
+            Default: None.
+        code_size (int): Code size of bboxes. Default: 9
+    """
+    def __init__(self,
+                 pc_range,
+                 voxel_size=None,
+                 post_center_range=None,
+                 max_num=100,
+                 score_threshold=None,
+                 num_classes=10):
+        self.pc_range = pc_range
+        self.voxel_size = voxel_size
+        self.post_center_range = post_center_range
+        self.max_num = max_num
+        self.score_threshold = score_threshold
+        self.num_classes = num_classes
+    def encode(self):
+        pass
+    def decode_single(self, cls_scores, bbox_preds):
+        """Decode bboxes.
+        Args:
+            cls_scores (Tensor): Outputs from the classification head, \
+                shape [num_query, cls_out_channels]. Note \
+                cls_out_channels should includes background.
+            bbox_preds (Tensor): Outputs from the regression \
+                head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
+                Shape [num_query, 9].
+        Returns:
+            list[dict]: Decoded boxes.
+        """
+        max_num = self.max_num
+        cls_scores = cls_scores.sigmoid()
+        scores, indexs = cls_scores.view(-1).topk(max_num)
+        labels = indexs % self.num_classes
+        bbox_index = torch.div(indexs, self.num_classes, rounding_mode='floor')
+        bbox_preds = bbox_preds[bbox_index]
+        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
+        final_scores = scores
+        final_preds = labels
+        # use score threshold
+        if self.score_threshold is not None:
+            thresh_mask = final_scores >= self.score_threshold
+        if self.post_center_range is not None:
+            self.post_center_range = torch.tensor(self.post_center_range, device=scores.device)
+            mask = (final_box_preds[..., :3] >=
+                    self.post_center_range[:3]).all(1)
+            mask &= (final_box_preds[..., :3] <=
+                     self.post_center_range[3:]).all(1)
+            if self.score_threshold:
+                mask &= thresh_mask
+            boxes3d = final_box_preds[mask]
+            scores = final_scores[mask]
+            labels = final_preds[mask]
+            predictions_dict = {
+                'bboxes': boxes3d,
+                'scores': scores,
+                'labels': labels
+            }
+        else:
+            raise NotImplementedError(
+                'Need to reorganize output as a batch, only '
+                'support post_center_range is not None for now!')
+        return predictions_dict
+    def decode(self, preds_dicts):
+        """Decode bboxes.
+        Args:
+            all_cls_scores (Tensor): Outputs from the classification head, \
+                shape [nb_dec, bs, num_query, cls_out_channels]. Note \
+                cls_out_channels should includes background.
+            all_bbox_preds (Tensor): Sigmoid outputs from the regression \
+                head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
+                Shape [nb_dec, bs, num_query, 9].
+        Returns:
+            list[dict]: Decoded boxes.
+        """
+        all_cls_scores = preds_dicts['all_cls_scores'][-1]
+        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
+        batch_size = all_cls_scores.size()[0]
+        predictions_list = []
+        for i in range(batch_size):
+            predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i]))
+        return predictions_list

projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from mmdet.core.bbox.match_costs import build_match_cost
+from .match_cost import BBox3DL1Cost
+__all__ = ['build_match_cost', 'BBox3DL1Cost']

projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc ADDED Viewed

Binary file (371 Bytes). View file

projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (318 Bytes). View file

projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc ADDED Viewed

Binary file (1.97 kB). View file

projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc ADDED Viewed

Binary file (1.91 kB). View file