mins committed on
Commit
c501468
·
1 Parent(s): 86755c6

eva_base_tiny

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. iter_21096.pth +3 -0
  2. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json +0 -0
  3. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json +0 -0
  4. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors +0 -0
  5. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json +0 -0
  6. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model +0 -0
  7. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json +0 -0
  8. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json +0 -0
  9. {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin +0 -0
  10. projects/configs/OmniDrive/eva_base_tinyllama.py +294 -0
  11. projects/configs/OmniDrive/eva_large_llama7b.py +296 -0
  12. projects/mmdet3d_plugin/__init__.py +11 -0
  13. projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc +0 -0
  14. projects/mmdet3d_plugin/core/apis/__init__.py +3 -0
  15. projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc +0 -0
  16. projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc +0 -0
  17. projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc +0 -0
  18. projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc +0 -0
  19. projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc +0 -0
  20. projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc +0 -0
  21. projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc +0 -0
  22. projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc +0 -0
  23. projects/mmdet3d_plugin/core/apis/mmdet_train.py +204 -0
  24. projects/mmdet3d_plugin/core/apis/test.py +164 -0
  25. projects/mmdet3d_plugin/core/apis/train.py +70 -0
  26. projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc +0 -0
  27. projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc +0 -0
  28. projects/mmdet3d_plugin/core/bbox/assigners/__init__.py +4 -0
  29. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc +0 -0
  30. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc +0 -0
  31. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc +0 -0
  32. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc +0 -0
  33. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc +0 -0
  34. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc +0 -0
  35. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc +0 -0
  36. projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc +0 -0
  37. projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py +158 -0
  38. projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py +91 -0
  39. projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py +63 -0
  40. projects/mmdet3d_plugin/core/bbox/coders/__init__.py +2 -0
  41. projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc +0 -0
  42. projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc +0 -0
  43. projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc +0 -0
  44. projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc +0 -0
  45. projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py +111 -0
  46. projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py +4 -0
  47. projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc +0 -0
  48. projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc +0 -0
  49. projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc +0 -0
  50. projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc +0 -0
iter_21096.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3119e1ca3d54933c48df1409537879079a492895c7c36f4f7ae47c223ceb8de7
3
+ size 14575027161
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json RENAMED
File without changes
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin RENAMED
File without changes
projects/configs/OmniDrive/eva_base_tinyllama.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
3
+ '../../../mmdetection3d/configs/_base_/default_runtime.py'
4
+ ]
5
+ backbone_norm_cfg = dict(type='LN', requires_grad=True)
6
+ plugin=True
7
+ plugin_dir='projects/mmdet3d_plugin/'
8
+
9
+ # If point cloud range is changed, the models should also change their point
10
+ # cloud range accordingly
11
+ point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
12
+ voxel_size = [0.2, 0.2, 8]
13
+ img_norm_cfg = dict(
14
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
15
+ # For nuScenes we usually do 10-class detection
16
+ class_names = [
17
+ 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
18
+ 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
19
+ ]
20
+
21
+ num_gpus = 8
22
+ batch_size = 2
23
+ num_iters_per_epoch = 28130 // (num_gpus * batch_size)
24
+ num_epochs = 12
25
+ llm_path = 'ckpts/pretrain_tiny'
26
+
27
+ collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
28
+ input_modality = dict(
29
+ use_lidar=False,
30
+ use_camera=True,
31
+ use_radar=False,
32
+ use_map=False,
33
+ use_external=True)
34
+ model = dict(
35
+ type='Petr3D',
36
+ save_path='./results_planning_tiny/', #save path for vlm models.
37
+ use_grid_mask=True,
38
+ frozen=False,
39
+ use_lora=False,
40
+ tokenizer=llm_path,
41
+ lm_head=llm_path, # set to None if don't use llm head
42
+ img_backbone=dict(
43
+ type='EVAViT',
44
+ img_size=640,
45
+ patch_size=16,
46
+ window_size=16,
47
+ in_chans=3,
48
+ embed_dim=768,
49
+ depth=12,
50
+ num_heads=12,
51
+ mlp_ratio=4*2/3,
52
+ window_block_indexes=(0, 1, 3, 4, 6, 7, 9, 10),
53
+ qkv_bias=True,
54
+ drop_path_rate=0.1,
55
+ flash_attn=True,
56
+ with_cp=True,
57
+ frozen=False),
58
+ map_head=dict(
59
+ type='PETRHeadM',
60
+ num_classes=1,
61
+ in_channels=768,
62
+ out_dims=2048,
63
+ memory_len=600,
64
+ with_mask=True, # map query can't see vlm tokens
65
+ topk_proposals=300,
66
+ num_lane=1800, # 300+1500
67
+ num_lanes_one2one=300,
68
+ k_one2many=5,
69
+ lambda_one2many=1.0,
70
+ num_extra=256,
71
+ n_control=11,
72
+ pc_range=point_cloud_range,
73
+ code_weights = [1.0, 1.0],
74
+ transformer=dict(
75
+ type='PETRTemporalTransformer',
76
+ input_dimension=256,
77
+ output_dimension=256,
78
+ num_layers=6,
79
+ embed_dims=256,
80
+ num_heads=8,
81
+ feedforward_dims=2048,
82
+ dropout=0.1,
83
+ with_cp=True,
84
+ flash_attn=True,),
85
+ train_cfg=dict(
86
+ assigner=dict(
87
+ type='LaneHungarianAssigner',
88
+ cls_cost=dict(type='FocalLossCost', weight=1.5),
89
+ reg_cost=dict(type='LaneL1Cost', weight=0.02),
90
+ iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
91
+ loss_cls=dict(
92
+ type='FocalLoss',
93
+ use_sigmoid=True,
94
+ gamma=2.0,
95
+ alpha=0.25,
96
+ loss_weight=1.5),
97
+ loss_bbox=dict(type='L1Loss', loss_weight=0.02),
98
+ loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
99
+ pts_bbox_head=dict(
100
+ type='StreamPETRHead',
101
+ num_classes=10,
102
+ in_channels=768,
103
+ out_dims=2048,
104
+ num_query=600,
105
+ with_mask=True,
106
+ memory_len=600,
107
+ topk_proposals=300,
108
+ num_propagated=300,
109
+ num_extra=256,
110
+ n_control=11, # align with centerline query definition
111
+ match_with_velo=False,
112
+ scalar=10, ##noise groups
113
+ noise_scale = 1.0,
114
+ dn_weight= 1.0, ##dn loss weight
115
+ split = 0.75, ###positive rate
116
+ code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
117
+ transformer=dict(
118
+ type='PETRTemporalTransformer',
119
+ input_dimension=256,
120
+ output_dimension=256,
121
+ num_layers=6,
122
+ embed_dims=256,
123
+ num_heads=8,
124
+ feedforward_dims=2048,
125
+ dropout=0.1,
126
+ with_cp=True,
127
+ flash_attn=True,
128
+ ),
129
+ bbox_coder=dict(
130
+ type='NMSFreeCoder',
131
+ post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
132
+ pc_range=point_cloud_range,
133
+ max_num=300,
134
+ voxel_size=voxel_size,
135
+ num_classes=10),
136
+ loss_cls=dict(
137
+ type='FocalLoss',
138
+ use_sigmoid=True,
139
+ gamma=2.0,
140
+ alpha=0.25,
141
+ loss_weight=2.0),
142
+ loss_bbox=dict(type='L1Loss', loss_weight=0.25),
143
+ loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
144
+ # model training and testing settings
145
+ train_cfg=dict(pts=dict(
146
+ grid_size=[512, 512, 1],
147
+ voxel_size=voxel_size,
148
+ point_cloud_range=point_cloud_range,
149
+ out_size_factor=4,
150
+ assigner=dict(
151
+ type='HungarianAssigner3D',
152
+ cls_cost=dict(type='FocalLossCost', weight=2.0),
153
+ reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
154
+ iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
155
+ pc_range=point_cloud_range),)
156
+ )
157
+ )
158
+
159
+
160
+ dataset_type = 'CustomNuScenesDataset'
161
+ data_root = './data/nuscenes/'
162
+
163
+ file_client_args = dict(backend='disk')
164
+
165
+
166
+ ida_aug_conf = {
167
+ "resize_lim": (0.37, 0.45),
168
+ "final_dim": (320, 640),
169
+ "bot_pct_lim": (0.0, 0.0),
170
+ "rot_lim": (0.0, 0.0),
171
+ "H": 900,
172
+ "W": 1600,
173
+ "rand_flip": False,
174
+ }
175
+
176
+ train_pipeline = [
177
+ dict(type='LoadMultiViewImageFromFiles', to_float32=True),
178
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
179
+ with_label=True, with_bbox_depth=True),
180
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
181
+ dict(type='ObjectNameFilter', classes=class_names),
182
+ dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
183
+ dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
184
+ dict(type='LoadAnnoatationVQA',
185
+ base_vqa_path='./data/nuscenes/vqa/train/',
186
+ base_desc_path='./data/nuscenes/desc/train/',
187
+ base_conv_path='./data/nuscenes/conv/train/',
188
+ base_key_path='./data/nuscenes/keywords/train/',
189
+ tokenizer=llm_path,
190
+ max_length=2048,
191
+ ignore_type=[],
192
+ lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
193
+ dict(type='NormalizeMultiviewImage', **img_norm_cfg),
194
+ dict(type='PadMultiViewImage', size_divisor=32),
195
+ dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
196
+ dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
197
+ meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
198
+ ]
199
+ test_pipeline = [
200
+ dict(type='LoadMultiViewImageFromFiles', to_float32=True),
201
+ dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
202
+ dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
203
+ dict(type='NormalizeMultiviewImage', **img_norm_cfg),
204
+ dict(type='PadMultiViewImage', size_divisor=32),
205
+ dict(type='LoadAnnoatationVQATest',
206
+ base_vqa_path='./data/nuscenes/vqa/val/',
207
+ base_conv_path='./data/nuscenes/conv/val/',
208
+ base_counter_path='./data/nuscenes/eval_cf/',
209
+ load_type=["planning"], # please don't test all the questions in single test, it requires quite long time
210
+ tokenizer=llm_path,
211
+ max_length=2048,),
212
+ dict(
213
+ type='MultiScaleFlipAug3D',
214
+ img_scale=(1333, 800),
215
+ pts_scale_ratio=1,
216
+ flip=False,
217
+ transforms=[
218
+ dict(
219
+ type='PETRFormatBundle3D',
220
+ collect_keys=collect_keys,
221
+ class_names=class_names,
222
+ with_label=False),
223
+ dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
224
+ meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
225
+ ])
226
+ ]
227
+
228
+ data = dict(
229
+ samples_per_gpu=batch_size,
230
+ workers_per_gpu=2,
231
+ train=dict(
232
+ type=dataset_type,
233
+ data_root=data_root,
234
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
235
+ seq_split_num=1, # streaming video training
236
+ seq_mode=True, # streaming video training
237
+ pipeline=train_pipeline,
238
+ classes=class_names,
239
+ modality=input_modality,
240
+ test_mode=False,
241
+ use_valid_flag=True,
242
+ filter_empty_gt=False,
243
+ box_type_3d='LiDAR'),
244
+ val=dict(
245
+ type=dataset_type,
246
+ eval_mode=['lane', 'det'],
247
+ pipeline=test_pipeline,
248
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
249
+ classes=class_names,
250
+ modality=input_modality),
251
+ test=dict(
252
+ type=dataset_type,
253
+ eval_mode=['lane', 'det'],
254
+ pipeline=test_pipeline,
255
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
256
+ classes=class_names,
257
+ modality=input_modality),
258
+ shuffler_sampler=dict(
259
+ type='InfiniteGroupEachSampleInBatchSampler',
260
+ seq_split_num=2,
261
+ warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
262
+ num_iters_to_seq=num_iters_per_epoch,
263
+ ),
264
+ nonshuffler_sampler=dict(type='DistributedSampler')
265
+ )
266
+
267
+
268
+ optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
269
+ lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
270
+ paramwise_cfg={'decay_rate': 0.9,
271
+ 'head_decay_rate': 4.0,
272
+ 'lm_head_decay_rate': 0.1,
273
+ 'decay_type': 'vit_wise',
274
+ 'num_layers': 24,
275
+ })
276
+
277
+ optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
278
+ # learning policy
279
+ lr_config = dict(
280
+ policy='CosineAnnealing',
281
+ warmup='linear',
282
+ warmup_iters=500,
283
+ warmup_ratio=1.0 / 3,
284
+ min_lr_ratio=1e-3,
285
+ )
286
+
287
+ evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
288
+
289
+ find_unused_parameters=False #### when use checkpoint, find_unused_parameters must be False
290
+ checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
291
+ runner = dict(
292
+ type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
293
+ load_from=None
294
+ resume_from=None
projects/configs/OmniDrive/eva_large_llama7b.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
3
+ '../../../mmdetection3d/configs/_base_/default_runtime.py'
4
+ ]
5
+ backbone_norm_cfg = dict(type='LN', requires_grad=True)
6
+ plugin=True
7
+ plugin_dir='projects/mmdet3d_plugin/'
8
+
9
+ # If point cloud range is changed, the models should also change their point
10
+ # cloud range accordingly
11
+ point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
12
+ voxel_size = [0.2, 0.2, 8]
13
+ img_norm_cfg = dict(
14
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
15
+ # For nuScenes we usually do 10-class detection
16
+ class_names = [
17
+ 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
18
+ 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
19
+ ]
20
+
21
+ num_gpus = 8
22
+ batch_size = 2
23
+ num_iters_per_epoch = 28130 // (num_gpus * batch_size)
24
+ num_epochs = 6
25
+ llm_path = 'ckpts/final/'
26
+
27
+ collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
28
+ input_modality = dict(
29
+ use_lidar=False,
30
+ use_camera=True,
31
+ use_radar=False,
32
+ use_map=False,
33
+ use_external=True)
34
+ model = dict(
35
+ type='Petr3D',
36
+ save_path='./results_planning_only/', #save path for vlm models.
37
+ use_grid_mask=True,
38
+ frozen=False,
39
+ use_lora=True,
40
+ tokenizer=llm_path,
41
+ lm_head=llm_path, # set to None if don't use llm head
42
+ img_backbone=dict(
43
+ type='EVAViT',
44
+ img_size=640,
45
+ patch_size=16,
46
+ window_size=16,
47
+ in_chans=3,
48
+ embed_dim=1024,
49
+ depth=24,
50
+ num_heads=16,
51
+ mlp_ratio=4*2/3,
52
+ window_block_indexes = (
53
+ list(range(0, 2)) + list(range(3, 5)) + list(range(6, 8)) + list(range(9, 11)) + list(range(12, 14)) + list(range(15, 17)) + list(range(18, 20)) + list(range(21, 23))
54
+ ),
55
+ qkv_bias=True,
56
+ drop_path_rate=0.3,
57
+ flash_attn=True,
58
+ with_cp=True,
59
+ frozen=False,),
60
+ map_head=dict(
61
+ type='PETRHeadM',
62
+ num_classes=1,
63
+ in_channels=1024,
64
+ out_dims=4096,
65
+ memory_len=600,
66
+ with_mask=True, # map query can't see vlm tokens
67
+ topk_proposals=300,
68
+ num_lane=1800, # 300+1500
69
+ num_lanes_one2one=300,
70
+ k_one2many=5,
71
+ lambda_one2many=1.0,
72
+ num_extra=256,
73
+ n_control=11,
74
+ pc_range=point_cloud_range,
75
+ code_weights = [1.0, 1.0],
76
+ transformer=dict(
77
+ type='PETRTemporalTransformer',
78
+ input_dimension=256,
79
+ output_dimension=256,
80
+ num_layers=6,
81
+ embed_dims=256,
82
+ num_heads=8,
83
+ feedforward_dims=2048,
84
+ dropout=0.1,
85
+ with_cp=True,
86
+ flash_attn=True,),
87
+ train_cfg=dict(
88
+ assigner=dict(
89
+ type='LaneHungarianAssigner',
90
+ cls_cost=dict(type='FocalLossCost', weight=1.5),
91
+ reg_cost=dict(type='LaneL1Cost', weight=0.02),
92
+ iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
93
+ loss_cls=dict(
94
+ type='FocalLoss',
95
+ use_sigmoid=True,
96
+ gamma=2.0,
97
+ alpha=0.25,
98
+ loss_weight=1.5),
99
+ loss_bbox=dict(type='L1Loss', loss_weight=0.02),
100
+ loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
101
+ pts_bbox_head=dict(
102
+ type='StreamPETRHead',
103
+ num_classes=10,
104
+ in_channels=1024,
105
+ out_dims=4096,
106
+ num_query=600,
107
+ with_mask=True,
108
+ memory_len=600,
109
+ topk_proposals=300,
110
+ num_propagated=300,
111
+ num_extra=256,
112
+ n_control=11, # align with centerline query defination
113
+ match_with_velo=False,
114
+ scalar=10, ##noise groups
115
+ noise_scale = 1.0,
116
+ dn_weight= 1.0, ##dn loss weight
117
+ split = 0.75, ###positive rate
118
+ code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
119
+ transformer=dict(
120
+ type='PETRTemporalTransformer',
121
+ input_dimension=256,
122
+ output_dimension=256,
123
+ num_layers=6,
124
+ embed_dims=256,
125
+ num_heads=8,
126
+ feedforward_dims=2048,
127
+ dropout=0.1,
128
+ with_cp=True,
129
+ flash_attn=True,
130
+ ),
131
+ bbox_coder=dict(
132
+ type='NMSFreeCoder',
133
+ post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
134
+ pc_range=point_cloud_range,
135
+ max_num=300,
136
+ voxel_size=voxel_size,
137
+ num_classes=10),
138
+ loss_cls=dict(
139
+ type='FocalLoss',
140
+ use_sigmoid=True,
141
+ gamma=2.0,
142
+ alpha=0.25,
143
+ loss_weight=2.0),
144
+ loss_bbox=dict(type='L1Loss', loss_weight=0.25),
145
+ loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
146
+ # model training and testing settings
147
+ train_cfg=dict(pts=dict(
148
+ grid_size=[512, 512, 1],
149
+ voxel_size=voxel_size,
150
+ point_cloud_range=point_cloud_range,
151
+ out_size_factor=4,
152
+ assigner=dict(
153
+ type='HungarianAssigner3D',
154
+ cls_cost=dict(type='FocalLossCost', weight=2.0),
155
+ reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
156
+ iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
157
+ pc_range=point_cloud_range),)
158
+ )
159
+ )
160
+
161
+
162
+ dataset_type = 'CustomNuScenesDataset'
163
+ data_root = './data/nuscenes/'
164
+
165
+ file_client_args = dict(backend='disk')
166
+
167
+
168
+ ida_aug_conf = {
169
+ "resize_lim": (0.37, 0.45),
170
+ "final_dim": (320, 640),
171
+ "bot_pct_lim": (0.0, 0.0),
172
+ "rot_lim": (0.0, 0.0),
173
+ "H": 900,
174
+ "W": 1600,
175
+ "rand_flip": False,
176
+ }
177
+
178
+ train_pipeline = [
179
+ dict(type='LoadMultiViewImageFromFiles', to_float32=True),
180
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
181
+ with_label=True, with_bbox_depth=True),
182
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
183
+ dict(type='ObjectNameFilter', classes=class_names),
184
+ dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
185
+ dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
186
+ dict(type='LoadAnnoatationVQA',
187
+ base_vqa_path='./data/nuscenes/vqa/train/',
188
+ base_desc_path='./data/nuscenes/desc/train/',
189
+ base_conv_path='./data/nuscenes/conv/train/',
190
+ base_key_path='./data/nuscenes/keywords/train/',
191
+ tokenizer=llm_path,
192
+ max_length=2048,
193
+ ignore_type=[],
194
+ lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
195
+ dict(type='NormalizeMultiviewImage', **img_norm_cfg),
196
+ dict(type='PadMultiViewImage', size_divisor=32),
197
+ dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
198
+ dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
199
+ meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
200
+ ]
201
+ test_pipeline = [
202
+ dict(type='LoadMultiViewImageFromFiles', to_float32=True),
203
+ dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
204
+ dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
205
+ dict(type='NormalizeMultiviewImage', **img_norm_cfg),
206
+ dict(type='PadMultiViewImage', size_divisor=32),
207
+ dict(type='LoadAnnoatationVQATest',
208
+ base_vqa_path='./data/nuscenes/vqa/val/',
209
+ base_conv_path='./data/nuscenes/conv/val/',
210
+ base_counter_path='./data/nuscenes/eval_cf/',
211
+ load_type=["planning"], # please don't test all the questions in single test, it requires quite long time
212
+ tokenizer=llm_path,
213
+ max_length=2048,),
214
+ dict(
215
+ type='MultiScaleFlipAug3D',
216
+ img_scale=(1333, 800),
217
+ pts_scale_ratio=1,
218
+ flip=False,
219
+ transforms=[
220
+ dict(
221
+ type='PETRFormatBundle3D',
222
+ collect_keys=collect_keys,
223
+ class_names=class_names,
224
+ with_label=False),
225
+ dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
226
+ meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
227
+ ])
228
+ ]
229
+
230
+ data = dict(
231
+ samples_per_gpu=batch_size,
232
+ workers_per_gpu=2,
233
+ train=dict(
234
+ type=dataset_type,
235
+ data_root=data_root,
236
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
237
+ seq_split_num=1, # streaming video training
238
+ seq_mode=True, # streaming video training
239
+ pipeline=train_pipeline,
240
+ classes=class_names,
241
+ modality=input_modality,
242
+ test_mode=False,
243
+ use_valid_flag=True,
244
+ filter_empty_gt=False,
245
+ box_type_3d='LiDAR'),
246
+ val=dict(
247
+ type=dataset_type,
248
+ eval_mode=['lane', 'det'],
249
+ pipeline=test_pipeline,
250
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
251
+ classes=class_names,
252
+ modality=input_modality),
253
+ test=dict(
254
+ type=dataset_type,
255
+ eval_mode=['lane', 'det'],
256
+ pipeline=test_pipeline,
257
+ ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
258
+ classes=class_names,
259
+ modality=input_modality),
260
+ shuffler_sampler=dict(
261
+ type='InfiniteGroupEachSampleInBatchSampler',
262
+ seq_split_num=2,
263
+ warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
264
+ num_iters_to_seq=num_iters_per_epoch,
265
+ ),
266
+ nonshuffler_sampler=dict(type='DistributedSampler')
267
+ )
268
+
269
+
270
+ optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
271
+ lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
272
+ paramwise_cfg={'decay_rate': 0.9,
273
+ 'head_decay_rate': 4.0,
274
+ 'lm_head_decay_rate': 0.1,
275
+ 'decay_type': 'vit_wise',
276
+ 'num_layers': 24,
277
+ })
278
+
279
+ optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
280
+ # learning policy
281
+ lr_config = dict(
282
+ policy='CosineAnnealing',
283
+ warmup='linear',
284
+ warmup_iters=500,
285
+ warmup_ratio=1.0 / 3,
286
+ min_lr_ratio=1e-3,
287
+ )
288
+
289
+ evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
290
+
291
+ find_unused_parameters=False #### when use checkpoint, find_unused_parameters must be False
292
+ checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
293
+ runner = dict(
294
+ type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
295
+ load_from=None
296
+ resume_from=None
projects/mmdet3d_plugin/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
2
+ from .core.bbox.coders.nms_free_coder import NMSFreeCoder
3
+ from .core.bbox.match_costs import BBox3DL1Cost
4
+ from .core.hook import *
5
+ from .datasets import CustomNuScenesDataset
6
+ from .datasets.pipelines import *
7
+ from .models.losses import *
8
+ from .models.dense_heads import *
9
+ from .models.detectors import *
10
+ from .models.necks import *
11
+ from .models.backbones import *
projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (656 Bytes). View file
 
projects/mmdet3d_plugin/core/apis/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .train import custom_train_model
2
+ from .mmdet_train import custom_train_detector
3
+ from .test import custom_multi_gpu_test
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (365 Bytes). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (312 Bytes). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc ADDED
Binary file (4.57 kB). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc ADDED
Binary file (4.53 kB). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc ADDED
Binary file (4.05 kB). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc ADDED
Binary file (4.01 kB). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc ADDED
Binary file (1.18 kB). View file
 
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc ADDED
Binary file (1.11 kB). View file
 
projects/mmdet3d_plugin/core/apis/mmdet_train.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------
2
+ # Copyright (c) OpenMMLab. All rights reserved.
3
+ # ---------------------------------------------
4
+ # Modified by Zhiqi Li
5
+ # ---------------------------------------------
6
+ # ---------------------------------------------
7
+ # Modified by Shihao Wang
8
+ # ---------------------------------------------
9
+ import random
10
+ import warnings
11
+
12
+ import numpy as np
13
+ import torch
14
+ import torch.distributed as dist
15
+ from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
16
+ from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
17
+ Fp16OptimizerHook, OptimizerHook, build_optimizer,
18
+ build_runner, get_dist_info)
19
+ from mmcv.utils import build_from_cfg
20
+
21
+ from mmdet.core import EvalHook
22
+
23
+ from mmdet.datasets import (build_dataset,
24
+ replace_ImageToTensor)
25
+ from mmdet.utils import get_root_logger
26
+ import time
27
+ import os.path as osp
28
+ from projects.mmdet3d_plugin.datasets.builder import build_dataloader
29
+ from projects.mmdet3d_plugin.core.evaluation.eval_hooks import CustomDistEvalHook
30
+ from projects.mmdet3d_plugin.datasets import custom_build_dataset
31
def custom_train_detector(model,
                          dataset,
                          cfg,
                          distributed=False,
                          validate=False,
                          timestamp=None,
                          eval_model=None,
                          meta=None):
    """Assemble data loaders, runner and hooks from ``cfg`` and start training.

    Args:
        model: Model to train. It is moved to GPU and wrapped in
            ``MMDistributedDataParallel`` (distributed) or ``MMDataParallel``.
        dataset: One dataset or a list/tuple of datasets. One data loader is
            built per entry and all loaders are handed to ``runner.run``.
        cfg: Config object, read both by attribute and by key. Fields used
            here include ``data``, ``optimizer``, ``optimizer_config``,
            ``lr_config``, ``checkpoint_config``, ``log_config``,
            ``work_dir``, ``gpu_ids``, ``seed``, ``workflow``,
            ``resume_from`` and ``load_from``.
        distributed (bool): Selects the distributed wrapper, sampler seed
            hook and ``CustomDistEvalHook``. Default: False.
        validate (bool): Register an evaluation hook built from
            ``cfg.data.val``. Default: False.
        timestamp: Stored on the runner as ``runner.timestamp``.
        eval_model: Optional second model. When given it is wrapped the same
            way as ``model`` and passed to the runner as ``eval_model``.
        meta: Passed through to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # Resolve the runner section first: ``cfg.runner`` is read below when the
    # data loaders are built, so the legacy ``total_epochs`` fallback has to be
    # in place before that point or it can never take effect.
    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            shuffler_sampler=cfg.data.shuffler_sampler,
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
            runner_type=cfg.runner,
        ) for ds in dataset
    ]

    # put model (and the optional eval model) on gpus
    if distributed:
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
        if eval_model is not None:
            eval_model = MMDistributedDataParallel(
                eval_model.cuda(),
                device_ids=[torch.cuda.current_device()],
                broadcast_buffers=False,
                find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
        if eval_model is not None:
            eval_model = MMDataParallel(
                eval_model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    runner_args = dict(
        model=model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta)
    # ``eval_model`` is only forwarded when supplied, so runner types that do
    # not accept that keyword keep working.
    if eval_model is not None:
        runner_args['eval_model'] = eval_model
    runner = build_runner(cfg.runner, default_args=runner_args)

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    if distributed and isinstance(runner, EpochBasedRunner):
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Batched validation is deliberately disabled here.
            assert False, 'validation only supports samples_per_gpu == 1'
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))

        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
            shuffler_sampler=cfg.data.shuffler_sampler,
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
        )
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        # One result prefix per launch, derived from the wall-clock time.
        eval_cfg['jsonfile_prefix'] = osp.join(
            'val', cfg.work_dir,
            time.ctime().replace(' ', '_').replace(':', '_'))
        eval_hook = CustomDistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            # Copy so that popping ``priority`` does not alter the config.
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(
            cfg.resume_from,
            resume_optimizer=cfg.get('resume_optimizer', True))
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
204
+
projects/mmdet3d_plugin/core/apis/test.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------
2
+ # Copyright (c) OpenMMLab. All rights reserved.
3
+ # ---------------------------------------------
4
+ # Modified by Zhiqi Li
5
+ # ---------------------------------------------
6
+ import os.path as osp
7
+ import pickle
8
+ import shutil
9
+ import tempfile
10
+ import time
11
+
12
+ import mmcv
13
+ import torch
14
+ import torch.distributed as dist
15
+ from mmcv.image import tensor2imgs
16
+ from mmcv.runner import get_dist_info
17
+
18
+ from mmdet.core import encode_mask_results
19
+
20
+
21
+ import mmcv
22
+ import numpy as np
23
+ import pycocotools.mask as mask_util
24
+
25
def custom_encode_mask_results(mask_results):
    """Encode bitmap masks to RLE code. Semantic masks only.

    Args:
        mask_results: Sequence of 2-D bitmap masks; each one is given a
            trailing channel axis, converted to a Fortran-ordered ``uint8``
            array and RLE-encoded.

    Returns:
        list: A one-element list whose single item is the list of RLE
        encoded masks, in input order.
    """
    rle_masks = [
        mask_util.encode(
            np.array(segm[:, :, np.newaxis], order='F', dtype='uint8'))[0]
        for segm in mask_results
    ]
    return [rle_masks]
44
+
45
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.

    Every rank runs inference on its share of ``data_loader`` and the partial
    results are then gathered. With ``gpu_collect=True`` the gathering goes
    through ``collect_results_gpu``; otherwise each rank's results are written
    to ``tmpdir`` and collected by ``collect_results_cpu``.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Directory for the temporary per-rank result files in
            cpu mode. Mask results use ``tmpdir + '_mask'``.
        gpu_collect (bool): Option to use either gpu or cpu to collect
            results.

    Returns:
        list | dict: The collected bbox results when no mask results were
        produced, otherwise a dict with keys ``'bbox_results'`` and
        ``'mask_results'``.
    """
    model.eval()
    bbox_results = []
    mask_results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    have_mask = False
    for data in data_loader:
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            if isinstance(result, dict):
                if 'bbox_results' in result:
                    batch_size = len(result['bbox_results'])
                    bbox_results.extend(result['bbox_results'])
                if result.get('mask_results') is not None:
                    # encode mask results
                    mask_results.extend(
                        custom_encode_mask_results(result['mask_results']))
                    have_mask = True
            else:
                batch_size = len(result)
                bbox_results.extend(result)

        if rank == 0:
            # Only rank 0 owns the bar; advance it for all ranks at once.
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    num_samples = len(dataset)
    if gpu_collect:
        bbox_results = collect_results_gpu(bbox_results, num_samples)
        mask_results = (collect_results_gpu(mask_results, num_samples)
                        if have_mask else None)
    else:
        bbox_results = collect_results_cpu(bbox_results, num_samples, tmpdir)
        tmpdir = tmpdir + '_mask' if tmpdir is not None else None
        mask_results = (collect_results_cpu(mask_results, num_samples, tmpdir)
                        if have_mask else None)

    if mask_results is None:
        return bbox_results
    return {'bbox_results': bbox_results, 'mask_results': mask_results}
114
+
115
+
116
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank results on rank 0 through files in a shared directory.

    Args:
        result_part: This rank's results; dumped to ``part_{rank}.pkl``.
        size (int): Number of results to keep after concatenation.
        tmpdir (str, optional): Directory for the part files. When ``None``,
            rank 0 creates one under ``.dist_test`` and broadcasts its path.

    Returns:
        list | None: On rank 0 the concatenated results truncated to
        ``size``; ``None`` on every other rank.
    """
    rank, world_size = get_dist_info()
    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        # Share rank 0's directory name via a fixed-size, space-padded
        # (byte value 32) uint8 buffer.
        MAX_LEN = 512
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            created_dir = tempfile.mkdtemp(dir='.dist_test')
            encoded_dir = torch.tensor(
                bytearray(created_dir.encode()),
                dtype=torch.uint8,
                device='cuda')
            dir_tensor[:len(encoded_dir)] = encoded_dir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # dump the part result to the dir, then wait until every rank has done so
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    if rank != 0:
        return None

    # load results of all parts from tmp dir
    part_list = [
        mmcv.load(osp.join(tmpdir, f'part_{i}.pkl'))
        for i in range(world_size)
    ]
    # The evaluation sampling was changed so that each gpu handles a
    # continuous run of samples, hence the parts are concatenated rank by
    # rank instead of being interleaved with zip(*part_list).
    ordered_results = [item for part in part_list for item in part]
    # the dataloader may pad some samples
    ordered_results = ordered_results[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return ordered_results
161
+
162
+
163
def collect_results_gpu(result_part, size):
    """Collect results from all ranks for the ``gpu_collect`` mode.

    This implementation delegates to ``collect_results_cpu`` with an
    automatically created temporary directory.

    Args:
        result_part: This rank's results.
        size (int): Number of results to keep after concatenation.

    Returns:
        list | None: Whatever ``collect_results_cpu`` returns (the gathered
        results on rank 0, ``None`` elsewhere).
    """
    # The result must be returned: callers assign it to their result list.
    return collect_results_cpu(result_part, size)
projects/mmdet3d_plugin/core/apis/train.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---------------------------------------------
2
+ # Copyright (c) OpenMMLab. All rights reserved.
3
+ # ---------------------------------------------
4
+ # Modified by Zhiqi Li
5
+ # ---------------------------------------------
6
+ # ---------------------------------------------
7
+ # Modified by Shihao Wang
8
+ # ---------------------------------------------
9
+
10
+ from .mmdet_train import custom_train_detector
11
+ from mmseg.apis import train_segmentor
12
+ from mmdet.apis import train_detector
13
+
14
def custom_train_model(model,
                       dataset,
                       cfg,
                       distributed=False,
                       validate=False,
                       timestamp=None,
                       eval_model=None,
                       meta=None):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.

    All arguments are forwarded unchanged to ``custom_train_detector``.

    Raises:
        AssertionError: If ``cfg.model.type`` is ``'EncoderDecoder3D'``,
            which this entry point does not support.
    """
    if cfg.model.type in ['EncoderDecoder3D']:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            'custom_train_model does not support EncoderDecoder3D models')
    custom_train_detector(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        eval_model=eval_model,
        meta=meta)
39
+
40
+
41
def train_model(model,
                dataset,
                cfg,
                distributed=False,
                validate=False,
                timestamp=None,
                meta=None):
    """A function wrapper for launching model training according to cfg.

    Because we need different eval_hook in runner. Should be deprecated in the
    future.

    ``EncoderDecoder3D`` models are trained with ``train_segmentor``; every
    other model type goes through ``train_detector``. All arguments are
    forwarded unchanged.
    """
    is_segmentor = cfg.model.type in ['EncoderDecoder3D']
    launch = train_segmentor if is_segmentor else train_detector
    launch(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta)
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc ADDED
Binary file (1.39 kB). View file
 
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc ADDED
Binary file (1.33 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .hungarian_assigner_3d import HungarianAssigner3D
2
+ from .hungarian_assigner_2d import HungarianAssigner2D
3
+ from .map_assigner import LaneHungarianAssigner
4
# Export every assigner imported above, including the lane assigner.
__all__ = [
    'HungarianAssigner3D', 'HungarianAssigner2D', 'LaneHungarianAssigner'
]
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (437 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (384 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc ADDED
Binary file (5.6 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc ADDED
Binary file (5.52 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc ADDED
Binary file (2.4 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc ADDED
Binary file (2.31 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc ADDED
Binary file (1.6 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc ADDED
Binary file (1.52 kB). View file
 
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ # ---------------------------------------------
3
+ # Modified by Shihao Wang
4
+ # ---------------------------------------------
5
+ import torch
6
+
7
+ from mmdet.core.bbox.builder import BBOX_ASSIGNERS
8
+ from mmdet.core.bbox.assigners import AssignResult
9
+ from mmdet.core.bbox.assigners import BaseAssigner
10
+ from mmdet.core.bbox.match_costs import build_match_cost
11
+ from mmdet.core import bbox_cxcywh_to_xyxy
12
+
13
+ try:
14
+ from scipy.optimize import linear_sum_assignment
15
+ except ImportError:
16
+ linear_sum_assignment = None
17
+
18
+
19
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner2D(BaseAssigner):
    """Computes one-to-one matching between predictions and ground truth.

    The matching cost is the sum of four components: classification cost,
    box regression cost, box IoU cost and a 2D-center cost. The targets
    don't include the no_object, so generally there are more predictions
    than targets. After the one-to-one matching, the un-matched predictions
    are treated as backgrounds. Thus each query prediction will be assigned
    with `0` or a positive integer indicating the ground truth index:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cls_cost (dict): Config of the classification match cost.
            Default ``ClassificationCost`` with weight 1.0.
        reg_cost (dict): Config of the box regression match cost.
            Default ``BBoxL1Cost`` with weight 1.0.
        iou_cost (dict): Config of the box IoU match cost.
            Default ``IoUCost`` with ``iou_mode='giou'`` and weight 1.0.
        centers2d_cost (dict): Config of the 2D-center match cost.
            Default ``BBox3DL1Cost`` with weight 1.0.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0),
                 centers2d_cost=dict(type='BBox3DL1Cost', weight=1.0)):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.centers2d_cost = build_match_cost(centers2d_cost)

    def assign(self,
               bbox_pred,
               cls_pred,
               pred_centers2d,
               gt_bboxes,
               gt_labels,
               centers2d,
               img_meta,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Computes one-to-one matching based on the weighted costs.

        Each query prediction is assigned to a ground truth or to
        background. In ``assigned_gt_inds``, -1 means don't care, 0 means
        negative sample, and a positive number is the 1-based index of the
        assigned gt.

        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            pred_centers2d (Tensor): Predicted 2D centers, compared against
                ``centers2d`` divided by the padded image (w, h).
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
            centers2d (Tensor): Ground truth 2D centers in unnormalized
                image coordinates.
            img_meta (dict): Meta information for current image; only
                ``'pad_shape'`` is read.
            gt_bboxes_ignore (Tensor, optional): Must be None.
            eps (int | float, optional): Unused here. Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # Step 1: everything starts out as "don't care" (-1).
        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = bbox_pred.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # Nothing to match. Without ground truth every prediction is
            # background.
            if num_gts == 0:
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        img_h, img_w, _ = img_meta['pad_shape']
        factor = gt_bboxes.new_tensor([img_w, img_h, img_w,
                                       img_h]).unsqueeze(0)

        # Step 2: the four cost terms.
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        # L1 cost is computed on normalized boxes ...
        reg_cost = self.reg_cost(bbox_pred, gt_bboxes / factor)
        # ... while the IoU cost works on absolute xyxy boxes.
        abs_pred_boxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor
        iou_cost = self.iou_cost(abs_pred_boxes, gt_bboxes)
        # 2D-center cost on centers normalized by (w, h).
        centers2d_cost = self.centers2d_cost(pred_centers2d,
                                             centers2d / factor[:, 0:2])

        cost = cls_cost + reg_cost + iou_cost + centers2d_cost
        # Replace non-finite entries so the solver always gets finite input.
        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)

        # Step 3: Hungarian matching on CPU.
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        row_np, col_np = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(row_np).to(bbox_pred.device)
        matched_col_inds = torch.from_numpy(col_np).to(bbox_pred.device)

        # Step 4: background everywhere, then mark the matched predictions
        # with their 1-based gt index and gt label.
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------------------------------------------------
2
+ # Modified from DETR3D (https://github.com/WangYueFt/detr3d)
3
+ # Copyright (c) 2021 Wang, Yue
4
+ # ------------------------------------------------------------------------
5
+ import torch
6
+ from mmdet.core.bbox.builder import BBOX_ASSIGNERS
7
+ from mmdet.core.bbox.assigners import AssignResult
8
+ from mmdet.core.bbox.assigners import BaseAssigner
9
+ from mmdet.core.bbox.match_costs import build_match_cost
10
+ from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
11
+
12
+ try:
13
+ from scipy.optimize import linear_sum_assignment
14
+ except ImportError:
15
+ linear_sum_assignment = None
16
+
17
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    """One-to-one Hungarian matching between 3D box predictions and gts.

    Args:
        cls_cost (dict): Config of the classification match cost.
        reg_cost (dict): Config of the box regression match cost.
        iou_cost (dict): Config of an IoU match cost. It is built and stored
            but not used by ``assign``.
        pc_range: Passed to ``normalize_bbox`` together with the gt boxes.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pc_range=None):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.pc_range = pc_range

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               gt_bboxes_ignore=None,
               code_weights=None,
               with_velo=False,
               eps=1e-7):
        """Match predictions to ground truth on classification + L1 cost.

        Args:
            bbox_pred (Tensor): Predicted boxes, one row per query.
            cls_pred (Tensor): Predicted classification scores per query.
            gt_bboxes (Tensor): Ground truth boxes; normalized with
                ``normalize_bbox`` before the regression cost.
            gt_labels (Tensor): Labels of ``gt_bboxes``.
            gt_bboxes_ignore: Must be None.
            code_weights (Tensor, optional): When given, multiplied into
                both the predictions and the normalized gt boxes.
            with_velo (bool): If False only the first 8 box dimensions enter
                the regression cost. Default: False.
            eps (float): Unused here. Default 1e-7.

        Returns:
            :obj:`AssignResult`: ``gt_inds`` is 0 for background and the
            1-based gt index for matched predictions.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # Step 1: everything starts out as -1.
        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = bbox_pred.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # Nothing to match. Without ground truth every prediction is
            # background.
            if num_gts == 0:
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # Step 2: classification cost plus L1 cost on normalized boxes.
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range)
        if code_weights is not None:
            bbox_pred = bbox_pred * code_weights
            normalized_gt_bboxes = normalized_gt_bboxes * code_weights

        if not with_velo:
            # Leave out everything after the first 8 box dimensions.
            reg_cost = self.reg_cost(bbox_pred[:, :8],
                                     normalized_gt_bboxes[:, :8])
        else:
            reg_cost = self.reg_cost(bbox_pred, normalized_gt_bboxes)

        cost = cls_cost + reg_cost

        # Step 3: Hungarian matching on CPU.
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        # Replace non-finite entries so the solver always gets finite input.
        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)
        row_np, col_np = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(row_np).to(bbox_pred.device)
        matched_col_inds = torch.from_numpy(col_np).to(bbox_pred.device)

        # Step 4: background everywhere, then mark the matched predictions.
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from mmdet.core.bbox.builder import BBOX_ASSIGNERS
3
+ from mmdet.core.bbox.assigners import HungarianAssigner, AssignResult
4
+ try:
5
+ from scipy.optimize import linear_sum_assignment
6
+ except ImportError:
7
+ linear_sum_assignment = None
8
+
9
@BBOX_ASSIGNERS.register_module()
class LaneHungarianAssigner(HungarianAssigner):
    """Hungarian one-to-one matching between lane predictions and gt lanes.

    Uses the ``cls_cost`` and ``reg_cost`` attributes of the instance, which
    this class does not set itself (presumably provided by the
    ``HungarianAssigner`` base class; verify against mmdet).
    """

    def assign(self,
               lane_pred,
               cls_pred,
               gt_lanes,
               gt_labels,
               img_meta,
               gt_lanes_ignore=None,
               eps=1e-7):
        """Match lane predictions to ground truth lanes.

        Args:
            lane_pred (Tensor): Predicted lanes, one row per query.
            cls_pred (Tensor): Predicted classification scores per query.
            gt_lanes (Tensor): Ground truth lanes, one row per gt.
            gt_labels (Tensor): Labels of ``gt_lanes``.
            img_meta: Unused here.
            gt_lanes_ignore: Must be None.
            eps (float): Unused here. Default 1e-7.

        Returns:
            :obj:`AssignResult`: ``gt_inds`` is 0 for background and the
            1-based gt index for matched predictions.

        Raises:
            ImportError: If scipy is not installed and a matching is needed.
        """
        assert gt_lanes_ignore is None, \
            'Only case when gt_lanes_ignore is None is supported.'
        num_gts, num_lanes = gt_lanes.size(0), lane_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = lane_pred.new_full((num_lanes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = lane_pred.new_full((num_lanes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_lanes == 0:
            # No ground truth or lanes, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the weighted costs: classification + regression L1
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        reg_cost = self.reg_cost(lane_pred, gt_lanes)
        cost = cls_cost + reg_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        # NOTE(review): nan_to_num is called with its defaults here, unlike
        # the 2D/3D assigners which clamp to +/-100; confirm this is intended.
        cost = torch.nan_to_num(cost)
        cost = cost.detach().cpu()
        # Fail with the same clear message as the other assigners when the
        # optional scipy import at module top did not succeed.
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            lane_pred.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            lane_pred.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
projects/mmdet3d_plugin/core/bbox/coders/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .nms_free_coder import NMSFreeCoder
2
+ __all__ = ['NMSFreeCoder']
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (290 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (237 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc ADDED
Binary file (3.74 kB). View file
 
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc ADDED
Binary file (3.68 kB). View file
 
projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from mmdet.core.bbox import BaseBBoxCoder
4
+ from mmdet.core.bbox.builder import BBOX_CODERS
5
+ from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
6
+
7
+
8
@BBOX_CODERS.register_module()
class NMSFreeCoder(BaseBBoxCoder):
    """Bbox coder for NMS-free detector.

    Args:
        pc_range (list[float]): Range of point cloud.
        voxel_size: Stored on the instance; not used by this class.
        post_center_range (list[float]): Limit of the center; the first
            three values are the lower bounds and the remaining ones the
            upper bounds of the box center. Default: None.
        max_num (int): Max number to be kept. Default: 100.
        score_threshold (float): Threshold to filter boxes based on score.
            Default: None.
        num_classes (int): Number of classes, used to split a flattened
            score index into (query index, label). Default: 10.
    """

    def __init__(self,
                 pc_range,
                 voxel_size=None,
                 post_center_range=None,
                 max_num=100,
                 score_threshold=None,
                 num_classes=10):

        self.pc_range = pc_range
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.max_num = max_num
        self.score_threshold = score_threshold
        self.num_classes = num_classes

    def encode(self):
        """Encoding is not implemented for this coder."""
        pass

    def decode_single(self, cls_scores, bbox_preds):
        """Decode the predictions of one sample.

        Args:
            cls_scores (Tensor): Outputs from the classification head,
                shape [num_query, cls_out_channels]. Sigmoid is applied
                here.
            bbox_preds (Tensor): Outputs from the regression head in
                normalized format, one row per query; converted with
                ``denormalize_bbox``.

        Returns:
            dict: ``'bboxes'``, ``'scores'`` and ``'labels'`` of the kept
            predictions.

        Raises:
            NotImplementedError: If ``post_center_range`` is None.
        """
        if self.post_center_range is None:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')

        # Top-k over all (query, class) pairs; never ask for more entries
        # than exist.
        flat_scores = cls_scores.sigmoid().view(-1)
        max_num = min(self.max_num, flat_scores.numel())
        scores, indexs = flat_scores.topk(max_num)
        labels = indexs % self.num_classes
        bbox_index = torch.div(indexs, self.num_classes, rounding_mode='floor')
        bbox_preds = bbox_preds[bbox_index]

        final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)

        # Build the range tensor locally so the configured attribute keeps
        # its original value across calls.
        post_center_range = torch.tensor(
            self.post_center_range, device=scores.device)
        mask = (final_box_preds[..., :3] >= post_center_range[:3]).all(1)
        mask &= (final_box_preds[..., :3] <= post_center_range[3:]).all(1)

        # use score threshold
        if self.score_threshold:
            mask &= scores >= self.score_threshold

        return {
            'bboxes': final_box_preds[mask],
            'scores': scores[mask],
            'labels': labels[mask]
        }

    def decode(self, preds_dicts):
        """Decode bboxes for every sample of a batch.

        Args:
            preds_dicts (dict): Must contain ``'all_cls_scores'`` and
                ``'all_bbox_preds'``. Only the last entry along their first
                axis is decoded; the first axis of that entry is treated as
                the batch axis.

        Returns:
            list[dict]: Decoded boxes, one dict per sample.
        """
        all_cls_scores = preds_dicts['all_cls_scores'][-1]
        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]

        batch_size = all_cls_scores.size()[0]
        return [
            self.decode_single(all_cls_scores[i], all_bbox_preds[i])
            for i in range(batch_size)
        ]
projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mmdet.core.bbox.match_costs import build_match_cost
2
+ from .match_cost import BBox3DL1Cost
3
+
4
+ __all__ = ['build_match_cost', 'BBox3DL1Cost']
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (371 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (318 Bytes). View file
 
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc ADDED
Binary file (1.97 kB). View file
 
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc ADDED
Binary file (1.91 kB). View file