mins
committed on
Commit
·
c501468
1
Parent(s):
86755c6
eva_base_tiny
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- iter_21096.pth +3 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json +0 -0
- {finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin +0 -0
- projects/configs/OmniDrive/eva_base_tinyllama.py +294 -0
- projects/configs/OmniDrive/eva_large_llama7b.py +296 -0
- projects/mmdet3d_plugin/__init__.py +11 -0
- projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__init__.py +3 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/apis/mmdet_train.py +204 -0
- projects/mmdet3d_plugin/core/apis/test.py +164 -0
- projects/mmdet3d_plugin/core/apis/train.py +70 -0
- projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__init__.py +4 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py +158 -0
- projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py +91 -0
- projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py +63 -0
- projects/mmdet3d_plugin/core/bbox/coders/__init__.py +2 -0
- projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py +111 -0
- projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py +4 -0
- projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc +0 -0
- projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc +0 -0
iter_21096.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3119e1ca3d54933c48df1409537879079a492895c7c36f4f7ae47c223ceb8de7
|
3 |
+
size 14575027161
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/config.json
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/generation_config.json
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/model.safetensors
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/special_tokens_map.json
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer.model
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/tokenizer_config.json
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/trainer_state.json
RENAMED
File without changes
|
{finetune-8b-llava-llama3-evabase640-petrv3 → pretrain_tiny}/training_args.bin
RENAMED
File without changes
|
projects/configs/OmniDrive/eva_base_tinyllama.py
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = [
|
2 |
+
'../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
|
3 |
+
'../../../mmdetection3d/configs/_base_/default_runtime.py'
|
4 |
+
]
|
5 |
+
backbone_norm_cfg = dict(type='LN', requires_grad=True)
|
6 |
+
plugin=True
|
7 |
+
plugin_dir='projects/mmdet3d_plugin/'
|
8 |
+
|
9 |
+
# If point cloud range is changed, the models should also change their point
|
10 |
+
# cloud range accordingly
|
11 |
+
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
|
12 |
+
voxel_size = [0.2, 0.2, 8]
|
13 |
+
img_norm_cfg = dict(
|
14 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
15 |
+
# For nuScenes we usually do 10-class detection
|
16 |
+
class_names = [
|
17 |
+
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
|
18 |
+
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
|
19 |
+
]
|
20 |
+
|
21 |
+
num_gpus = 8
|
22 |
+
batch_size = 2
|
23 |
+
num_iters_per_epoch = 28130 // (num_gpus * batch_size)
|
24 |
+
num_epochs = 12
|
25 |
+
llm_path = 'ckpts/pretrain_tiny'
|
26 |
+
|
27 |
+
collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
|
28 |
+
input_modality = dict(
|
29 |
+
use_lidar=False,
|
30 |
+
use_camera=True,
|
31 |
+
use_radar=False,
|
32 |
+
use_map=False,
|
33 |
+
use_external=True)
|
34 |
+
model = dict(
|
35 |
+
type='Petr3D',
|
36 |
+
save_path='./results_planning_tiny/', #save path for vlm models.
|
37 |
+
use_grid_mask=True,
|
38 |
+
frozen=False,
|
39 |
+
use_lora=False,
|
40 |
+
tokenizer=llm_path,
|
41 |
+
lm_head=llm_path, # set to None if don't use llm head
|
42 |
+
img_backbone=dict(
|
43 |
+
type='EVAViT',
|
44 |
+
img_size=640,
|
45 |
+
patch_size=16,
|
46 |
+
window_size=16,
|
47 |
+
in_chans=3,
|
48 |
+
embed_dim=768,
|
49 |
+
depth=12,
|
50 |
+
num_heads=12,
|
51 |
+
mlp_ratio=4*2/3,
|
52 |
+
window_block_indexes=(0, 1, 3, 4, 6, 7, 9, 10),
|
53 |
+
qkv_bias=True,
|
54 |
+
drop_path_rate=0.1,
|
55 |
+
flash_attn=True,
|
56 |
+
with_cp=True,
|
57 |
+
frozen=False),
|
58 |
+
map_head=dict(
|
59 |
+
type='PETRHeadM',
|
60 |
+
num_classes=1,
|
61 |
+
in_channels=768,
|
62 |
+
out_dims=2048,
|
63 |
+
memory_len=600,
|
64 |
+
with_mask=True, # map query can't see vlm tokens
|
65 |
+
topk_proposals=300,
|
66 |
+
num_lane=1800, # 300+1500
|
67 |
+
num_lanes_one2one=300,
|
68 |
+
k_one2many=5,
|
69 |
+
lambda_one2many=1.0,
|
70 |
+
num_extra=256,
|
71 |
+
n_control=11,
|
72 |
+
pc_range=point_cloud_range,
|
73 |
+
code_weights = [1.0, 1.0],
|
74 |
+
transformer=dict(
|
75 |
+
type='PETRTemporalTransformer',
|
76 |
+
input_dimension=256,
|
77 |
+
output_dimension=256,
|
78 |
+
num_layers=6,
|
79 |
+
embed_dims=256,
|
80 |
+
num_heads=8,
|
81 |
+
feedforward_dims=2048,
|
82 |
+
dropout=0.1,
|
83 |
+
with_cp=True,
|
84 |
+
flash_attn=True,),
|
85 |
+
train_cfg=dict(
|
86 |
+
assigner=dict(
|
87 |
+
type='LaneHungarianAssigner',
|
88 |
+
cls_cost=dict(type='FocalLossCost', weight=1.5),
|
89 |
+
reg_cost=dict(type='LaneL1Cost', weight=0.02),
|
90 |
+
iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
|
91 |
+
loss_cls=dict(
|
92 |
+
type='FocalLoss',
|
93 |
+
use_sigmoid=True,
|
94 |
+
gamma=2.0,
|
95 |
+
alpha=0.25,
|
96 |
+
loss_weight=1.5),
|
97 |
+
loss_bbox=dict(type='L1Loss', loss_weight=0.02),
|
98 |
+
loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
|
99 |
+
pts_bbox_head=dict(
|
100 |
+
type='StreamPETRHead',
|
101 |
+
num_classes=10,
|
102 |
+
in_channels=768,
|
103 |
+
out_dims=2048,
|
104 |
+
num_query=600,
|
105 |
+
with_mask=True,
|
106 |
+
memory_len=600,
|
107 |
+
topk_proposals=300,
|
108 |
+
num_propagated=300,
|
109 |
+
num_extra=256,
|
110 |
+
n_control=11, # align with centerline query defination
|
111 |
+
match_with_velo=False,
|
112 |
+
scalar=10, ##noise groups
|
113 |
+
noise_scale = 1.0,
|
114 |
+
dn_weight= 1.0, ##dn loss weight
|
115 |
+
split = 0.75, ###positive rate
|
116 |
+
code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
|
117 |
+
transformer=dict(
|
118 |
+
type='PETRTemporalTransformer',
|
119 |
+
input_dimension=256,
|
120 |
+
output_dimension=256,
|
121 |
+
num_layers=6,
|
122 |
+
embed_dims=256,
|
123 |
+
num_heads=8,
|
124 |
+
feedforward_dims=2048,
|
125 |
+
dropout=0.1,
|
126 |
+
with_cp=True,
|
127 |
+
flash_attn=True,
|
128 |
+
),
|
129 |
+
bbox_coder=dict(
|
130 |
+
type='NMSFreeCoder',
|
131 |
+
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
|
132 |
+
pc_range=point_cloud_range,
|
133 |
+
max_num=300,
|
134 |
+
voxel_size=voxel_size,
|
135 |
+
num_classes=10),
|
136 |
+
loss_cls=dict(
|
137 |
+
type='FocalLoss',
|
138 |
+
use_sigmoid=True,
|
139 |
+
gamma=2.0,
|
140 |
+
alpha=0.25,
|
141 |
+
loss_weight=2.0),
|
142 |
+
loss_bbox=dict(type='L1Loss', loss_weight=0.25),
|
143 |
+
loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
|
144 |
+
# model training and testing settings
|
145 |
+
train_cfg=dict(pts=dict(
|
146 |
+
grid_size=[512, 512, 1],
|
147 |
+
voxel_size=voxel_size,
|
148 |
+
point_cloud_range=point_cloud_range,
|
149 |
+
out_size_factor=4,
|
150 |
+
assigner=dict(
|
151 |
+
type='HungarianAssigner3D',
|
152 |
+
cls_cost=dict(type='FocalLossCost', weight=2.0),
|
153 |
+
reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
|
154 |
+
iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
|
155 |
+
pc_range=point_cloud_range),)
|
156 |
+
)
|
157 |
+
)
|
158 |
+
|
159 |
+
|
160 |
+
dataset_type = 'CustomNuScenesDataset'
|
161 |
+
data_root = './data/nuscenes/'
|
162 |
+
|
163 |
+
file_client_args = dict(backend='disk')
|
164 |
+
|
165 |
+
|
166 |
+
ida_aug_conf = {
|
167 |
+
"resize_lim": (0.37, 0.45),
|
168 |
+
"final_dim": (320, 640),
|
169 |
+
"bot_pct_lim": (0.0, 0.0),
|
170 |
+
"rot_lim": (0.0, 0.0),
|
171 |
+
"H": 900,
|
172 |
+
"W": 1600,
|
173 |
+
"rand_flip": False,
|
174 |
+
}
|
175 |
+
|
176 |
+
train_pipeline = [
|
177 |
+
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
|
178 |
+
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
|
179 |
+
with_label=True, with_bbox_depth=True),
|
180 |
+
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
|
181 |
+
dict(type='ObjectNameFilter', classes=class_names),
|
182 |
+
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
|
183 |
+
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
|
184 |
+
dict(type='LoadAnnoatationVQA',
|
185 |
+
base_vqa_path='./data/nuscenes/vqa/train/',
|
186 |
+
base_desc_path='./data/nuscenes/desc/train/',
|
187 |
+
base_conv_path='./data/nuscenes/conv/train/',
|
188 |
+
base_key_path='./data/nuscenes/keywords/train/',
|
189 |
+
tokenizer=llm_path,
|
190 |
+
max_length=2048,
|
191 |
+
ignore_type=[],
|
192 |
+
lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
|
193 |
+
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
|
194 |
+
dict(type='PadMultiViewImage', size_divisor=32),
|
195 |
+
dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
|
196 |
+
dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
|
197 |
+
meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
|
198 |
+
]
|
199 |
+
test_pipeline = [
|
200 |
+
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
|
201 |
+
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
|
202 |
+
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
|
203 |
+
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
|
204 |
+
dict(type='PadMultiViewImage', size_divisor=32),
|
205 |
+
dict(type='LoadAnnoatationVQATest',
|
206 |
+
base_vqa_path='./data/nuscenes/vqa/val/',
|
207 |
+
base_conv_path='./data/nuscenes/conv/val/',
|
208 |
+
base_counter_path='./data/nuscenes/eval_cf/',
|
209 |
+
load_type=["planning"], # please don't test all the questions in single test, it requires quite long time
|
210 |
+
tokenizer=llm_path,
|
211 |
+
max_length=2048,),
|
212 |
+
dict(
|
213 |
+
type='MultiScaleFlipAug3D',
|
214 |
+
img_scale=(1333, 800),
|
215 |
+
pts_scale_ratio=1,
|
216 |
+
flip=False,
|
217 |
+
transforms=[
|
218 |
+
dict(
|
219 |
+
type='PETRFormatBundle3D',
|
220 |
+
collect_keys=collect_keys,
|
221 |
+
class_names=class_names,
|
222 |
+
with_label=False),
|
223 |
+
dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
|
224 |
+
meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
|
225 |
+
])
|
226 |
+
]
|
227 |
+
|
228 |
+
data = dict(
|
229 |
+
samples_per_gpu=batch_size,
|
230 |
+
workers_per_gpu=2,
|
231 |
+
train=dict(
|
232 |
+
type=dataset_type,
|
233 |
+
data_root=data_root,
|
234 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
|
235 |
+
seq_split_num=1, # streaming video training
|
236 |
+
seq_mode=True, # streaming video training
|
237 |
+
pipeline=train_pipeline,
|
238 |
+
classes=class_names,
|
239 |
+
modality=input_modality,
|
240 |
+
test_mode=False,
|
241 |
+
use_valid_flag=True,
|
242 |
+
filter_empty_gt=False,
|
243 |
+
box_type_3d='LiDAR'),
|
244 |
+
val=dict(
|
245 |
+
type=dataset_type,
|
246 |
+
eval_mode=['lane', 'det'],
|
247 |
+
pipeline=test_pipeline,
|
248 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
|
249 |
+
classes=class_names,
|
250 |
+
modality=input_modality),
|
251 |
+
test=dict(
|
252 |
+
type=dataset_type,
|
253 |
+
eval_mode=['lane', 'det'],
|
254 |
+
pipeline=test_pipeline,
|
255 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
|
256 |
+
classes=class_names,
|
257 |
+
modality=input_modality),
|
258 |
+
shuffler_sampler=dict(
|
259 |
+
type='InfiniteGroupEachSampleInBatchSampler',
|
260 |
+
seq_split_num=2,
|
261 |
+
warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
|
262 |
+
num_iters_to_seq=num_iters_per_epoch,
|
263 |
+
),
|
264 |
+
nonshuffler_sampler=dict(type='DistributedSampler')
|
265 |
+
)
|
266 |
+
|
267 |
+
|
268 |
+
optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
|
269 |
+
lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
|
270 |
+
paramwise_cfg={'decay_rate': 0.9,
|
271 |
+
'head_decay_rate': 4.0,
|
272 |
+
'lm_head_decay_rate': 0.1,
|
273 |
+
'decay_type': 'vit_wise',
|
274 |
+
'num_layers': 24,
|
275 |
+
})
|
276 |
+
|
277 |
+
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
|
278 |
+
# learning policy
|
279 |
+
lr_config = dict(
|
280 |
+
policy='CosineAnnealing',
|
281 |
+
warmup='linear',
|
282 |
+
warmup_iters=500,
|
283 |
+
warmup_ratio=1.0 / 3,
|
284 |
+
min_lr_ratio=1e-3,
|
285 |
+
)
|
286 |
+
|
287 |
+
evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
|
288 |
+
|
289 |
+
find_unused_parameters=False #### when use checkpoint, find_unused_parameters must be False
|
290 |
+
checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
|
291 |
+
runner = dict(
|
292 |
+
type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
|
293 |
+
load_from=None
|
294 |
+
resume_from=None
|
projects/configs/OmniDrive/eva_large_llama7b.py
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = [
|
2 |
+
'../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
|
3 |
+
'../../../mmdetection3d/configs/_base_/default_runtime.py'
|
4 |
+
]
|
5 |
+
backbone_norm_cfg = dict(type='LN', requires_grad=True)
|
6 |
+
plugin=True
|
7 |
+
plugin_dir='projects/mmdet3d_plugin/'
|
8 |
+
|
9 |
+
# If point cloud range is changed, the models should also change their point
|
10 |
+
# cloud range accordingly
|
11 |
+
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
|
12 |
+
voxel_size = [0.2, 0.2, 8]
|
13 |
+
img_norm_cfg = dict(
|
14 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
15 |
+
# For nuScenes we usually do 10-class detection
|
16 |
+
class_names = [
|
17 |
+
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
|
18 |
+
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
|
19 |
+
]
|
20 |
+
|
21 |
+
num_gpus = 8
|
22 |
+
batch_size = 2
|
23 |
+
num_iters_per_epoch = 28130 // (num_gpus * batch_size)
|
24 |
+
num_epochs = 6
|
25 |
+
llm_path = 'ckpts/final/'
|
26 |
+
|
27 |
+
collect_keys=['lidar2img', 'intrinsics', 'extrinsics','timestamp', 'img_timestamp', 'ego_pose', 'ego_pose_inv', 'command', 'can_bus']
|
28 |
+
input_modality = dict(
|
29 |
+
use_lidar=False,
|
30 |
+
use_camera=True,
|
31 |
+
use_radar=False,
|
32 |
+
use_map=False,
|
33 |
+
use_external=True)
|
34 |
+
model = dict(
|
35 |
+
type='Petr3D',
|
36 |
+
save_path='./results_planning_only/', #save path for vlm models.
|
37 |
+
use_grid_mask=True,
|
38 |
+
frozen=False,
|
39 |
+
use_lora=True,
|
40 |
+
tokenizer=llm_path,
|
41 |
+
lm_head=llm_path, # set to None if don't use llm head
|
42 |
+
img_backbone=dict(
|
43 |
+
type='EVAViT',
|
44 |
+
img_size=640,
|
45 |
+
patch_size=16,
|
46 |
+
window_size=16,
|
47 |
+
in_chans=3,
|
48 |
+
embed_dim=1024,
|
49 |
+
depth=24,
|
50 |
+
num_heads=16,
|
51 |
+
mlp_ratio=4*2/3,
|
52 |
+
window_block_indexes = (
|
53 |
+
list(range(0, 2)) + list(range(3, 5)) + list(range(6, 8)) + list(range(9, 11)) + list(range(12, 14)) + list(range(15, 17)) + list(range(18, 20)) + list(range(21, 23))
|
54 |
+
),
|
55 |
+
qkv_bias=True,
|
56 |
+
drop_path_rate=0.3,
|
57 |
+
flash_attn=True,
|
58 |
+
with_cp=True,
|
59 |
+
frozen=False,),
|
60 |
+
map_head=dict(
|
61 |
+
type='PETRHeadM',
|
62 |
+
num_classes=1,
|
63 |
+
in_channels=1024,
|
64 |
+
out_dims=4096,
|
65 |
+
memory_len=600,
|
66 |
+
with_mask=True, # map query can't see vlm tokens
|
67 |
+
topk_proposals=300,
|
68 |
+
num_lane=1800, # 300+1500
|
69 |
+
num_lanes_one2one=300,
|
70 |
+
k_one2many=5,
|
71 |
+
lambda_one2many=1.0,
|
72 |
+
num_extra=256,
|
73 |
+
n_control=11,
|
74 |
+
pc_range=point_cloud_range,
|
75 |
+
code_weights = [1.0, 1.0],
|
76 |
+
transformer=dict(
|
77 |
+
type='PETRTemporalTransformer',
|
78 |
+
input_dimension=256,
|
79 |
+
output_dimension=256,
|
80 |
+
num_layers=6,
|
81 |
+
embed_dims=256,
|
82 |
+
num_heads=8,
|
83 |
+
feedforward_dims=2048,
|
84 |
+
dropout=0.1,
|
85 |
+
with_cp=True,
|
86 |
+
flash_attn=True,),
|
87 |
+
train_cfg=dict(
|
88 |
+
assigner=dict(
|
89 |
+
type='LaneHungarianAssigner',
|
90 |
+
cls_cost=dict(type='FocalLossCost', weight=1.5),
|
91 |
+
reg_cost=dict(type='LaneL1Cost', weight=0.02),
|
92 |
+
iou_cost=dict(type='IoUCost', weight=0.0))), # dummy
|
93 |
+
loss_cls=dict(
|
94 |
+
type='FocalLoss',
|
95 |
+
use_sigmoid=True,
|
96 |
+
gamma=2.0,
|
97 |
+
alpha=0.25,
|
98 |
+
loss_weight=1.5),
|
99 |
+
loss_bbox=dict(type='L1Loss', loss_weight=0.02),
|
100 |
+
loss_dir=dict(type='PtsDirCosLoss', loss_weight=0.0)), #
|
101 |
+
pts_bbox_head=dict(
|
102 |
+
type='StreamPETRHead',
|
103 |
+
num_classes=10,
|
104 |
+
in_channels=1024,
|
105 |
+
out_dims=4096,
|
106 |
+
num_query=600,
|
107 |
+
with_mask=True,
|
108 |
+
memory_len=600,
|
109 |
+
topk_proposals=300,
|
110 |
+
num_propagated=300,
|
111 |
+
num_extra=256,
|
112 |
+
n_control=11, # align with centerline query defination
|
113 |
+
match_with_velo=False,
|
114 |
+
scalar=10, ##noise groups
|
115 |
+
noise_scale = 1.0,
|
116 |
+
dn_weight= 1.0, ##dn loss weight
|
117 |
+
split = 0.75, ###positive rate
|
118 |
+
code_weights = [2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
|
119 |
+
transformer=dict(
|
120 |
+
type='PETRTemporalTransformer',
|
121 |
+
input_dimension=256,
|
122 |
+
output_dimension=256,
|
123 |
+
num_layers=6,
|
124 |
+
embed_dims=256,
|
125 |
+
num_heads=8,
|
126 |
+
feedforward_dims=2048,
|
127 |
+
dropout=0.1,
|
128 |
+
with_cp=True,
|
129 |
+
flash_attn=True,
|
130 |
+
),
|
131 |
+
bbox_coder=dict(
|
132 |
+
type='NMSFreeCoder',
|
133 |
+
post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
|
134 |
+
pc_range=point_cloud_range,
|
135 |
+
max_num=300,
|
136 |
+
voxel_size=voxel_size,
|
137 |
+
num_classes=10),
|
138 |
+
loss_cls=dict(
|
139 |
+
type='FocalLoss',
|
140 |
+
use_sigmoid=True,
|
141 |
+
gamma=2.0,
|
142 |
+
alpha=0.25,
|
143 |
+
loss_weight=2.0),
|
144 |
+
loss_bbox=dict(type='L1Loss', loss_weight=0.25),
|
145 |
+
loss_iou=dict(type='GIoULoss', loss_weight=0.0),),
|
146 |
+
# model training and testing settings
|
147 |
+
train_cfg=dict(pts=dict(
|
148 |
+
grid_size=[512, 512, 1],
|
149 |
+
voxel_size=voxel_size,
|
150 |
+
point_cloud_range=point_cloud_range,
|
151 |
+
out_size_factor=4,
|
152 |
+
assigner=dict(
|
153 |
+
type='HungarianAssigner3D',
|
154 |
+
cls_cost=dict(type='FocalLossCost', weight=2.0),
|
155 |
+
reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
|
156 |
+
iou_cost=dict(type='IoUCost', weight=0.0), # Fake cost. This is just to make it compatible with DETR head.
|
157 |
+
pc_range=point_cloud_range),)
|
158 |
+
)
|
159 |
+
)
|
160 |
+
|
161 |
+
|
162 |
+
dataset_type = 'CustomNuScenesDataset'
|
163 |
+
data_root = './data/nuscenes/'
|
164 |
+
|
165 |
+
file_client_args = dict(backend='disk')
|
166 |
+
|
167 |
+
|
168 |
+
ida_aug_conf = {
|
169 |
+
"resize_lim": (0.37, 0.45),
|
170 |
+
"final_dim": (320, 640),
|
171 |
+
"bot_pct_lim": (0.0, 0.0),
|
172 |
+
"rot_lim": (0.0, 0.0),
|
173 |
+
"H": 900,
|
174 |
+
"W": 1600,
|
175 |
+
"rand_flip": False,
|
176 |
+
}
|
177 |
+
|
178 |
+
train_pipeline = [
|
179 |
+
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
|
180 |
+
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_bbox=True,
|
181 |
+
with_label=True, with_bbox_depth=True),
|
182 |
+
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
|
183 |
+
dict(type='ObjectNameFilter', classes=class_names),
|
184 |
+
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=True),
|
185 |
+
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
|
186 |
+
dict(type='LoadAnnoatationVQA',
|
187 |
+
base_vqa_path='./data/nuscenes/vqa/train/',
|
188 |
+
base_desc_path='./data/nuscenes/desc/train/',
|
189 |
+
base_conv_path='./data/nuscenes/conv/train/',
|
190 |
+
base_key_path='./data/nuscenes/keywords/train/',
|
191 |
+
tokenizer=llm_path,
|
192 |
+
max_length=2048,
|
193 |
+
ignore_type=[],
|
194 |
+
lane_objs_info="./data/nuscenes/lane_obj_train.pkl"),
|
195 |
+
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
|
196 |
+
dict(type='PadMultiViewImage', size_divisor=32),
|
197 |
+
dict(type='PETRFormatBundle3D', class_names=class_names, collect_keys=collect_keys + ['prev_exists']),
|
198 |
+
dict(type='Collect3D', keys=['lane_pts', 'input_ids', 'vlm_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'img', 'gt_bboxes', 'gt_labels', 'centers2d', 'depths', 'prev_exists'] + collect_keys,
|
199 |
+
meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token', 'gt_bboxes_3d','gt_labels_3d'))
|
200 |
+
]
|
201 |
+
test_pipeline = [
|
202 |
+
dict(type='LoadMultiViewImageFromFiles', to_float32=True),
|
203 |
+
dict(type='ResizeCropFlipRotImage', data_aug_conf = ida_aug_conf, training=False),
|
204 |
+
dict(type='ResizeMultiview3D', img_scale=(640, 640), keep_ratio=False, multiscale_mode='value'),
|
205 |
+
dict(type='NormalizeMultiviewImage', **img_norm_cfg),
|
206 |
+
dict(type='PadMultiViewImage', size_divisor=32),
|
207 |
+
dict(type='LoadAnnoatationVQATest',
|
208 |
+
base_vqa_path='./data/nuscenes/vqa/val/',
|
209 |
+
base_conv_path='./data/nuscenes/conv/val/',
|
210 |
+
base_counter_path='./data/nuscenes/eval_cf/',
|
211 |
+
load_type=["planning"], # please don't test all the questions in single test, it requires quite long time
|
212 |
+
tokenizer=llm_path,
|
213 |
+
max_length=2048,),
|
214 |
+
dict(
|
215 |
+
type='MultiScaleFlipAug3D',
|
216 |
+
img_scale=(1333, 800),
|
217 |
+
pts_scale_ratio=1,
|
218 |
+
flip=False,
|
219 |
+
transforms=[
|
220 |
+
dict(
|
221 |
+
type='PETRFormatBundle3D',
|
222 |
+
collect_keys=collect_keys,
|
223 |
+
class_names=class_names,
|
224 |
+
with_label=False),
|
225 |
+
dict(type='Collect3D', keys=['input_ids', 'img'] + collect_keys,
|
226 |
+
meta_keys=('sample_idx', 'vlm_labels', 'filename', 'ori_shape', 'img_shape','pad_shape', 'scale_factor', 'flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'scene_token'))
|
227 |
+
])
|
228 |
+
]
|
229 |
+
|
230 |
+
data = dict(
|
231 |
+
samples_per_gpu=batch_size,
|
232 |
+
workers_per_gpu=2,
|
233 |
+
train=dict(
|
234 |
+
type=dataset_type,
|
235 |
+
data_root=data_root,
|
236 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_train.pkl',
|
237 |
+
seq_split_num=1, # streaming video training
|
238 |
+
seq_mode=True, # streaming video training
|
239 |
+
pipeline=train_pipeline,
|
240 |
+
classes=class_names,
|
241 |
+
modality=input_modality,
|
242 |
+
test_mode=False,
|
243 |
+
use_valid_flag=True,
|
244 |
+
filter_empty_gt=False,
|
245 |
+
box_type_3d='LiDAR'),
|
246 |
+
val=dict(
|
247 |
+
type=dataset_type,
|
248 |
+
eval_mode=['lane', 'det'],
|
249 |
+
pipeline=test_pipeline,
|
250 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
|
251 |
+
classes=class_names,
|
252 |
+
modality=input_modality),
|
253 |
+
test=dict(
|
254 |
+
type=dataset_type,
|
255 |
+
eval_mode=['lane', 'det'],
|
256 |
+
pipeline=test_pipeline,
|
257 |
+
ann_file=data_root + 'nuscenes2d_ego_temporal_infos_val.pkl',
|
258 |
+
classes=class_names,
|
259 |
+
modality=input_modality),
|
260 |
+
shuffler_sampler=dict(
|
261 |
+
type='InfiniteGroupEachSampleInBatchSampler',
|
262 |
+
seq_split_num=2,
|
263 |
+
warmup_split_num=10, # lane det and vlm need short term temporal fusion in the early stage of training
|
264 |
+
num_iters_to_seq=num_iters_per_epoch,
|
265 |
+
),
|
266 |
+
nonshuffler_sampler=dict(type='DistributedSampler')
|
267 |
+
)
|
268 |
+
|
269 |
+
|
270 |
+
optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', type='AdamW',
|
271 |
+
lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-4,
|
272 |
+
paramwise_cfg={'decay_rate': 0.9,
|
273 |
+
'head_decay_rate': 4.0,
|
274 |
+
'lm_head_decay_rate': 0.1,
|
275 |
+
'decay_type': 'vit_wise',
|
276 |
+
'num_layers': 24,
|
277 |
+
})
|
278 |
+
|
279 |
+
optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))
|
280 |
+
# learning policy
|
281 |
+
lr_config = dict(
|
282 |
+
policy='CosineAnnealing',
|
283 |
+
warmup='linear',
|
284 |
+
warmup_iters=500,
|
285 |
+
warmup_ratio=1.0 / 3,
|
286 |
+
min_lr_ratio=1e-3,
|
287 |
+
)
|
288 |
+
|
289 |
+
evaluation = dict(interval=num_iters_per_epoch*num_epochs, pipeline=test_pipeline)
|
290 |
+
|
291 |
+
find_unused_parameters=False #### when use checkpoint, find_unused_parameters must be False
|
292 |
+
checkpoint_config = dict(interval=num_iters_per_epoch//2, max_keep_ckpts=3)
|
293 |
+
runner = dict(
|
294 |
+
type='IterBasedRunner', max_iters=num_epochs * num_iters_per_epoch)
|
295 |
+
load_from=None
|
296 |
+
resume_from=None
|
projects/mmdet3d_plugin/__init__.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
|
2 |
+
from .core.bbox.coders.nms_free_coder import NMSFreeCoder
|
3 |
+
from .core.bbox.match_costs import BBox3DL1Cost
|
4 |
+
from .core.hook import *
|
5 |
+
from .datasets import CustomNuScenesDataset
|
6 |
+
from .datasets.pipelines import *
|
7 |
+
from .models.losses import *
|
8 |
+
from .models.dense_heads import *
|
9 |
+
from .models.detectors import *
|
10 |
+
from .models.necks import *
|
11 |
+
from .models.backbones import *
|
projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (656 Bytes). View file
|
|
projects/mmdet3d_plugin/core/apis/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .train import custom_train_model
|
2 |
+
from .mmdet_train import custom_train_detector
|
3 |
+
from .test import custom_multi_gpu_test
|
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (365 Bytes). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (312 Bytes). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-38.pyc
ADDED
Binary file (4.57 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/mmdet_train.cpython-39.pyc
ADDED
Binary file (4.53 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-38.pyc
ADDED
Binary file (4.05 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/test.cpython-39.pyc
ADDED
Binary file (4.01 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-38.pyc
ADDED
Binary file (1.18 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/__pycache__/train.cpython-39.pyc
ADDED
Binary file (1.11 kB). View file
|
|
projects/mmdet3d_plugin/core/apis/mmdet_train.py
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ---------------------------------------------
|
2 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
3 |
+
# ---------------------------------------------
|
4 |
+
# Modified by Zhiqi Li
|
5 |
+
# ---------------------------------------------
|
6 |
+
# ---------------------------------------------
|
7 |
+
# Modified by Shihao Wang
|
8 |
+
# ---------------------------------------------
|
9 |
+
import random
|
10 |
+
import warnings
|
11 |
+
|
12 |
+
import numpy as np
|
13 |
+
import torch
|
14 |
+
import torch.distributed as dist
|
15 |
+
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
|
16 |
+
from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
|
17 |
+
Fp16OptimizerHook, OptimizerHook, build_optimizer,
|
18 |
+
build_runner, get_dist_info)
|
19 |
+
from mmcv.utils import build_from_cfg
|
20 |
+
|
21 |
+
from mmdet.core import EvalHook
|
22 |
+
|
23 |
+
from mmdet.datasets import (build_dataset,
|
24 |
+
replace_ImageToTensor)
|
25 |
+
from mmdet.utils import get_root_logger
|
26 |
+
import time
|
27 |
+
import os.path as osp
|
28 |
+
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
|
29 |
+
from projects.mmdet3d_plugin.core.evaluation.eval_hooks import CustomDistEvalHook
|
30 |
+
from projects.mmdet3d_plugin.datasets import custom_build_dataset
|
31 |
+
def custom_train_detector(model,
                          dataset,
                          cfg,
                          distributed=False,
                          validate=False,
                          timestamp=None,
                          eval_model=None,
                          meta=None):
    """Build data loaders, runner and hooks from ``cfg`` and run training.

    Args:
        model: Model to train. It is moved to GPU and wrapped in
            ``MMDistributedDataParallel`` (distributed) or
            ``MMDataParallel`` (otherwise).
        dataset: A dataset or a list/tuple of datasets; one data loader is
            built per dataset.
        cfg: Config object. Read with both attribute access and ``.get``,
            so presumably an mmcv ``Config`` -- confirm against the caller.
        distributed (bool): Whether to use distributed wrappers, samplers
            and the distributed evaluation hook.
        validate (bool): Whether to register an evaluation hook built from
            ``cfg.data.val``.
        timestamp: Stored on the runner as ``runner.timestamp`` so that the
            ``.log`` and ``.log.json`` file names match.
        eval_model: Optional second model. When given it is wrapped the same
            way as ``model`` and passed to the runner as ``eval_model``.
        meta: Passed through to the runner unchanged.

    Raises:
        AssertionError: If ``validate`` is set and ``cfg.data.val`` asks for
            ``samples_per_gpu > 1``, or if ``cfg.total_epochs`` disagrees
            with ``cfg.runner.max_epochs``.
    """
    logger = get_root_logger(cfg.log_level)

    # Resolve the runner config first: the data loaders below read
    # ``cfg.runner``, so the legacy ``total_epochs`` fallback has to be in
    # place before they are built.
    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    elif 'total_epochs' in cfg:
        assert cfg.total_epochs == cfg.runner.max_epochs

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiments')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiments')
        # The deprecated key wins whenever it is present.
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed,
            # e.g. dict(type='DistributedGroupSampler')
            shuffler_sampler=cfg.data.shuffler_sampler,
            # e.g. dict(type='DistributedSampler')
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
            runner_type=cfg.runner,
        ) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
        if eval_model is not None:
            eval_model = MMDistributedDataParallel(
                eval_model.cuda(),
                device_ids=[torch.cuda.current_device()],
                broadcast_buffers=False,
                find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
        if eval_model is not None:
            eval_model = MMDataParallel(
                eval_model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    runner_args = dict(
        model=model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta)
    if eval_model is not None:
        # Only forwarded when supplied, so runners that do not accept an
        # ``eval_model`` argument keep working.
        runner_args['eval_model'] = eval_model
    runner = build_runner(cfg.runner, default_args=runner_args)

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))

    if distributed and isinstance(runner, EpochBasedRunner):
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Raised explicitly (instead of ``assert False``) so the guard
            # is not stripped when Python runs with ``-O``.
            raise AssertionError(
                'validation with samples_per_gpu > 1 is not supported')
        val_dataset = custom_build_dataset(cfg.data.val, dict(test_mode=True))

        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False,
            # e.g. dict(type='DistributedGroupSampler')
            shuffler_sampler=cfg.data.shuffler_sampler,
            # e.g. dict(type='DistributedSampler')
            nonshuffler_sampler=cfg.data.nonshuffler_sampler,
        )
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_cfg['jsonfile_prefix'] = osp.join('val', cfg.work_dir, time.ctime().replace(' ','_').replace(':','_'))
        eval_hook = CustomDistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            # Copy before popping so the config itself is left untouched.
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from, resume_optimizer=cfg.get('resume_optimizer', True))
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
|
204 |
+
|
projects/mmdet3d_plugin/core/apis/test.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ---------------------------------------------
|
2 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
3 |
+
# ---------------------------------------------
|
4 |
+
# Modified by Zhiqi Li
|
5 |
+
# ---------------------------------------------
|
6 |
+
import os.path as osp
|
7 |
+
import pickle
|
8 |
+
import shutil
|
9 |
+
import tempfile
|
10 |
+
import time
|
11 |
+
|
12 |
+
import mmcv
|
13 |
+
import torch
|
14 |
+
import torch.distributed as dist
|
15 |
+
from mmcv.image import tensor2imgs
|
16 |
+
from mmcv.runner import get_dist_info
|
17 |
+
|
18 |
+
from mmdet.core import encode_mask_results
|
19 |
+
|
20 |
+
|
21 |
+
import mmcv
|
22 |
+
import numpy as np
|
23 |
+
import pycocotools.mask as mask_util
|
24 |
+
|
25 |
+
def custom_encode_mask_results(mask_results):
    """Run-length encode a collection of bitmap masks (semantic masks only).

    Args:
        mask_results: Sequence of masks, one entry per class. Each entry is
            indexed as ``entry[:, :, np.newaxis]``, so it is presumably a
            2-D (H, W) array -- confirm against the caller.

    Returns:
        list: A one-element list whose only item is the list of encoded
        masks, in the same order as ``mask_results``.
    """
    rle_per_class = [
        # A trailing axis is added and the array made Fortran-ordered uint8
        # before encoding; element 0 of the encoder output is kept.
        mask_util.encode(
            np.array(segm[:, :, np.newaxis], order='F', dtype='uint8'))[0]
        for segm in mask_results
    ]
    return [rle_per_class]
|
44 |
+
|
45 |
+
def custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Run inference on every rank and gather the results.

    The model is called with ``return_loss=False, rescale=True`` on each
    batch. A result may be a plain sequence of box results, or a dict with
    optional ``'bbox_results'`` and ``'mask_results'`` entries.

    Args:
        model (nn.Module): Model to be tested.
        data_loader: Data loader; its ``dataset`` length sizes the progress
            bar and truncates the gathered results.
        tmpdir (str, optional): Directory handed to ``collect_results_cpu``
            for box results; mask results use ``tmpdir + '_mask'``.
        gpu_collect (bool): If True gather through ``collect_results_gpu``,
            otherwise through ``collect_results_cpu``.

    Returns:
        The gathered box results when no mask results are available on this
        rank, otherwise a dict with keys ``'bbox_results'`` and
        ``'mask_results'``.
    """
    model.eval()
    bbox_results = []
    mask_results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    have_mask = False
    for data in data_loader:
        # Reset per batch so a dict result without 'bbox_results' neither
        # raises NameError nor reuses the previous batch's size.
        batch_size = 0
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)
            if isinstance(result, dict):
                if 'bbox_results' in result:
                    bbox_result = result['bbox_results']
                    batch_size = len(bbox_result)
                    bbox_results.extend(bbox_result)
                if result.get('mask_results') is not None:
                    # encode mask results
                    mask_result = custom_encode_mask_results(
                        result['mask_results'])
                    mask_results.extend(mask_result)
                    have_mask = True
            else:
                batch_size = len(result)
                bbox_results.extend(result)

        if rank == 0:
            # Rank 0 advances the bar on behalf of every rank.
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        bbox_results = collect_results_gpu(bbox_results, len(dataset))
        if have_mask:
            mask_results = collect_results_gpu(mask_results, len(dataset))
        else:
            mask_results = None
    else:
        bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)
        if have_mask:
            # Masks go to a sibling directory so the per-rank part files of
            # the two collections cannot collide.
            mask_tmpdir = tmpdir + '_mask' if tmpdir is not None else None
            mask_results = collect_results_cpu(
                mask_results, len(dataset), mask_tmpdir)
        else:
            mask_results = None

    if mask_results is None:
        return bbox_results
    return {'bbox_results': bbox_results, 'mask_results': mask_results}
|
114 |
+
|
115 |
+
|
116 |
+
def collect_results_cpu(result_part, size, tmpdir=None):
    """Gather per-rank results on rank 0 through files in a shared directory.

    Every rank dumps ``result_part`` to ``part_<rank>.pkl`` inside
    ``tmpdir``; after a barrier, rank 0 loads all parts, concatenates them
    in rank order, truncates to ``size`` and deletes the directory.

    Args:
        result_part: This rank's results (an iterable).
        size (int): Number of results to keep after concatenation.
        tmpdir (str, optional): Directory for the part files. When None,
            rank 0 creates one under ``.dist_test`` and broadcasts its path.

    Returns:
        list | None: The gathered results on rank 0, None on other ranks.
    """
    rank, world_size = get_dist_info()

    if tmpdir is not None:
        mmcv.mkdir_or_exist(tmpdir)
    else:
        # The path is shared as a fixed-length uint8 tensor padded with
        # spaces (byte value 32), which are stripped again after decoding.
        MAX_LEN = 512
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            created_dir = tempfile.mkdtemp(dir='.dist_test')
            path_bytes = torch.tensor(
                bytearray(created_dir.encode()),
                dtype=torch.uint8,
                device='cuda')
            dir_tensor[:len(path_bytes)] = path_bytes
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()

    # dump this rank's part, then wait until every rank has written its own
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()

    if rank != 0:
        return None

    # load results of all parts from tmp dir
    loaded_parts = [
        mmcv.load(osp.join(tmpdir, f'part_{i}.pkl'))
        for i in range(world_size)
    ]
    # Parts are concatenated rank by rank (not interleaved) because the
    # evaluation sampler gives each GPU a contiguous run of samples.
    gathered = []
    for part in loaded_parts:
        gathered.extend(list(part))
    # the dataloader may pad some samples
    gathered = gathered[:size]
    # remove tmp dir
    shutil.rmtree(tmpdir)
    return gathered
|
161 |
+
|
162 |
+
|
163 |
+
def collect_results_gpu(result_part, size):
    """Gather per-rank results; delegates to ``collect_results_cpu``.

    Args:
        result_part: This rank's results.
        size (int): Number of results to keep after gathering.

    Returns:
        Whatever ``collect_results_cpu(result_part, size)`` returns.
    """
    # The gathered results must be handed back to the caller; without the
    # ``return`` every rank would receive None.
    return collect_results_cpu(result_part, size)
|
projects/mmdet3d_plugin/core/apis/train.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ---------------------------------------------
|
2 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
3 |
+
# ---------------------------------------------
|
4 |
+
# Modified by Zhiqi Li
|
5 |
+
# ---------------------------------------------
|
6 |
+
# ---------------------------------------------
|
7 |
+
# Modified by Shihao Wang
|
8 |
+
# ---------------------------------------------
|
9 |
+
|
10 |
+
from .mmdet_train import custom_train_detector
|
11 |
+
from mmseg.apis import train_segmentor
|
12 |
+
from mmdet.apis import train_detector
|
13 |
+
|
14 |
+
def custom_train_model(model,
                       dataset,
                       cfg,
                       distributed=False,
                       validate=False,
                       timestamp=None,
                       eval_model=None,
                       meta=None):
    """Launch training through ``custom_train_detector`` according to cfg.

    This wrapper exists because the runner needs a different eval hook; it
    should be deprecated in the future.

    Args:
        model: Model to train.
        dataset: Dataset or list of datasets.
        cfg: Config object; ``cfg.model.type`` selects the training path.
        distributed (bool): Forwarded to ``custom_train_detector``.
        validate (bool): Forwarded to ``custom_train_detector``.
        timestamp: Forwarded to ``custom_train_detector``.
        eval_model: Forwarded to ``custom_train_detector``.
        meta: Forwarded to ``custom_train_detector``.

    Raises:
        AssertionError: If ``cfg.model.type`` is ``'EncoderDecoder3D'``,
            which this entry point does not handle.
    """
    if cfg.model.type in ['EncoderDecoder3D']:
        # Raised explicitly (instead of ``assert False``) so the guard is
        # not stripped when Python runs with ``-O``.
        raise AssertionError(
            "model type 'EncoderDecoder3D' is not supported by "
            'custom_train_model')
    custom_train_detector(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        eval_model=eval_model,
        meta=meta)
|
39 |
+
|
40 |
+
|
41 |
+
def train_model(model,
                dataset,
                cfg,
                distributed=False,
                validate=False,
                timestamp=None,
                meta=None):
    """Dispatch training to the segmentor or detector entry point.

    ``train_segmentor`` is used when ``cfg.model.type`` is
    ``'EncoderDecoder3D'``; every other model type goes to
    ``train_detector``. Both receive the same arguments. This wrapper exists
    because the runner needs a different eval hook and should be deprecated
    in the future.
    """
    is_segmentor = cfg.model.type in ('EncoderDecoder3D', )
    launch = train_segmentor if is_segmentor else train_detector
    launch(
        model,
        dataset,
        cfg,
        distributed=distributed,
        validate=validate,
        timestamp=timestamp,
        meta=meta)
|
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc
ADDED
Binary file (1.39 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-39.pyc
ADDED
Binary file (1.33 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .hungarian_assigner_3d import HungarianAssigner3D
|
2 |
+
from .hungarian_assigner_2d import HungarianAssigner2D
|
3 |
+
from .map_assigner import LaneHungarianAssigner
|
4 |
+
__all__ = ['HungarianAssigner3D', 'HungarianAssigner2D']
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (437 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (384 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-38.pyc
ADDED
Binary file (5.6 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_2d.cpython-39.pyc
ADDED
Binary file (5.52 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc
ADDED
Binary file (2.4 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-39.pyc
ADDED
Binary file (2.31 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-38.pyc
ADDED
Binary file (1.6 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/map_assigner.cpython-39.pyc
ADDED
Binary file (1.52 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_2d.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
2 |
+
# ---------------------------------------------
|
3 |
+
# Modified by Shihao Wang
|
4 |
+
# ---------------------------------------------
|
5 |
+
import torch
|
6 |
+
|
7 |
+
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
|
8 |
+
from mmdet.core.bbox.assigners import AssignResult
|
9 |
+
from mmdet.core.bbox.assigners import BaseAssigner
|
10 |
+
from mmdet.core.bbox.match_costs import build_match_cost
|
11 |
+
from mmdet.core import bbox_cxcywh_to_xyxy
|
12 |
+
|
13 |
+
try:
|
14 |
+
from scipy.optimize import linear_sum_assignment
|
15 |
+
except ImportError:
|
16 |
+
linear_sum_assignment = None
|
17 |
+
|
18 |
+
|
19 |
+
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner2D(BaseAssigner):
    """One-to-one Hungarian matching between 2-D predictions and targets.

    The matching cost is the sum of four terms, each built with
    ``build_match_cost``: a classification cost, a box regression cost, an
    IoU cost and a 2-D centre cost. Targets contain no "no object" entry,
    so there are generally more predictions than targets; predictions left
    unmatched are treated as background. Each prediction therefore ends up
    with:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        cls_cost (dict): Config of the classification cost.
        reg_cost (dict): Config of the box regression cost.
        iou_cost (dict): Config of the IoU cost.
        centers2d_cost (dict): Config of the 2-D centre cost.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0),
                 centers2d_cost=dict(type='BBox3DL1Cost', weight=1.0)):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.centers2d_cost = build_match_cost(centers2d_cost)

    def assign(self,
               bbox_pred,
               cls_pred,
               pred_centers2d,
               gt_bboxes,
               gt_labels,
               centers2d,
               img_meta,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Match every query prediction to a ground truth or to background.

        Steps, in order:

        1. initialise every prediction's gt index and label to -1
        2. compute the summed matching cost
        3. run Hungarian matching on CPU
        4. set all gt indices to 0 (background), then give each matched
           prediction its gt index plus 1 and the gt's label

        Args:
            bbox_pred (Tensor): Predicted boxes with normalized coordinates
                (cx, cy, w, h), which are all in range [0, 1]. Shape
                [num_query, 4].
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            pred_centers2d (Tensor): Predicted 2-D centres; compared against
                ``centers2d`` divided by (img_w, img_h), so presumably
                normalized, shape [num_query, 2] -- confirm with the caller.
            gt_bboxes (Tensor): Ground truth boxes with unnormalized
                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
            centers2d (Tensor): Ground truth 2-D centres in unnormalized
                coordinates (they are divided by the padded image size).
            img_meta (dict): Meta information for current image; only
                ``'pad_shape'`` is read.
            gt_bboxes_ignore (Tensor, optional): Must be None; ignored
                boxes are not supported.
            eps (int | float, optional): Not used by this method.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # 1. everything starts as "don't care" (-1)
        assigned_gt_inds = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)
        assigned_labels = bbox_pred.new_full(
            (num_bboxes, ), -1, dtype=torch.long)

        nothing_to_match = num_gts == 0 or num_bboxes == 0
        if nothing_to_match:
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # Scale factor (w, h, w, h) taken from the padded image shape.
        img_h, img_w, _ = img_meta['pad_shape']
        factor = gt_bboxes.new_tensor(
            [img_w, img_h, img_w, img_h]).unsqueeze(0)

        # 2. individual cost terms
        cls_term = self.cls_cost(cls_pred, gt_labels)
        # regression cost on normalized ground truth boxes
        gt_boxes_normalized = gt_bboxes / factor
        reg_term = self.reg_cost(bbox_pred, gt_boxes_normalized)
        # IoU cost on absolute (x1, y1, x2, y2) boxes
        pred_boxes_xyxy = bbox_cxcywh_to_xyxy(bbox_pred) * factor
        iou_term = self.iou_cost(pred_boxes_xyxy, gt_bboxes)
        # centre cost on centres normalized by (w, h)
        gt_centers_normalized = centers2d / factor[:, 0:2]
        centers_term = self.centers2d_cost(pred_centers2d,
                                           gt_centers_normalized)

        total_cost = cls_term + reg_term + iou_term + centers_term
        # Replace NaN / inf entries with finite values before matching.
        total_cost = torch.nan_to_num(
            total_cost, nan=100.0, posinf=100.0, neginf=-100.0)

        # 3. Hungarian matching on CPU using linear_sum_assignment
        total_cost = total_cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        row_idx, col_idx = linear_sum_assignment(total_cost)
        device = bbox_pred.device
        row_idx = torch.from_numpy(row_idx).to(device)
        col_idx = torch.from_numpy(col_idx).to(device)

        # 4. background everywhere first, then the matched foregrounds
        assigned_gt_inds[:] = 0
        assigned_gt_inds[row_idx] = col_idx + 1
        assigned_labels[row_idx] = gt_labels[col_idx]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
|
projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ------------------------------------------------------------------------
|
2 |
+
# Modified from DETR3D (https://github.com/WangYueFt/detr3d)
|
3 |
+
# Copyright (c) 2021 Wang, Yue
|
4 |
+
# ------------------------------------------------------------------------
|
5 |
+
import torch
|
6 |
+
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
|
7 |
+
from mmdet.core.bbox.assigners import AssignResult
|
8 |
+
from mmdet.core.bbox.assigners import BaseAssigner
|
9 |
+
from mmdet.core.bbox.match_costs import build_match_cost
|
10 |
+
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox
|
11 |
+
|
12 |
+
try:
|
13 |
+
from scipy.optimize import linear_sum_assignment
|
14 |
+
except ImportError:
|
15 |
+
linear_sum_assignment = None
|
16 |
+
|
17 |
+
@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    """One-to-one assigner between 3D box predictions and ground truths.

    The matching cost is the sum of a classification cost and a box
    regression cost; the minimum-cost assignment is computed on CPU with
    ``scipy.optimize.linear_sum_assignment``.

    Args:
        cls_cost (dict): Config used to build the classification cost.
        reg_cost (dict): Config used to build the box regression cost.
        iou_cost (dict): Config used to build the IoU cost. The cost is
            built and stored, but ``assign`` does not add it to the total.
        pc_range (list[float] | None): Forwarded to ``normalize_bbox``
            together with the ground-truth boxes.
    """

    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pc_range=None):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.pc_range = pc_range

    def assign(self,
               bbox_pred,
               cls_pred,
               gt_bboxes,
               gt_labels,
               gt_bboxes_ignore=None,
               code_weights=None,
               with_velo=False,
               eps=1e-7):
        """Match every ground-truth box to at most one prediction.

        Args:
            bbox_pred (Tensor): Predicted box codes, one row per query.
            cls_pred (Tensor): Classification outputs handed to the
                classification cost.
            gt_bboxes (Tensor): Ground-truth boxes, one row per object;
                they are passed through ``normalize_bbox`` before the
                regression cost is evaluated.
            gt_labels (Tensor): Class index of every ground-truth box.
            gt_bboxes_ignore: Must be ``None``; ignoring boxes is not
                supported.
            code_weights (Tensor | None): Optional multiplier applied to
                both the predictions and the normalized ground truths.
            with_velo (bool): When false, only the first 8 code entries
                take part in the regression cost.
            eps (float): Unused.

        Returns:
            AssignResult: ``gt_inds`` holds 0 for background and ``k + 1``
            for a prediction matched to ground truth ``k``; it stays -1
            only when there are ground truths but no predictions.
            ``labels`` holds the matched class index, or -1 if unmatched.

        Raises:
            ImportError: If scipy is not installed.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        num_gts = gt_bboxes.size(0)
        num_bboxes = bbox_pred.size(0)

        # Step 1: every prediction starts out unassigned (-1).
        gt_inds = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long)
        labels = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long)

        # Degenerate inputs: nothing to match, return straight away.
        if num_gts == 0:
            # Without ground truth every prediction is background.
            gt_inds[:] = 0
            return AssignResult(num_gts, gt_inds, None, labels=labels)
        if num_bboxes == 0:
            return AssignResult(num_gts, gt_inds, None, labels=labels)

        # Step 2: build the (num_bboxes, num_gts) cost matrix.
        cls_term = self.cls_cost(cls_pred, gt_labels)

        gt_codes = normalize_bbox(gt_bboxes, self.pc_range)
        pred_codes = bbox_pred
        if code_weights is not None:
            pred_codes = pred_codes * code_weights
            gt_codes = gt_codes * code_weights
        if not with_velo:
            # Leave out the trailing entries of the box code.
            pred_codes = pred_codes[:, :8]
            gt_codes = gt_codes[:, :8]
        reg_term = self.reg_cost(pred_codes, gt_codes)

        # Step 3: Hungarian matching runs on CPU through scipy.
        cost = (cls_term + reg_term).detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        # scipy rejects non-finite costs, so clamp them to finite values.
        cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0)
        rows, cols = linear_sum_assignment(cost)
        device = bbox_pred.device
        rows = torch.from_numpy(rows).to(device)
        cols = torch.from_numpy(cols).to(device)

        # Step 4: everything is background unless the matching says otherwise.
        gt_inds.fill_(0)
        gt_inds[rows] = cols + 1
        labels[rows] = gt_labels[cols]
        return AssignResult(num_gts, gt_inds, None, labels=labels)
|
projects/mmdet3d_plugin/core/bbox/assigners/map_assigner.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
|
3 |
+
from mmdet.core.bbox.assigners import HungarianAssigner, AssignResult
|
4 |
+
try:
|
5 |
+
from scipy.optimize import linear_sum_assignment
|
6 |
+
except ImportError:
|
7 |
+
linear_sum_assignment = None
|
8 |
+
|
9 |
+
@BBOX_ASSIGNERS.register_module()
class LaneHungarianAssigner(HungarianAssigner):
    """Hungarian assigner that matches lane predictions to ground-truth lanes.

    The cost matrix is the sum of the classification cost and the
    regression cost configured on the parent ``HungarianAssigner``; the
    minimum-cost one-to-one assignment is computed on CPU with
    ``scipy.optimize.linear_sum_assignment``.
    """

    def assign(self,
               lane_pred,
               cls_pred,
               gt_lanes,
               gt_labels,
               img_meta,
               gt_lanes_ignore=None,
               eps=1e-7):
        """Match every ground-truth lane to at most one prediction.

        Args:
            lane_pred (Tensor): Predicted lanes, one row per query.
            cls_pred (Tensor): Classification outputs handed to the
                classification cost.
            gt_lanes (Tensor): Ground-truth lanes, one row per lane.
            gt_labels (Tensor): Class index of every ground-truth lane.
            img_meta: Unused; kept for interface compatibility.
            gt_lanes_ignore: Must be ``None``; ignoring lanes is not
                supported.
            eps (float): Unused.

        Returns:
            AssignResult: ``gt_inds`` holds 0 for background and ``k + 1``
            for a prediction matched to ground truth ``k``; it stays -1
            only when there are ground truths but no predictions.
            ``labels`` holds the matched class index, or -1 if unmatched.

        Raises:
            ImportError: If scipy is not installed.
        """
        assert gt_lanes_ignore is None, \
            'Only case when gt_lanes_ignore is None is supported.'
        num_gts, num_lanes = gt_lanes.size(0), lane_pred.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = lane_pred.new_full((num_lanes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = lane_pred.new_full((num_lanes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_lanes == 0:
            # No ground truth or no predicted lanes, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the weighted costs
        # classification cost
        cls_cost = self.cls_cost(cls_pred, gt_labels)
        # regression cost
        reg_cost = self.reg_cost(lane_pred, gt_lanes)
        # weighted sum of the above two costs
        cost = cls_cost + reg_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        if linear_sum_assignment is None:
            # The guarded module-level import leaves None behind when scipy
            # is missing; fail with an actionable message instead of a
            # "'NoneType' object is not callable" TypeError.
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        # scipy rejects NaN/inf entries; nan_to_num's defaults map NaN to 0
        # and +/-inf to the dtype's largest finite magnitudes.
        cost = torch.nan_to_num(cost)
        cost = cost.detach().cpu()
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(
            lane_pred.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(
            lane_pred.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
|
projects/mmdet3d_plugin/core/bbox/coders/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .nms_free_coder import NMSFreeCoder
|
2 |
+
__all__ = ['NMSFreeCoder']
|
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (290 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (237 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc
ADDED
Binary file (3.74 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-39.pyc
ADDED
Binary file (3.68 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
from mmdet.core.bbox import BaseBBoxCoder
|
4 |
+
from mmdet.core.bbox.builder import BBOX_CODERS
|
5 |
+
from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
|
6 |
+
|
7 |
+
|
8 |
+
@BBOX_CODERS.register_module()
class NMSFreeCoder(BaseBBoxCoder):
    """Bbox coder for NMS-free detector.

    Args:
        pc_range (list[float]): Range of point cloud, forwarded to
            ``denormalize_bbox``.
        voxel_size (list[float] | None): Stored on the instance; not used
            by this class. Default: None.
        post_center_range (list[float] | None): Limit of the box center as
            six values: the first three are the lower bounds and the last
            three the upper bounds of the first three box entries.
            Decoding requires it to be set. Default: None.
        max_num (int): Max number of boxes to be kept. Default: 100.
        score_threshold (float | None): Threshold to filter boxes based on
            score. Default: None.
        num_classes (int): Number of classes, used to split a flattened
            score index into (query index, label). Default: 10.
    """

    def __init__(self,
                 pc_range,
                 voxel_size=None,
                 post_center_range=None,
                 max_num=100,
                 score_threshold=None,
                 num_classes=10):
        self.pc_range = pc_range
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.max_num = max_num
        self.score_threshold = score_threshold
        self.num_classes = num_classes

    def encode(self):
        """Encoding is not needed by this coder; intentionally a no-op."""
        pass

    def decode_single(self, cls_scores, bbox_preds):
        """Decode the boxes of a single sample.

        Args:
            cls_scores (Tensor): Raw (pre-sigmoid) outputs from the
                classification head, shape [num_query, cls_out_channels].
                The flattened index is split with ``num_classes``, so
                cls_out_channels is presumably equal to ``num_classes``
                (TODO confirm against the head).
            bbox_preds (Tensor): Outputs from the regression head in the
                normalized code format consumed by ``denormalize_bbox``,
                shape [num_query, code_size].

        Returns:
            dict: ``bboxes``, ``scores`` and ``labels`` of the kept boxes.

        Raises:
            NotImplementedError: If ``post_center_range`` is None.
        """
        if self.post_center_range is None:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')

        flat_scores = cls_scores.sigmoid().view(-1)
        # topk raises when k exceeds the number of candidates, so clamp it.
        top_k = min(self.max_num, flat_scores.numel())
        scores, indexs = flat_scores.topk(top_k)
        labels = indexs % self.num_classes
        bbox_index = torch.div(
            indexs, self.num_classes, rounding_mode='floor')
        boxes = denormalize_bbox(bbox_preds[bbox_index], self.pc_range)

        # Build the range tensor locally instead of overwriting the
        # configured attribute: re-wrapping a tensor with torch.tensor() on
        # later calls emits a copy-construct warning.
        center_range = torch.as_tensor(
            self.post_center_range, device=scores.device)
        centers = boxes[..., :3]
        mask = (centers >= center_range[:3]).all(1)
        mask &= (centers <= center_range[3:]).all(1)

        if self.score_threshold is not None:
            mask &= scores >= self.score_threshold

        return {
            'bboxes': boxes[mask],
            'scores': scores[mask],
            'labels': labels[mask]
        }

    def decode(self, preds_dicts):
        """Decode the boxes of a whole batch from the last decoder layer.

        Args:
            preds_dicts (dict): Must contain
                ``all_cls_scores`` (Tensor): Outputs from the
                classification head, shape
                [nb_dec, bs, num_query, cls_out_channels], and
                ``all_bbox_preds`` (Tensor): Outputs from the regression
                head, shape [nb_dec, bs, num_query, code_size].
                Only the last entry along the first dimension is decoded.

        Returns:
            list[dict]: Decoded boxes, one dict per sample.
        """
        all_cls_scores = preds_dicts['all_cls_scores'][-1]
        all_bbox_preds = preds_dicts['all_bbox_preds'][-1]

        batch_size = all_cls_scores.size(0)
        return [
            self.decode_single(all_cls_scores[i], all_bbox_preds[i])
            for i in range(batch_size)
        ]
|
projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmdet.core.bbox.match_costs import build_match_cost
|
2 |
+
from .match_cost import BBox3DL1Cost
|
3 |
+
|
4 |
+
__all__ = ['build_match_cost', 'BBox3DL1Cost']
|
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (371 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (318 Bytes). View file
|
|
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc
ADDED
Binary file (1.97 kB). View file
|
|
projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-39.pyc
ADDED
Binary file (1.91 kB). View file
|
|