Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- .gitignore +27 -0
- Dockerfile +24 -0
- LICENSE +201 -0
- README.md +125 -0
- assets/ckpts.png +0 -0
- assets/navsim_transparent.png +3 -0
- det_map/__init__.py +0 -0
- det_map/agent_lightning.py +93 -0
- det_map/config/agent/det_agent.yaml +203 -0
- det_map/config/agent/map_agent.yaml +320 -0
- det_map/config/defaults/default_common.yaml +23 -0
- det_map/config/defaults/default_evaluation.yaml +7 -0
- det_map/config/defaults/ray_distributed_no_torch.yaml +8 -0
- det_map/config/scene_filter/det_all_scenes.yaml +12 -0
- det_map/config/scene_filter/navtiny.yaml +265 -0
- det_map/config/splits/default_train_val_test_log_split.yaml +0 -0
- det_map/config/train_det.yaml +48 -0
- det_map/data/__init__.py +0 -0
- det_map/data/datasets/__init__.py +0 -0
- det_map/data/datasets/dataclasses.py +521 -0
- det_map/data/datasets/dataloader.py +172 -0
- det_map/data/datasets/dataset.py +41 -0
- det_map/data/datasets/dataset_det.py +28 -0
- det_map/data/datasets/feature_builders.py +94 -0
- det_map/data/datasets/lidar_utils.py +66 -0
- det_map/data/pipelines/__init__.py +0 -0
- det_map/data/pipelines/color_utils.py +357 -0
- det_map/data/pipelines/filter_lidar.py +74 -0
- det_map/data/pipelines/lidar_aug.py +151 -0
- det_map/data/pipelines/point_shuffle.py +17 -0
- det_map/data/pipelines/prepare_depth.py +76 -0
- det_map/data/pipelines/prepare_img.py +218 -0
- det_map/det/__init__.py +0 -0
- det_map/det/dal/__init__.py +0 -0
- det_map/det/dal/dal.py +159 -0
- det_map/det/dal/mmdet3d/__init__.py +0 -0
- det_map/det/dal/mmdet3d/core/__init__.py +6 -0
- det_map/det/dal/mmdet3d/core/bbox/__init__.py +24 -0
- det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py +6 -0
- det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py +148 -0
- det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py +827 -0
- det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py +7 -0
- det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py +124 -0
- det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py +10 -0
- det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py +232 -0
- det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py +18 -0
- det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py +578 -0
- det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py +197 -0
- det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py +354 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/navsim_transparent.png filter=lfs diff=lfs merge=lfs -text
+navsim/agents/backbones/ops_dcnv3/build/temp.linux-x86_64-cpython-39/zhenxinl_nuplan/navsim_workspace/navsim_ours/navsim/agents/backbones/ops_dcnv3/src/cuda/dcnv3_cuda.o filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,27 @@
# python
build/
vocab_score_local/
vocab_score_full/
vocab_score_full_8192/
vocab_score_local_8192/
models_local/
traj_local/
*.so
*.pyc
**/__pycache__/
dist/
.pytest_cache/*
.pydevproject
.idea/
debug/
# IDE
.vscode/*

# Pip
*.egg-info

# files
*.log

*.jpg
*.pcd
Dockerfile
ADDED
@@ -0,0 +1,24 @@
FROM nvcr.io/nvidia/pytorch:23.05-py3
RUN apt-get update
RUN apt-get install -y tmux htop

RUN git clone https://[email protected]/woxihuanjiangguo/navsim_ours.git /navsim_ours
WORKDIR /navsim_ours

ENV HYDRA_FULL_ERROR=1
ENV NUPLAN_MAP_VERSION="nuplan-maps-v1.0"
ENV NUPLAN_MAPS_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/maps"
ENV NAVSIM_EXP_ROOT="/zhenxinl_nuplan/navsim_workspace/exp"
ENV NAVSIM_DEVKIT_ROOT="/navsim_ours"
ENV NAVSIM_TRAJPDM_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/traj_pdm"
ENV OPENSCENE_DATA_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset"
ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
ENV CFLAGS="-I$CUDA_HOME/include $CFLAGS"

RUN pip uninstall torch torchvision torchaudio -y
RUN pip3 install torch torchvision torchaudio
RUN pip install openmim
RUN mim install mmdet==2.28.2
RUN pip install spconv-cu120
RUN pip install numba
RUN pip install -e .
LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2024 autonomousvision

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md
ADDED
@@ -0,0 +1,125 @@
<div id="top" align="center">

<p align="center">
  <img src="assets/navsim_transparent.png" width="500">
</p>

**NAVSIM:** *Data-Driven **N**on-Reactive **A**utonomous **V**ehicle **Sim**ulation*

</div>


## Highlights <a name="highlight"></a>

🔥 NAVSIM gathers simulation-based metrics (such as progress and time to collision) for end-to-end driving by unrolling simplified bird's eye view abstractions of scenes for a short simulation horizon. It operates under the condition that the policy has no influence on the environment, which enables **efficient, open-loop metric computation** while being **better aligned with closed-loop** evaluations than traditional displacement errors.

> NAVSIM attempts to address some of the challenges faced by the community:
>
> 1. **Providing a principled evaluation** (by incorporating ideas + data from nuPlan)
>    - Key Idea: **PDM Score**, a multi-dimensional metric implemented in open-loop with strong correlation to closed-loop metrics
>    - Critical scenario sampling, focusing on situations with intention changes where the ego history cannot be extrapolated into a plan
>    - Official leaderboard on HuggingFace that remains open and prevents ambiguity in metric definitions between projects
>
> 2. **Maintaining ease of use** (by emulating nuScenes)
>    - Simple data format and reasonably-sized download (< nuPlan’s 20+ TB)
>    - Large-scale publicly available test split for internal benchmarking
>    - Continually-maintained devkit

🏁 **NAVSIM** will serve as a main track in the **`CVPR 2024 Autonomous Grand Challenge`**. The leaderboard for the challenge is open! For further details, please [check the challenge website](https://opendrivelab.com/challenge2024/)!

<p align="center">
  <img src="assets/navsim_cameras.gif" width="800">
</p>

## Table of Contents
1. [Highlights](#highlight)
2. [Getting started](#gettingstarted)
3. [Changelog](#changelog)
4. [License and citation](#licenseandcitation)
5. [Other resources](#otherresources)


## Getting started <a name="gettingstarted"></a>

- [Download and installation](docs/install.md)
- [Understanding and creating agents](docs/agents.md)
- [Understanding the data format and classes](docs/cache.md)
- [Dataset splits vs. filtered training / test splits](docs/splits.md)
- [Understanding the PDM Score](docs/metrics.md)
- [Submitting to the Leaderboard](docs/submission.md)

<p align="right">(<a href="#top">back to top</a>)</p>


## Changelog <a name="changelog"></a>
- **`[2024/04/21]`** NAVSIM v1.0 release (official devkit version for [AGC 2024](https://opendrivelab.com/challenge2024/))
  - **IMPORTANT NOTE**: The name of the data split `competition_test` was changed to `private_test_e2e`. Please adapt your directory name accordingly. For details see [installation](docs/install.md).
  - Parallelization of metric caching / evaluation
  - Adds [Transfuser](https://arxiv.org/abs/2205.15997) baseline (see [agents](docs/agents.md#Baselines))
  - Adds standardized training and test filtered splits (see [splits](docs/splits.md))
  - Visualization tools (see [tutorial_visualization.ipynb](tutorial/tutorial_visualization.ipynb))
  - Refactoring
- **`[2024/04/03]`** NAVSIM v0.4 release
  - Support for test phase frames of competition
  - Download script for trainval
  - Egostatus MLP Agent and training pipeline
  - Refactoring, Fixes, Documentation
- **`[2024/03/25]`** NAVSIM v0.3 release (official devkit version for warm-up phase)
  - Changes env variable NUPLAN_EXP_ROOT to NAVSIM_EXP_ROOT
  - Adds code for Leaderboard submission
  - Major refactoring of dataloading and configs
- **`[2024/03/11]`** NAVSIM v0.2 release
  - Easier installation and download
  - mini and test data split integration
  - Privileged `Human` agent
- **`[2024/02/20]`** NAVSIM v0.1 release (initial demo)
  - OpenScene-mini sensor blobs and annotation logs
  - Naive `ConstantVelocity` agent


<p align="right">(<a href="#top">back to top</a>)</p>


## License and citation <a name="licenseandcitation"></a>
All assets and code in this repository are under the [Apache 2.0 license](./LICENSE) unless specified otherwise. The datasets (including nuPlan and OpenScene) inherit their own distribution licenses. Please consider citing our paper and project if they help your research.

```BibTeX
@misc{Contributors2024navsim,
  title={NAVSIM: Data-Driven Non-Reactive Autonomous Vehicle Simulation},
  author={NAVSIM Contributors},
  howpublished={\url{https://github.com/autonomousvision/navsim}},
  year={2024}
}
```

```BibTeX
@inproceedings{Dauner2023CORL,
  title = {Parting with Misconceptions about Learning-based Vehicle Motion Planning},
  author = {Daniel Dauner and Marcel Hallgarten and Andreas Geiger and Kashyap Chitta},
  booktitle = {Conference on Robot Learning (CoRL)},
  year = {2023}
}
```

<p align="right">(<a href="#top">back to top</a>)</p>


## Other resources <a name="otherresources"></a>

<a href="https://twitter.com/AutoVisionGroup" target="_blank">
  <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Awesome Vision Group?style=social&color=brightgreen&logo=twitter" />
</a>
<a href="https://twitter.com/kashyap7x" target="_blank">
  <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Kashyap Chitta?style=social&color=brightgreen&logo=twitter" />
</a>
<a href="https://twitter.com/DanielDauner" target="_blank">
  <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Daniel Dauner?style=social&color=brightgreen&logo=twitter" />
</a>
<a href="https://twitter.com/MHallgarten0797" target="_blank">
  <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Marcel Hallgarten?style=social&color=brightgreen&logo=twitter" />
</a>

- [SLEDGE](https://github.com/autonomousvision/sledge) | [tuPlan garage](https://github.com/autonomousvision/tuplan_garage) | [CARLA garage](https://github.com/autonomousvision/carla_garage) | [Survey on E2EAD](https://github.com/OpenDriveLab/End-to-end-Autonomous-Driving)
- [PlanT](https://github.com/autonomousvision/plant) | [KING](https://github.com/autonomousvision/king) | [TransFuser](https://github.com/autonomousvision/transfuser) | [NEAT](https://github.com/autonomousvision/neat)

<p align="right">(<a href="#top">back to top</a>)</p>
assets/ckpts.png
ADDED
assets/navsim_transparent.png
ADDED
det_map/__init__.py
ADDED
File without changes
det_map/agent_lightning.py
ADDED
@@ -0,0 +1,93 @@
from typing import Dict, List, Tuple

import pytorch_lightning as pl
import torch
from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
from torch import Tensor

from navsim.agents.abstract_agent import AbstractAgent
from navsim.agents.vadv2.vadv2_agent import Vadv2Agent
from navsim.common.dataclasses import Trajectory


class AgentLightningModuleMap(pl.LightningModule):
    """Lightning wrapper that routes batches through a NAVSIM agent."""

    def __init__(
        self,
        agent: AbstractAgent,
    ):
        super().__init__()
        self.agent = agent

    def _step(
        self,
        batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
        logging_prefix: str,
    ):
        features, targets = batch
        # Vadv2Agent has a dedicated training forward that also consumes the
        # interpolated ground-truth trajectory; other agents use forward().
        if logging_prefix in ['train', 'val'] and isinstance(self.agent, Vadv2Agent):
            prediction = self.agent.forward_train(features, targets['interpolated_traj'])
        else:
            prediction = self.agent.forward(features)

        loss, loss_dict = self.agent.compute_loss(features, targets, prediction)

        for k, v in loss_dict.items():
            self.log(f"{logging_prefix}/{k}", v, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
        self.log(f"{logging_prefix}/loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
        return loss

    def training_step(
        self,
        batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
        batch_idx: int,
    ):
        return self._step(batch, "train")

    def validation_step(
        self,
        batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
        batch_idx: int,
    ):
        return self._step(batch, "val")

    def configure_optimizers(self):
        return self.agent.get_optimizers()

    def predict_step(
        self,
        batch: Tuple[Dict[str, Tensor], Dict[str, Tensor], List[str]],
        batch_idx: int,
    ):
        features, targets, tokens = batch
        self.agent.eval()
        with torch.no_grad():
            predictions = self.agent.forward(features)
            poses = predictions["trajectory"].cpu().numpy()

            # Per-proposal sub-scores, read out as log-probabilities.
            imis = predictions["imi"].softmax(-1).log().cpu().numpy()
            nocs = predictions["noc"].log().cpu().numpy()
            das = predictions["da"].log().cpu().numpy()
            ttcs = predictions["ttc"].log().cpu().numpy()
            comforts = predictions["comfort"].log().cpu().numpy()
            progresses = predictions["progress"].log().cpu().numpy()
        # 40 poses over the fixed 4 s horizon -> 0.1 s sampling, else 0.5 s.
        if poses.shape[1] == 40:
            interval_length = 0.1
        else:
            interval_length = 0.5

        return {token: {
            'trajectory': Trajectory(pose, TrajectorySampling(time_horizon=4, interval_length=interval_length)),
            'imi': imi,
            'noc': noc,
            'da': da,
            'ttc': ttc,
            'comfort': comfort,
            'progress': progress
        } for pose, imi, noc, da, ttc, comfort, progress, token in zip(
            poses, imis, nocs, das, ttcs, comforts, progresses, tokens)}

    # def on_after_backward(self) -> None:
    #     print("on_after_backward enter")
    #     for name, param in self.named_parameters():
    #         if param.grad is None:
    #             print(name)
    #     print("on_after_backward exit")
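For orientation, a minimal, hedged sketch of how this module is typically driven by a PyTorch Lightning `Trainer`; the `build_agent` factory and the dataloaders are placeholders, not APIs from this upload:

```python
# Hypothetical usage sketch; build_agent(), train_loader, val_loader and
# predict_loader stand in for whatever your pipeline provides.
import pytorch_lightning as pl

agent = build_agent()  # any navsim AbstractAgent implementation
module = AgentLightningModuleMap(agent=agent)

trainer = pl.Trainer(max_epochs=20, accelerator="gpu", devices=1)
# fit() routes (features, targets) batches through training_step/validation_step
trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)

# predict() routes (features, targets, tokens) batches through predict_step,
# which returns one dict of trajectory and sub-scores per scene token
results = trainer.predict(module, dataloaders=predict_loader)
```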
det_map/config/agent/det_agent.yaml
ADDED
@@ -0,0 +1,203 @@
_target_: det_map.det.det_agent.DetAgent
_convert_: 'all'

is_train: &is_train
  is_train: True

ranges: &ranges
  x_range: (-54.0, 54.0)
  y_range: (-54.0, 54.0)
  z_range: (-10.0, 10.0)

point_cloud_range: &point_cloud_range
  point_cloud_range: [ -54.0, -54.0, -10.0, 54.0, 54.0, 10.0 ]
voxel_size: &voxel_size
  voxel_size: [0.075, 0.075, 0.2]


grid_config: &grid_config
  grid_config:
    x: (-54.0, 54.0, 0.6)
    y: (-54.0, 54.0, 0.6)
    z: (-10.0, 10.0, 20.0)
    depth: (1.0, 60.0, 0.5)

model:
  _target_: det_map.det.dal.dal.DAL
  _convert_: 'all'
  use_grid_mask: true
  pts_voxel_layer:
    max_num_points: 10
    <<: *voxel_size
    <<: *point_cloud_range
    max_voxels: [ 120000, 160000 ]
  pts_voxel_encoder:
    type: HardSimpleVFE
    num_features: 5
  pts_middle_encoder:
    type: SparseEncoder
    in_channels: 5
    base_channels: 24
    sparse_shape: [ 41, 1440, 1440 ]
    output_channels: 192
    order: [ 'conv', 'norm', 'act' ]
    encoder_channels: ((24, 24, 48), (48, 48, 96), (96, 96, 192), (192, 192))
    encoder_paddings: ((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0))
    block_type: basicblock
  pts_backbone:
    type: SECOND
    in_channels: 384
    out_channels: [ 192, 384 ]
    layer_nums: [ 8, 8 ]
    layer_strides: [ 1, 2 ]
    norm_cfg:
      type: BN
      eps: 1e-3
      momentum: 0.01
    conv_cfg:
      type: Conv2d
      bias: false
  pts_neck:
    type: SECONDFPN
    in_channels: [ 192, 384 ]
    out_channels: [ 256, 256 ]
    upsample_strides: [ 1, 2 ]
    norm_cfg:
      type: BN
      eps: 1e-3
      momentum: 0.01
    upsample_cfg:
      type: deconv
      bias: false
    use_conv_for_no_stride: true
  img_backbone:
    pretrained: 'torchvision://resnet18'
    type: ResNet
    depth: 18
    num_stages: 4
    out_indices: [ 1, 2, 3 ]
    frozen_stages: -1
    norm_cfg:
      type: BN
      requires_grad: true
    norm_eval: false
    with_cp: false
    style: pytorch
  img_neck:
    type: CustomFPN
    in_channels: [ 128, 256, 512 ]
    out_channels: img_feat_dim
    num_outs: 1
    start_level: 0
    out_ids: [ 0 ]
  img_view_transformer:
    type: LSSViewTransformer
    <<: *grid_config
    input_size: data_config['input_size']
    in_channels: img_feat_dim
    out_channels: feat_bev_img_dim
    downsample: 8
    with_depth_from_lidar: true
  pts_bbox_head:
    type: DALHead
    feat_bev_img_dim: feat_bev_img_dim
    img_feat_dim: img_feat_dim
    sparse_fuse_layers: 2
    dense_fuse_layers: 2
    instance_attn: false
    num_proposals: 200
    in_channels: 512
    hidden_channel: 128
    num_classes: 10
    num_decoder_layers: 1
    num_heads: 8
    nms_kernel_size: 3
    ffn_channel: 256
    dropout: 0.1
    bn_momentum: 0.1
    activation: relu
    auxiliary: true
    common_heads:
      center: [ 2, 2 ]
      height: [ 1, 2 ]
      dim: [ 3, 2 ]
      rot: [ 2, 2 ]
      vel: [ 2, 2 ]
    bbox_coder:
      type: TransFusionBBoxCoder
      pc_range: point_cloud_range[:2]
      post_center_range: [ -61.2, -61.2, -10.0, 61.2, 61.2, 10.0 ]
      score_threshold: 0.0
      out_size_factor: 8
      voxel_size: voxel_size[:2]
      code_size: 10
    loss_cls:
      type: FocalLoss
      use_sigmoid: true
      gamma: 2.0
      alpha: 0.25
      reduction: mean
      loss_weight: 1.0
    loss_heatmap:
      type: GaussianFocalLoss
      reduction: mean

pipelines:
  lidar_filter:
    _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
    _convert_: 'all'
    close_radius: 1.0
    <<: *ranges

  # only include in training
  point_shuffle:
    _target_: det_map.data.pipelines.point_shuffle.PointShuffle
    <<: *is_train

  lidar_aug:
    _target_: det_map.data.pipelines.lidar_aug.LiDARAug
    bda_aug_conf:
      rot_lim: (-22.5 * 2, 22.5 * 2)
      scale_lim: (0.9, 1.1)
      flip_dx_ratio: 0.5
      flip_dy_ratio: 0.5
      tran_lim: (0.5, 0.5, 0.5)
    <<: *ranges
    # if no aug for map, set this is_train to False
    <<: *is_train

  depth:
    _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
    <<: *grid_config

  img:
    _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
    _convert_: 'all'
    opencv_pp: True
    # Flag should be False in Eval!!!!
    <<: *is_train
    data_config:
      input_size: (256, 704)
      src_size: (900, 1600)
      # Augmentation
      resize: (-0.06, 0.44)
      rot: (-5.4, 5.4)
      flip: True
      crop_h: (0.0, 0.0)
      random_crop_height: True
      vflip: True
      resize_test: 0.04
      pmd:
        brightness_delta: 32
        contrast_lower: 0.5
        contrast_upper: 1.5
        saturation_lower: 0.5
        saturation_upper: 1.5
        hue_delta: 18
        rate: 0.5


<<: *is_train
checkpoint_path: null
hidden_layer_dim: 512
lr: 1e-4
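One detail worth calling out in the config above: `is_train`, `ranges`, `voxel_size` and `grid_config` are YAML anchors (`&name`) spliced into many blocks via merge keys (`<<: *name`), so flipping the single `is_train` flag propagates to every pipeline stage. A minimal self-contained sketch of how that merge resolves, using plain PyYAML rather than anything from this repository:

```python
# Demonstrates the anchor/merge pattern used throughout det_agent.yaml:
# `&is_train` anchors a mapping and `<<: *is_train` splices it into a block.
import yaml

doc = """
is_train: &is_train
  is_train: true
pipelines:
  point_shuffle:
    <<: *is_train
  img:
    opencv_pp: true
    <<: *is_train
"""
cfg = yaml.safe_load(doc)
assert cfg["pipelines"]["point_shuffle"]["is_train"] is True
assert cfg["pipelines"]["img"] == {"opencv_pp": True, "is_train": True}
```

Note also that tuple-looking values such as `x_range: (-54.0, 54.0)` are plain strings to YAML and are presumably parsed downstream.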
det_map/config/agent/map_agent.yaml
ADDED
@@ -0,0 +1,320 @@
_target_: det_map.map.map_agent.MapAgent
_convert_: 'all'


is_train: &is_train
  is_train: True

point_cloud_range: &point_cloud_range
  pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]

lidar_filter_ranges: &lidar_filter_ranges
  x_range: (-15.0, 15.0)
  y_range: (-30.0, 30.0)
  z_range: (-10.0, 10.0)

voxel_size: &voxel_size
  voxel_size: [0.075, 0.075, 20.0]

img_voxel_size: &img_voxel_size
  voxel_size: [0.3, 0.3, 20.0]


dbound: &dbound
  dbound: [1.0, 35.0, 0.5]

grid_config: &grid_config
  grid_config:
    x: (-15.0, 15.0, 0.6)
    y: (-30.0, 30.0, 0.6)
    z: (-10.0, 10.0, 20.0)
    depth: (1.0, 60.0, 0.5)

img_norm_cfg : &img_norm_cfg
  img_norm_cfg:
    mean: [123.675, 116.28, 103.53]
    std: [58.395, 57.12, 57.375]
    to_rgb: True

map_classes: &map_classes
  map_classes: ['divider', 'ped_crossing','boundary', 'centerline']

#fixed_ptsnum_per_gt_line: &fixed_ptsnum_per_gt_line
#  fixed_ptsnum_per_gt_line: 20

#fixed_ptsnum_per_pred_line: &fixed_ptsnum_per_pred_line
#  fixed_ptsnum_per_pred_line: 20

eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag
  eval_use_same_gt_sample_num_flag: True



#_pos_dim_: &_pos_dim_
#  _pos_dim_: 128

#_ffn_dim_: &_ffn_dim_
#  _ffn_dim_: 512

#_num_levels_: &_num_levels_
#  _num_levels_: 1

#bev_h_: &bev_h_
#  bev_h_: 100

#bev_w_: &bev_w_
#  bev_w_: 200

#queue_length: &queue_length
#  queue_length: 1

aux_seg : &aux_seg_cfg
  aux_seg:
    use_aux_seg: False
    bev_seg: False
    pv_seg: False
    seg_classes: 1
    feat_down_sample: 32
    pv_thickness: 1

#z_cfg : &z_cfg
#
#  pred_z_flag: True
#  gt_z_flag: True

model:
  _target_: det_map.map.map_model.MapModel
  _convert_: 'all'
  use_grid_mask: True
  video_test_mode: False
  pretrained:
    img: ckpts/resnet50-19c8e357.pth

  img_backbone:
    type: ResNet
    depth: 50
    num_stages: 4
    out_indices: [3]
    frozen_stages: 1
    norm_cfg:
      type: BN
      requires_grad: False
    norm_eval: True
    style: pytorch
  img_neck:
    type: FPN
    in_channels: [2048]
    out_channels: 256
    start_level: 0
    add_extra_convs: on_output
    num_outs: 1
    relu_before_extra_convs: True
  pts_bbox_head:
    type: MapTRv2Head
    <<: *point_cloud_range
    bev_h: 100
    bev_w: 50
    num_query: 900
    num_vec_one2one: 20
    num_vec_one2many: 300
    k_one2many: 6
    num_pts_per_vec: 20
    num_pts_per_gt_vec: 20
    dir_interval: 1
    query_embed_type: 'instance_pts'
    transform_method: 'minmax'
    gt_shift_pts_pattern: 'v2'
    num_classes: 2
    in_channels: 256
    sync_cls_avg_factor: True
    with_box_refine: True
    as_two_stage: False
    code_size: 2
    code_weights: None
    <<: *aux_seg_cfg
    # z_cfg: *z_cfg
    transformer:
      type: MapTRPerceptionTransformer
      bev_h: 100
      bev_w: 50
      # fuser:
      #   type: 'ConvFuser'
      #   in_channels: [256, 256]
      #   out_channels: 256
      num_cams: 2
      # z_cfg: *z_cfg
      rotate_prev_bev: False
      use_shift: True
      use_can_bus: False
      embed_dims: 256
      encoder:
        type: 'SpatialDecoder'
        num_layers: 1
        <<: *point_cloud_range
        grid_config:
          x: [-15.0, 15.0, 0.6]
          y: [-30.0, 30.0, 0.6]
          z: [ -10.0, 10.0, 20.0 ]
        data_config:
          input_size: [256, 704]
        transformerlayers:
          type: 'SpatialDecoderLayer'
          attn_cfgs:
            - type: 'SpatialCrossAttention'
              <<: *point_cloud_range
              num_cams: 2
              dropout: 0.0
              embed_dims: 256
              deformable_attention:
                type: 'MSDeformableAttention'
                embed_dims: 256
                num_points: 8
                num_levels: 1
          ffn_cfgs:
            type: 'FFN'
            embed_dims: 256
            feedforward_channels: 1024
            ffn_drop: 0.0
            act_cfg:
              type: 'ReLU'
              inplace: True
          feedforward_channels: 1024
          ffn_dropout: 0.0
          operation_order: ['cross_attn', 'norm' ,'ffn', 'norm']
      decoder:
        type: MapTRDecoder
        num_layers: 6
        return_intermediate: True
        transformerlayers:
          type: DecoupledDetrTransformerDecoderLayer
          num_vec: 20
          num_pts_per_vec: 20
          attn_cfgs:
            - type: MultiheadAttention
              embed_dims: 256
              num_heads: 8
              dropout: 0.1
            - type: MultiheadAttention
              embed_dims: 256
              num_heads: 8
              dropout: 0.1
            - type: CustomMSDeformableAttention
              embed_dims: 256
              num_levels: 1
          feedforward_channels: 512
          ffn_dropout: 0.1
          operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']

    positional_encoding:
      type: LearnedPositionalEncoding
      num_feats: 128
      row_num_embed: 100
      col_num_embed: 50
    loss_cls:
      type: FocalLoss
      use_sigmoid: True
      gamma: 2.0
      alpha: 0.25
      loss_weight: 2.0
    loss_bbox:
      type: L1Loss
      loss_weight: 0.0
    loss_iou:
      type: GIoULoss
      loss_weight: 0.0
    loss_pts:
      type: PtsL1Loss
      loss_weight: 5.0
    loss_dir:
      type: PtsDirCosLoss
      loss_weight: 0.005
    loss_seg:
      type: SimpleLoss
      pos_weight: 4.0
      loss_weight: 1.0
    loss_pv_seg:
      type: SimpleLoss
      pos_weight: 1.0
      loss_weight: 2.0
  # train_cfg:
  #   pts:
  #     grid_size: [512, 512, 1]
  #     <<: *voxel_size
  #     point_cloud_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
  #     out_size_factor: 4
  #     assigner:
  #       type: MapTRAssigner
  #       cls_cost:
  #         type: FocalLossCost
  #         weight: 2.0
  #       reg_cost:
  #         type: BBoxL1Cost
  #         weight: 0.0
  #         box_format: 'xywh'
  #       iou_cost:
  #         type: IoUCost
  #         iou_mode: 'giou'
  #         weight: 0.0
  #       pts_cost:
  #         type: OrderedPtsL1Cost
  #         weight: 5
  #       pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]

pipelines:
  lidar_filter:
    _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
    _convert_: 'all'
    close_radius: 1.0
    <<: *lidar_filter_ranges

  # only include in training
  point_shuffle:
    _target_: det_map.data.pipelines.point_shuffle.PointShuffle
    <<: *is_train

  lidar_aug:
    _target_: det_map.data.pipelines.lidar_aug.LiDARAug
    bda_aug_conf:
      rot_lim: (-22.5 * 2, 22.5 * 2)
      scale_lim: (0.9, 1.1)
      flip_dx_ratio: 0.5
      flip_dy_ratio: 0.5
      tran_lim: (0.5, 0.5, 0.5)
    <<: *lidar_filter_ranges
    # if no aug for map, set this is_train to False
    <<: *is_train

  depth:
    _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
    <<: *grid_config

  img:
    _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
    _convert_: 'all'
    opencv_pp: True
    # Flag should be False in Eval!!!!
    <<: *is_train
    data_config:
      input_size: (256, 704)
      src_size: (900, 1600)
      # Augmentation
      resize: (-0.06, 0.44)
      rot: (-5.4, 5.4)
      flip: True
      crop_h: (0.0, 0.0)
      random_crop_height: True
      vflip: True
      resize_test: 0.04
      pmd:
        brightness_delta: 32
        contrast_lower: 0.5
        contrast_upper: 1.5
        saturation_lower: 0.5
        saturation_upper: 1.5
        hue_delta: 18
        rate: 0.5

#<<: *is_train
checkpoint_path: null
hidden_layer_dim: 512
lr: 1e-4
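Both agent configs are designed to be materialized through their `_target_` fields. A hedged sketch of that flow using Hydra's compose API; the config path, config name and override mirror the files in this upload but are assumptions about how the entry point is wired:

```python
# Hedged sketch: composes train_det.yaml and instantiates the agent from
# its _target_ (det_map.map.map_agent.MapAgent for agent=map_agent).
from hydra import compose, initialize
from hydra.utils import instantiate

with initialize(version_base=None, config_path="det_map/config"):
    cfg = compose(config_name="train_det", overrides=["agent=map_agent"])

# instantiate() imports cfg.agent._target_ and passes the remaining keys as
# constructor kwargs; _convert_: 'all' first converts nested OmegaConf nodes
# into plain dicts and lists.
agent = instantiate(cfg.agent)
```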
det_map/config/defaults/default_common.yaml
ADDED
@@ -0,0 +1,23 @@
# Default common configs

defaults:
  # Worker that is used to run simulations
  # - ray_distributed_no_torch
  - ray_distributed_no_torch

split: ???

distributed_timeout_seconds: 7200  # Sets how long to wait while synchronizing across worker nodes in a distributed context.

selected_simulation_metrics: null

# Sets verbosity level, in particular determines if progress bars are shown or not.
verbose: false

# Logger
logger_level: info  # Level of logger
logger_format_string: null  # Logger format string, set null to use the default format string

# Execution
max_number_of_workers: null  # Set null to disable threading for simulation execution
gpu: true  # Whether to use available GPUs during training/simulation
det_map/config/defaults/default_evaluation.yaml
ADDED
@@ -0,0 +1,7 @@
# Cache parameters
experiment_name: ???
navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}  # path to log annotations
sensor_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/sensor_blobs/${split}  # path to sensor blobs
date_format: '%Y.%m.%d.%H.%M.%S'
experiment_uid: ${now:${date_format}}
output_dir: ${oc.env:NAVSIM_EXP_ROOT}/${experiment_name}/${experiment_uid}  # path where output csv is saved
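The `${oc.env:...}` and `${split}` entries above are OmegaConf interpolations, resolved lazily when a value is read (`${now:...}` is a Hydra-registered resolver and needs a Hydra run context). A small self-contained sketch of the behavior; the environment value is made up:

```python
# Demonstrates the interpolations used in default_evaluation.yaml;
# the OPENSCENE_DATA_ROOT value here is hypothetical.
import os
from omegaconf import OmegaConf

os.environ["OPENSCENE_DATA_ROOT"] = "/data/openscene"
cfg = OmegaConf.create({
    "split": "mini",
    "navsim_log_path": "${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}",
})
print(cfg.navsim_log_path)  # -> /data/openscene/navsim_logs/mini
```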
det_map/config/defaults/ray_distributed_no_torch.yaml
ADDED
@@ -0,0 +1,8 @@
_target_: navsim.planning.utils.multithreading.worker_ray_no_torch.RayDistributedNoTorch
_convert_: 'all'
master_node_ip: null  # Set to a master node IP if you desire to connect to cluster remotely
threads_per_node: null  # Number of CPU threads to use per node, "null" means all threads available
debug_mode: false  # If true all tasks will be executed serially, mainly for testing
log_to_driver: true  # If true, all printouts from ray threads will be displayed in driver
logs_subdir: 'logs'  # Subdirectory to store logs inside the experiment directory
use_distributed: false  # Whether to use the built-in distributed mode of ray
det_map/config/scene_filter/det_all_scenes.yaml
ADDED
@@ -0,0 +1,12 @@
_target_: det_map.data.datasets.dataloader.SceneFilter
_convert_: 'all'

num_history_frames: 4
num_future_frames: 10
# for the map task, has_route may need to be set to True
has_route: False

max_scenes: Null
log_names: Null

tokens: Null
det_map/config/scene_filter/navtiny.yaml
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: navsim.common.dataclasses.SceneFilter
|
2 |
+
_convert_: 'all'
|
3 |
+
num_history_frames: 4
|
4 |
+
num_future_frames: 10
|
5 |
+
frame_interval: 1
|
6 |
+
has_route: true
|
7 |
+
max_scenes: null
|
8 |
+
|
9 |
+
log_names: null # list of log names to extract scenes from, if null, all logs are extracted
|
10 |
+
tokens:
|
11 |
+
- 'ed4ac2dad0fa584b'
|
12 |
+
- '2111b648fcba5bb7'
|
13 |
+
- '1fc1dd0dc3d157ae'
|
14 |
+
- '76a69c9e9e375670'
|
15 |
+
- '4d3a4cbc9efb5337'
|
16 |
+
- '06df05f607855dbf'
|
17 |
+
- 'c3856d49ecf453f0'
|
18 |
+
- '09d3f08395e05d1c'
|
19 |
+
- '0593ddf8a1bb5a57'
|
20 |
+
- 'c0b386ab15db56f9'
|
21 |
+
- '0ef0f369529e54a9'
|
22 |
+
- 'c754b1af814a5f23'
|
23 |
+
- 'b214f8e744075e96'
|
24 |
+
- '5cbacc029a9f5cb3'
|
25 |
+
- 'cb46ac2ddfdf506e'
|
26 |
+
- '108d77bad2275975'
|
27 |
+
- '3978246a10a25ab0'
|
28 |
+
- '41bb74b4738f5a8b'
|
29 |
+
- '3a8375c20b615fce'
|
30 |
+
- '82dc3fff070b5f80'
|
31 |
+
- '8bfb2d59b82057e6'
|
32 |
+
- 'e36d3626a55e54f9'
|
33 |
+
- '5b1c0e44a5505c06'
|
34 |
+
- '78e6ea95b854551c'
|
35 |
+
- '76af8c24431855c3'
|
36 |
+
- '1a84e817c1875ec6'
|
37 |
+
- 'e7ea3ed9a30e5444'
|
38 |
+
- '8c837572950a5ac0'
|
39 |
+
- 'c18f8cfc41385d8c'
|
40 |
+
- '11aa12f4e5715b08'
|
41 |
+
- '702bdcfabe0755fe'
|
42 |
+
- 'c11854507e515b05'
|
43 |
+
- '828f0769bf365504'
|
44 |
+
- '1d2d2ddbbd5450a4'
|
45 |
+
- '640423c4ff21538a'
|
46 |
+
- '93fa463a455857f6'
|
47 |
+
- '79214a9a65225eda'
|
48 |
+
- 'cd9d78a1011c555f'
|
49 |
+
- '2a3f7fbaa10b5627'
|
50 |
+
- '5abf2148971855ad'
|
51 |
+
- 'd9200709d73756c3'
|
52 |
+
- 'cf94200201a75af8'
|
53 |
+
- 'c97bad66929c58d1'
|
54 |
+
- 'e45b782c83a550c1'
|
55 |
+
- 'e869951de22f5ecc'
|
56 |
+
- '9610b02bc4ec529c'
|
57 |
+
- '70ed6ff1471f5d74'
|
58 |
+
- 'f8a971a1e94553ce'
|
59 |
+
- '91e77e1873d75afe'
|
60 |
+
- 'dc86b9a3e2e05466'
|
61 |
+
- 'a3efdab7285751a6'
|
62 |
+
- 'ecca4f25f1cd5a85'
|
63 |
+
- '3c09e960d73758eb'
|
64 |
+
- '58fb7f78e39451bc'
|
65 |
+
- '0ce0aa336fe751a4'
|
66 |
+
- '759d96676b965349'
|
67 |
+
- 'e3b1564e52cd52db'
|
68 |
+
- '48333fc684d454a2'
|
69 |
+
- '62cae48b4e445254'
|
70 |
+
- 'e97256ddafa85705'
|
71 |
+
- '568aee30ea2655e2'
|
72 |
+
- '2b8645e05e8854f0'
|
73 |
+
- '1ce8022305ba565c'
|
74 |
+
- 'fd3f8f3310255030'
|
75 |
+
+- 'f0b74302312b5241'
+- 'd74e1e5648e35864'
+- '5bff4e6fa9c95deb'
+- '97d3764b7be652cf'
+- 'de681a4826e35220'
+- 'be2540e76b10519d'
+- 'c7e91cc3157b5937'
+- '12a68a4c440c5396'
+- 'ac0c803827d65b80'
+- 'c18771a3868f5868'
+- 'a6340d3e28b95701'
+- '24fff541744b573f'
+- 'e7165cb777e65dac'
+- '7c1553e7080b5a70'
+- '6dffb4d149eb5089'
+- '0773a8971c5e5e5a'
+- '72dac45a812f56fb'
+- '75c16dc4849b5726'
+- '523eab76cc4653bd'
+- 'f246f785c3455caa'
+- 'baf59d54fb78575a'
+- 'b29743e5885f5514'
+- 'd213c35fc6055569'
+- '3ba8190534b1554c'
+- '26e297939af25760'
+- 'da643d2d70785c76'
+- '2137a540b5f05b48'
+- 'ed795a36682f5728'
+- '000afad751a95adb'
+- '7543fb2f2dcf5c7e'
+- '9b5c00687d4e590b'
+- '16d0a19acfcd5668'
+- 'd91da3c6f79b53f6'
+- '154694dd0f6c565c'
+- '9b4b3a0261595a47'
+- '0df3061f21f4502a'
+- '7e0b549208c75322'
+- '74678e95029e52a2'
+- '49196fecbe9a549f'
+- '0decaed8d0f45b26'
+- 'b3671d0ef61e5391'
+- '7b990d22090f5a21'
+- '4fea3406427a52de'
+- 'e7ac9da207d05a7f'
+- '69b772bf2aa15e8b'
+- '09300186157e51e9'
+- 'c61c26797b2d52f8'
+- 'eac8efd956975d88'
+- 'ad0ca9004c1e56c6'
+- '9c48c3a7714e5850'
+- '1bac9ad3b5795fb9'
+- '5dad11490b425565'
+- '1f6cea56be625f10'
+- 'f2fa70a966055b14'
+- '68520950dcca56d2'
+- 'e905af2fb80f5802'
+- 'e5445523551c573a'
+- '5a3b197e54495443'
+- '35d813d8de5854f9'
+- '25e0169687d659c0'
+- '88f7863088bc593e'
+- '06767022b8445e7f'
+- '4fcdad926f4a5568'
+- '8f5b483a5dd956d3'
+- 'a64cd79798845d53'
+- 'de864917fc075773'
+- '50418b03a9345e7f'
+- 'e991b5b1ef9d5fcd'
+- 'ea75df402b6a5d37'
+- '17b4e23eb78b547b'
+- '79388c5790cf5b02'
+- '7b9cc1b02566583e'
+- 'a8b415f811cb5bfa'
+- 'f4e49919c3d35a1a'
+- '79ca73b34554570a'
+- 'f9902a62c80c511a'
+- '71057951bf9a5e81'
+- '411cc15794895e1e'
+- '7c4fca218b0854d7'
+- '8498fd37028051b7'
+- '27decc74a57b53ac'
+- '50480a33ca215770'
+- '47f300be059c5734'
+- '70f2ea8358ed55f1'
+- '471f7ca3148659cd'
+- '4800f9f234c050fa'
+- '64c71ae3532a5efb'
+- '5e8f9f6ab5695769'
+- '2d9168675ce355a2'
+- '3c077c8da4615b33'
+- 'c7e8c07beb135247'
+- '2f8055010b905651'
+- '340d245e2ee854fe'
+- '70df39aae7b05204'
+- '388782e615ec5bba'
+- '7cb3886f8bb557d3'
+- 'b37a0e95ac4055ba'
+- '8be138812f1459d2'
+- '3ff2c6494d63527b'
+- '05fab28931d55ff9'
+- '333189d65a42540d'
+- '73bb3d277424505f'
+- 'cbe6088df42d55dc'
+- 'aa784b6564cb56a3'
+- 'cd30af3a16945a92'
+- 'c3a15b9f7dd55cce'
+- '44b6e898e157569a'
+- '4e4062c303565251'
+- 'd74f9dfdb4125eaf'
+- 'c0365ee92dec511d'
+- '4e98aff61c5e57b1'
+- '7200dcdd4ad05210'
+- 'c8124080125a5278'
+- '1586145ff7ae5b89'
+- '6b7f1a53f7d3524c'
+- '3bf37bad40c55175'
+- 'bdde0c029ec25326'
+- 'cd0a777bac035272'
+- '67b76696aa305cdc'
+- '614111a5d6045ae7'
+- 'f383acca25ff59eb'
+- 'cea15449dc0356bd'
+- 'b80387b22e0c55b5'
+- '065a0963a4125096'
+- 'c9e06d789998518d'
+- '4615024da7765d62'
+- 'ef336e8b83245733'
+- 'be4ec4d7ce745612'
+- '5169ec4362225b58'
+- 'c6f905906f9654a2'
+- '194216a5f85d592d'
+- '6529aed422f35336'
+- '497ac853176d59b6'
+- 'f280ba623a7f5321'
+- 'b5fe876937af504a'
+- 'c6b62c299ccc5274'
+- 'dcb2a35ae605510a'
+- 'd1c281e277d1532d'
+- '8f3366be46c05d5f'
+- 'af9f5f6fa1ad5182'
+- '5054593a6d795256'
+- '159b9b7451195c9c'
+- '7687f25bf8845686'
+- '560f3ccbaa5b53ef'
+- 'e5a146299341551a'
+- 'b794c616319352c3'
+- 'fb68b32ec8a251da'
+- '9fce6f03ef0351b0'
+- '046fd63cb514581a'
+- '0ce82a1caffc56af'
+- '7cc94c33bbe052d7'
+- 'b5126e9ddea25889'
+- 'c123273de19d5c2f'
+- 'df570b3785a95295'
+- 'a5efa651fec451b5'
+- '216f7065c13c5ec9'
+- '4754eb209bc452e4'
+- 'ce28728cdb6f50c9'
+- '33461776a24d554f'
+- '0920187661745605'
+- '0633cb3809935cb7'
+- 'f3e9317326955421'
+- '1c371291fdc1551a'
+- '37185bcf00de5be6'
+- '224510571ce95a3f'
+- 'e38a6e1fd4c55393'
+- '3a0b00f0840658e5'
+- '0d6abcbad24652c0'
+- '4789245424875682'
+- 'fba38dd9492a5341'
+- 'b649dcb158a75dcd'
+- '1a5182ccbf1b5955'
+- '1ac622ff2d2e5210'
+- 'f63cff56784d5cb9'
+- '0ea876c450bb5aa6'
+- '6fc06c6e4d1752a1'
+- '88396ca47dcf5361'
+- '7e1f829a0de95258'
+- '5f9a9890f1a75602'
+- '5a60c57493885588'
+- '67be2615438d55fb'
+- 'bda2fb6ea7735b5a'
+- '55aa596e131d5734'
+- 'd1a786625a885023'
+- '8ec0cd02d7705766'
+- 'e378bb756641598d'
+- 'c853ae7a361f54d9'
+- 'b1db6a099fea55f5'
+- 'ca8bc031163a5765'
+- 'eee8261221df5048'
+- 'b33131090ada5f2d'
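The hex strings above are scene tokens: each identifies a key frame, and scene filtering keeps only scenes whose key-frame token appears in the list. A minimal sketch of how such a list is consumed, assuming the SceneFilter dataclass added later in this commit (det_map/data/datasets/dataclasses.py):

from det_map.data.datasets.dataclasses import SceneFilter

# Any subset of the token list above works; two entries shown for brevity.
scene_filter = SceneFilter(
    num_history_frames=4,
    num_future_frames=10,
    tokens=["f0b74302312b5241", "d74e1e5648e35864"],
)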
det_map/config/splits/default_train_val_test_log_split.yaml
ADDED
The diff for this file is too large to render. See raw diff.
det_map/config/train_det.yaml
ADDED
@@ -0,0 +1,48 @@
+hydra:
+  run:
+    dir: ${output_dir}
+  output_subdir: ${output_dir}/code/hydra  # Store hydra's config breakdown here for debugging
+  searchpath:  # Only <exp_dir> in these paths are discoverable
+    - det_map/config/defaults
+    - det_map/config
+    - det_map/config/splits
+    - det_map/config/agent
+    # - pkg://navsim.planning.script.config.training
+
+defaults:
+  - default_common
+  - default_evaluation
+  - default_train_val_test_log_split
+  - agent: map_agent
+  - scene_filter: det_all_scenes
+
+split: mini
+
+dataloader:
+  params:
+    batch_size: 32  # number of samples per batch
+    num_workers: 4  # number of workers for data loading
+    pin_memory: true  # pin memory for faster GPU transfer
+    prefetch_factor: 1
+
+trainer:
+  params:
+    max_epochs: 20  # maximum number of training epochs
+    check_val_every_n_epoch: 1  # run validation set every n training epochs
+    val_check_interval: 1.0  # [%] run validation set every X% of training set
+
+    limit_train_batches: 1.0  # how much of training dataset to check (float = fraction, int = num_batches)
+    limit_val_batches: 1.0  # how much of validation dataset to check (float = fraction, int = num_batches)
+
+    accelerator: gpu  # distribution method
+    strategy: ddp
+    precision: 32  # floating point precision
+    num_nodes: 1  # Number of nodes used for training
+
+    num_sanity_val_steps: 0  # number of validation steps to run before training begins
+    fast_dev_run: false  # runs 1 batch of train/val/test for sanity
+
+    accumulate_grad_batches: 1  # accumulates gradients every n batches
+    # track_grad_norm: -1  # logs the p-norm for inspection
+    gradient_clip_val: 0.0  # value to clip gradients
+    gradient_clip_algorithm: norm  # [value, norm] method to clip gradients
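A config in this shape can also be composed programmatically. A minimal sketch, assuming Hydra >= 1.2 and a working directory from which det_map/config is reachable; the override shown is purely illustrative:

from hydra import compose, initialize

with initialize(config_path="det_map/config", version_base=None):
    cfg = compose(
        config_name="train_det",
        overrides=["trainer.params.max_epochs=1"],  # illustrative override
    )
print(cfg.dataloader.params.batch_size)  # -> 32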
det_map/data/__init__.py
ADDED
File without changes

det_map/data/datasets/__init__.py
ADDED
File without changes
det_map/data/datasets/dataclasses.py
ADDED
@@ -0,0 +1,521 @@
+from __future__ import annotations
+
+import io
+import os
+from dataclasses import dataclass, asdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Union
+from nuplan.database.maps_db.gpkg_mapsdb import MAP_LOCATIONS
+from nuplan.common.maps.nuplan_map.map_factory import get_maps_api
+
+import numpy as np
+import numpy.typing as npt
+from PIL import Image
+from nuplan.common.actor_state.state_representation import StateSE2
+from nuplan.common.maps.abstract_map import AbstractMap
+from nuplan.database.utils.pointclouds.lidar import LidarPointCloud
+from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
+from pyquaternion import Quaternion
+
+from navsim.planning.simulation.planner.pdm_planner.utils.pdm_geometry_utils import (
+    convert_absolute_to_relative_se2_array,
+)
+
+NAVSIM_INTERVAL_LENGTH: float = 0.5
+OPENSCENE_DATA_ROOT = os.environ.get("OPENSCENE_DATA_ROOT")
+NUPLAN_MAPS_ROOT = os.environ.get("NUPLAN_MAPS_ROOT")
+
+
+@dataclass
+class Camera:
+    image: Optional[npt.NDArray[np.float32]] = None
+    canvas: Optional[npt.NDArray[np.float32]] = None
+
+    sensor2lidar_rotation: Optional[npt.NDArray[np.float32]] = None
+    sensor2lidar_translation: Optional[npt.NDArray[np.float32]] = None
+    intrinsics: Optional[npt.NDArray[np.float32]] = None
+    distortion: Optional[npt.NDArray[np.float32]] = None
+
+    post_rot: Optional[npt.NDArray[np.float32]] = None
+    post_tran: Optional[npt.NDArray[np.float32]] = None
+
+    def to_dict(self):
+        return {
+            'image': self.image,
+            'canvas': self.canvas,
+            'sensor2lidar_rotation': self.sensor2lidar_rotation,
+            'sensor2lidar_translation': self.sensor2lidar_translation,
+            'intrinsics': self.intrinsics,
+            'distortion': self.distortion,
+            'post_rot': self.post_rot,
+            'post_tran': self.post_tran
+        }
+
+
+@dataclass
+class Cameras:
+    cam_f0: Camera
+    cam_l0: Camera
+    cam_l1: Camera
+    cam_l2: Camera
+    cam_r0: Camera
+    cam_r1: Camera
+    cam_r2: Camera
+    cam_b0: Camera
+
+    @classmethod
+    def from_camera_dict(
+        cls,
+        sensor_blobs_path: Path,
+        camera_dict: Dict[str, Any],
+        sensor_names: List[str],
+    ) -> Cameras:
+
+        data_dict: Dict[str, Camera] = {}
+        for camera_name in camera_dict.keys():
+            camera_identifier = camera_name.lower()
+            if camera_identifier in sensor_names:
+                image_path = sensor_blobs_path / camera_dict[camera_name]["data_path"]
+                data_dict[camera_identifier] = Camera(
+                    image=np.array(Image.open(image_path)),
+                    sensor2lidar_rotation=camera_dict[camera_name]["sensor2lidar_rotation"],
+                    sensor2lidar_translation=camera_dict[camera_name]["sensor2lidar_translation"],
+                    intrinsics=camera_dict[camera_name]["cam_intrinsic"],
+                    distortion=camera_dict[camera_name]["distortion"],
+                )
+            else:
+                data_dict[camera_identifier] = Camera()  # empty camera
+
+        return Cameras(
+            cam_f0=data_dict["cam_f0"],
+            cam_l0=data_dict["cam_l0"],
+            cam_l1=data_dict["cam_l1"],
+            cam_l2=data_dict["cam_l2"],
+            cam_r0=data_dict["cam_r0"],
+            cam_r1=data_dict["cam_r1"],
+            cam_r2=data_dict["cam_r2"],
+            cam_b0=data_dict["cam_b0"],
+        )
+
+
+@dataclass
+class Lidar:
+    # merged lidar point cloud as (6, n) float32 array with n points
+    # first axis: (x, y, z, intensity, ring, lidar_id)
+    lidar_pc: Optional[npt.NDArray[np.float32]] = None
+
+    @staticmethod
+    def _load_bytes(lidar_path: Path) -> BinaryIO:
+        with open(lidar_path, "rb") as fp:
+            return io.BytesIO(fp.read())
+
+    @classmethod
+    def from_paths(
+        cls,
+        sensor_blobs_path: Path,
+        lidar_path: Path,
+        sensor_names: List[str],
+    ) -> Lidar:
+        # NOTE: this could be extended to load specific LiDARs in the merged pc
+        if "lidar_pc" in sensor_names:
+            global_lidar_path = sensor_blobs_path / lidar_path
+            lidar_pc = LidarPointCloud.from_buffer(cls._load_bytes(global_lidar_path), "pcd").points
+            return Lidar(lidar_pc)
+        return Lidar()  # empty lidar
+
+
+@dataclass
+class EgoStatus:
+    ego_pose: npt.NDArray[np.float64]
+    ego_velocity: npt.NDArray[np.float32]
+    ego_acceleration: npt.NDArray[np.float32]
+    driving_command: npt.NDArray[np.int_]
+    in_global_frame: bool = False  # False for AgentInput
+
+
+@dataclass
+class AgentInput:
+    tokens: List[str]
+    timestamps: List[int]
+
+    ego_statuses: List[EgoStatus]
+    cameras: List[Cameras]
+    lidars: List[Lidar]
+    ego2globals: List[np.ndarray]
+
+    def __post_init__(self):
+        pass
+
+    @classmethod
+    def from_scene_dict_list(
+        cls,
+        scene_dict_list: List[Dict],
+        sensor_blobs_path: Path,
+        num_history_frames: int,
+        sensor_config: SensorConfig,
+    ) -> AgentInput:
+        assert len(scene_dict_list) > 0, "Scene list is empty!"
+
+        global_ego_poses = []
+        for frame_idx in range(num_history_frames):
+            ego_translation = scene_dict_list[frame_idx]["ego2global_translation"]
+            ego_quaternion = Quaternion(*scene_dict_list[frame_idx]["ego2global_rotation"])
+            global_ego_pose = np.array(
+                [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
+                dtype=np.float64,
+            )
+            global_ego_poses.append(global_ego_pose)
+
+        local_ego_poses = convert_absolute_to_relative_se2_array(
+            StateSE2(*global_ego_poses[-1]), np.array(global_ego_poses, dtype=np.float64)
+        )
+
+        ego_statuses: List[EgoStatus] = []
+        cameras: List[Cameras] = []
+        lidars: List[Lidar] = []
+        ego2globals = []
+        tokens = []
+        timestamps = []
+
+        for frame_idx in range(num_history_frames):
+            tokens.append(scene_dict_list[frame_idx]['token'])
+            timestamps.append(scene_dict_list[frame_idx]['timestamp'])
+
+            ego_dynamic_state = scene_dict_list[frame_idx]["ego_dynamic_state"]
+            ego_status = EgoStatus(
+                ego_pose=np.array(local_ego_poses[frame_idx], dtype=np.float32),
+                ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
+                ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
+                driving_command=scene_dict_list[frame_idx]["driving_command"],
+            )
+            ego_statuses.append(ego_status)
+
+            sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
+            cameras.append(
+                Cameras.from_camera_dict(
+                    sensor_blobs_path=sensor_blobs_path,
+                    camera_dict=scene_dict_list[frame_idx]["cams"],
+                    sensor_names=sensor_names,
+                )
+            )
+
+            lidars.append(
+                Lidar.from_paths(
+                    sensor_blobs_path=sensor_blobs_path,
+                    lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
+                    sensor_names=sensor_names,
+                )
+            )
+
+            ego2globals.append(scene_dict_list[frame_idx]['ego2global'])
+
+        return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
+
+
+@dataclass
+class Annotations:
+    boxes: npt.NDArray[np.float32]
+    names: List[str]
+    velocity_3d: npt.NDArray[np.float32]
+    instance_tokens: List[str]
+    track_tokens: List[str]
+
+    def __post_init__(self):
+        annotation_lengths: Dict[str, int] = {
+            attribute_name: len(attribute) for attribute_name, attribute in vars(self).items()
+        }
+        assert (
+            len(set(annotation_lengths.values())) == 1
+        ), f"Annotations expects all attributes to have equal length, but got {annotation_lengths}"
+
+
+@dataclass
+class Trajectory:
+    poses: npt.NDArray[np.float32]  # local coordinates
+    trajectory_sampling: TrajectorySampling = TrajectorySampling(
+        time_horizon=4, interval_length=0.5
+    )
+
+    def __post_init__(self):
+        assert (
+            self.poses.ndim == 2
+        ), "Trajectory poses should have two dimensions for samples and poses."
+        assert (
+            self.poses.shape[0] == self.trajectory_sampling.num_poses
+        ), "Trajectory poses and sampling have unequal number of poses."
+        assert self.poses.shape[1] == 3, "Trajectory requires (x, y, heading) at last dim."
+
+
+@dataclass
+class SceneMetadata:
+    log_name: str
+    scene_token: str
+    map_name: str
+    initial_token: str
+
+    num_history_frames: int
+    num_future_frames: int
+
+
+@dataclass
+class Frame:
+    token: str
+    timestamp: int
+    roadblock_ids: List[str]
+    traffic_lights: List[Tuple[str, bool]]
+    annotations: Annotations
+
+    ego_status: EgoStatus
+    lidar: Lidar
+    cameras: Cameras
+    ego2global: np.ndarray
+
+
+@dataclass
+class Scene:
+    # Ground truth information
+    scene_metadata: SceneMetadata
+    map_api: AbstractMap
+    frames: List[Frame]
+
+    def get_future_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
+
+        if num_trajectory_frames is None:
+            num_trajectory_frames = self.scene_metadata.num_future_frames
+
+        start_frame_idx = self.scene_metadata.num_history_frames - 1
+
+        global_ego_poses = []
+        for frame_idx in range(start_frame_idx, start_frame_idx + num_trajectory_frames + 1):
+            global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
+
+        local_ego_poses = convert_absolute_to_relative_se2_array(
+            StateSE2(*global_ego_poses[0]), np.array(global_ego_poses[1:], dtype=np.float64)
+        )
+
+        return Trajectory(
+            local_ego_poses,
+            TrajectorySampling(
+                num_poses=len(local_ego_poses),
+                interval_length=NAVSIM_INTERVAL_LENGTH,
+            ),
+        )
+
+    def get_history_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
+
+        if num_trajectory_frames is None:
+            num_trajectory_frames = self.scene_metadata.num_history_frames
+
+        global_ego_poses = []
+        for frame_idx in range(num_trajectory_frames):
+            global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
+
+        origin = StateSE2(*global_ego_poses[-1])
+        local_ego_poses = convert_absolute_to_relative_se2_array(
+            origin, np.array(global_ego_poses, dtype=np.float64)
+        )
+
+        return Trajectory(
+            local_ego_poses,
+            TrajectorySampling(
+                num_poses=len(local_ego_poses),
+                interval_length=NAVSIM_INTERVAL_LENGTH,
+            ),
+        )
+
+    def get_agent_input(self) -> AgentInput:
+        # NOTE: this function is unused and might be removed.
+
+        local_ego_poses = self.get_history_trajectory().poses
+
+        ego_statuses: List[EgoStatus] = []
+        cameras: List[Cameras] = []
+        lidars: List[Lidar] = []
+        ego2globals = []
+        tokens, timestamps = [], []
+        for frame_idx in range(self.scene_metadata.num_history_frames):
+            frame_ego_status = self.frames[frame_idx].ego_status
+            tokens.append(self.frames[frame_idx].token)
+            timestamps.append(self.frames[frame_idx].timestamp)
+            ego_statuses.append(
+                EgoStatus(
+                    ego_pose=local_ego_poses[frame_idx],
+                    ego_velocity=frame_ego_status.ego_velocity,
+                    ego_acceleration=frame_ego_status.ego_acceleration,
+                    driving_command=frame_ego_status.driving_command,
+                )
+            )
+            cameras.append(self.frames[frame_idx].cameras)
+            lidars.append(self.frames[frame_idx].lidar)
+            ego2globals.append(self.frames[frame_idx].ego2global)
+
+        return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
+
+    @classmethod
+    def _build_annotations(
+        cls,
+        scene_frame: Dict,
+    ) -> Annotations:
+        return Annotations(
+            boxes=scene_frame["anns"]["gt_boxes"],
+            names=scene_frame["anns"]["gt_names"],
+            velocity_3d=scene_frame["anns"]["gt_velocity_3d"],
+            instance_tokens=scene_frame["anns"]["instance_tokens"],
+            track_tokens=scene_frame["anns"]["track_tokens"],
+        )
+
+    @classmethod
+    def _build_ego_status(
+        cls,
+        scene_frame: Dict,
+    ) -> EgoStatus:
+        ego_translation = scene_frame["ego2global_translation"]
+        ego_quaternion = Quaternion(*scene_frame["ego2global_rotation"])
+        global_ego_pose = np.array(
+            [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
+            dtype=np.float64,
+        )
+        ego_dynamic_state = scene_frame["ego_dynamic_state"]
+        return EgoStatus(
+            ego_pose=global_ego_pose,
+            ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
+            ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
+            driving_command=scene_frame["driving_command"],
+            in_global_frame=True,
+        )
+
+    @classmethod
+    def _build_map_api(cls, map_name: str) -> AbstractMap:
+        assert (
+            map_name in MAP_LOCATIONS
+        ), f"The map name {map_name} is invalid, must be in {MAP_LOCATIONS}"
+        return get_maps_api(NUPLAN_MAPS_ROOT, "nuplan-maps-v1.0", map_name)
+
+    @classmethod
+    def from_scene_dict_list(
+        cls,
+        scene_dict_list: List[Dict],
+        sensor_blobs_path: Path,
+        num_history_frames: int,
+        num_future_frames: int,
+        sensor_config: SensorConfig,
+    ) -> Scene:
+        assert len(scene_dict_list) > 0, "Scene list is empty!"
+
+        scene_metadata = SceneMetadata(
+            log_name=scene_dict_list[num_history_frames - 1]["log_name"],
+            scene_token=scene_dict_list[num_history_frames - 1]["scene_token"],
+            map_name=scene_dict_list[num_history_frames - 1]["map_location"],
+            initial_token=scene_dict_list[num_history_frames - 1]["token"],
+            num_history_frames=num_history_frames,
+            num_future_frames=num_future_frames,
+        )
+        map_api = cls._build_map_api(scene_metadata.map_name)
+
+        frames: List[Frame] = []
+        for frame_idx in range(len(scene_dict_list)):
+            global_ego_status = cls._build_ego_status(scene_dict_list[frame_idx])
+            annotations = cls._build_annotations(scene_dict_list[frame_idx])
+
+            sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
+
+            cameras = Cameras.from_camera_dict(
+                sensor_blobs_path=sensor_blobs_path,
+                camera_dict=scene_dict_list[frame_idx]["cams"],
+                sensor_names=sensor_names,
+            )
+
+            lidar = Lidar.from_paths(
+                sensor_blobs_path=sensor_blobs_path,
+                lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
+                sensor_names=sensor_names,
+            )
+
+            frame = Frame(
+                token=scene_dict_list[frame_idx]["token"],
+                timestamp=scene_dict_list[frame_idx]["timestamp"],
+                roadblock_ids=scene_dict_list[frame_idx]["roadblock_ids"],
+                traffic_lights=scene_dict_list[frame_idx]["traffic_lights"],
+                annotations=annotations,
+                ego_status=global_ego_status,
+                lidar=lidar,
+                cameras=cameras,
+                ego2global=scene_dict_list[frame_idx]['ego2global']
+            )
+            frames.append(frame)
+
+        return Scene(scene_metadata=scene_metadata, frames=frames, map_api=map_api)
+
+
+@dataclass
+class SceneFilter:
+    num_history_frames: int = 4
+    num_future_frames: int = 10
+    has_route: bool = True
+
+    max_scenes: Optional[int] = None
+    log_names: Optional[List[str]] = None
+    tokens: Optional[List[str]] = None
+
+    @property
+    def num_frames(self) -> int:
+        return self.num_history_frames
+
+
+@dataclass
+class SensorConfig:
+    # Config values of sensors are either
+    # - bool: Whether to load history or not
+    # - List[int]: For loading specific history steps
+
+    cam_f0: Union[bool, List[int]]
+    cam_l0: Union[bool, List[int]]
+    cam_l1: Union[bool, List[int]]
+    cam_l2: Union[bool, List[int]]
+    cam_r0: Union[bool, List[int]]
+    cam_r1: Union[bool, List[int]]
+    cam_r2: Union[bool, List[int]]
+    cam_b0: Union[bool, List[int]]
+    lidar_pc: Union[bool, List[int]]
+
+    def get_sensors_at_iteration(self, iteration: int) -> List[str]:
+
+        sensors_at_iteration: List[str] = []
+        for sensor_name, sensor_include in asdict(self).items():
+            if isinstance(sensor_include, bool) and sensor_include:
+                sensors_at_iteration.append(sensor_name)
+            elif isinstance(sensor_include, list) and iteration in sensor_include:
+                sensors_at_iteration.append(sensor_name)
+
+        return sensors_at_iteration
+
+    @classmethod
+    def build_all_sensors(cls, include: Union[bool, List[int]] = True) -> SensorConfig:
+        return SensorConfig(
+            cam_f0=include,
+            cam_l0=include,
+            cam_l1=include,
+            cam_l2=include,
+            cam_r0=include,
+            cam_r1=include,
+            cam_r2=include,
+            cam_b0=include,
+            lidar_pc=include,
+        )
+
+    @classmethod
+    def build_no_sensors(cls) -> SensorConfig:
+        return cls.build_all_sensors(include=False)
+
+
+@dataclass
+class PDMResults:
+    no_at_fault_collisions: float
+    drivable_area_compliance: float
+    driving_direction_compliance: float
+
+    ego_progress: float
+    time_to_collision_within_bound: float
+    comfort: float
+
+    score: float
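One detail worth calling out from the file above: SensorConfig fields accept either a bool (load the sensor at every history iteration) or a list of iteration indices (load it only at those steps). A short illustration of get_sensors_at_iteration under that contract:

from det_map.data.datasets.dataclasses import SensorConfig

cfg = SensorConfig(
    cam_f0=True,                    # loaded at every history iteration
    cam_l0=[3], cam_l1=False, cam_l2=False,
    cam_r0=False, cam_r1=False, cam_r2=False,
    cam_b0=True,
    lidar_pc=[0, 3],                # only at iterations 0 and 3
)
print(cfg.get_sensors_at_iteration(0))  # ['cam_f0', 'cam_b0', 'lidar_pc']
print(cfg.get_sensors_at_iteration(3))  # ['cam_f0', 'cam_l0', 'cam_b0', 'lidar_pc']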
det_map/data/datasets/dataloader.py
ADDED
@@ -0,0 +1,172 @@
+from __future__ import annotations
+
+import lzma
+import pickle
+
+from pathlib import Path
+from typing import Any, Dict, List
+from tqdm import tqdm
+
+from navsim.common.dataclasses import AgentInput, Scene, SceneFilter, SensorConfig
+from navsim.planning.metric_caching.metric_cache import MetricCache
+
+
+def filter_scenes(data_path: Path, scene_filter: SceneFilter) -> Dict[str, List[Dict[str, Any]]]:
+
+    def split_list(input_list: List[Any], num_frames: int, frame_interval: int) -> List[List[Any]]:
+        return [input_list[i : i + num_frames] for i in range(0, len(input_list), frame_interval)]
+
+    filtered_scenes: Dict[str, List[Dict[str, Any]]] = {}
+    stop_loading: bool = False
+
+    # filter logs
+    log_files = list(data_path.iterdir())
+    if scene_filter.log_names is not None:
+        log_files = [
+            log_file
+            for log_file in log_files
+            if log_file.name.replace(".pkl", "") in scene_filter.log_names
+        ]
+
+    if scene_filter.tokens is not None:
+        filter_tokens = True
+        tokens = set(scene_filter.tokens)
+    else:
+        filter_tokens = False
+
+    for log_pickle_path in tqdm(log_files, desc="Loading logs"):
+
+        scene_dict_list = pickle.load(open(log_pickle_path, "rb"))
+        for frame_list in split_list(
+            scene_dict_list, scene_filter.num_frames, scene_filter.frame_interval
+        ):
+            # Filter scenes which are too short
+            if len(frame_list) < scene_filter.num_frames:
+                continue
+
+            # Filter scenes with no route
+            if (
+                scene_filter.has_route
+                and len(frame_list[scene_filter.num_history_frames - 1]["roadblock_ids"]) == 0
+            ):
+                continue
+
+            # Filter by token
+            token = frame_list[scene_filter.num_history_frames - 1]["token"]
+            if filter_tokens and token not in tokens:
+                continue
+
+            filtered_scenes[token] = frame_list
+
+            if (scene_filter.max_scenes is not None) and (
+                len(filtered_scenes) >= scene_filter.max_scenes
+            ):
+                stop_loading = True
+                break
+
+        if stop_loading:
+            break
+
+    return filtered_scenes
+
+
+class SceneLoader:
+
+    def __init__(
+        self,
+        data_path: Path,
+        sensor_blobs_path: Path,
+        scene_filter: SceneFilter,
+        sensor_config: SensorConfig = SensorConfig.build_no_sensors(),
+    ):
+
+        self.scene_frames_dicts = filter_scenes(data_path, scene_filter)
+        self._sensor_blobs_path = sensor_blobs_path
+        self._scene_filter = scene_filter
+        self._sensor_config = sensor_config
+
+    @property
+    def tokens(self) -> List[str]:
+        return list(self.scene_frames_dicts.keys())
+
+    def __len__(self):
+        return len(self.tokens)
+
+    def __getitem__(self, idx) -> str:
+        return self.tokens[idx]
+
+    def get_scene_from_token(self, token: str) -> Scene:
+        assert token in self.tokens
+        return Scene.from_scene_dict_list(
+            self.scene_frames_dicts[token],
+            self._sensor_blobs_path,
+            num_history_frames=self._scene_filter.num_history_frames,
+            num_future_frames=self._scene_filter.num_future_frames,
+            sensor_config=self._sensor_config,
+        )
+
+    def get_agent_input_from_token(self, token: str) -> AgentInput:
+        assert token in self.tokens
+        return AgentInput.from_scene_dict_list(
+            self.scene_frames_dicts[token],
+            self._sensor_blobs_path,
+            num_history_frames=self._scene_filter.num_history_frames,
+            sensor_config=self._sensor_config,
+        )
+
+    def get_tokens_list_per_log(self) -> Dict[str, List[str]]:
+        # generate a dict that contains a list of tokens for each log-name
+        tokens_per_logs: Dict[str, List[str]] = {}
+        for token, scene_dict_list in self.scene_frames_dicts.items():
+            log_name = scene_dict_list[0]["log_name"]
+            if tokens_per_logs.get(log_name):
+                tokens_per_logs[log_name].append(token)
+            else:
+                tokens_per_logs.update({log_name: [token]})
+        return tokens_per_logs
+
+
+class MetricCacheLoader:
+
+    def __init__(
+        self,
+        cache_path: Path,
+        file_name: str = "metric_cache.pkl",
+    ):
+
+        self._file_name = file_name
+        self.metric_cache_paths = self._load_metric_cache_paths(cache_path)
+
+    def _load_metric_cache_paths(self, cache_path: Path) -> Dict[str, Path]:
+        metadata_dir = cache_path / "metadata"
+        metadata_file = [file for file in metadata_dir.iterdir() if ".csv" in str(file)][0]
+        with open(str(metadata_file), "r") as f:
+            cache_paths = f.read().splitlines()[1:]
+        metric_cache_dict = {
+            cache_path.split("/")[-2]: cache_path
+            for cache_path in cache_paths
+        }
+        return metric_cache_dict
+
+    @property
+    def tokens(self) -> List[str]:
+        return list(self.metric_cache_paths.keys())
+
+    def __len__(self):
+        return len(self.metric_cache_paths)
+
+    def __getitem__(self, idx: int) -> MetricCache:
+        return self.get_from_token(self.tokens[idx])
+
+    def get_from_token(self, token: str) -> MetricCache:
+
+        with lzma.open(self.metric_cache_paths[token], "rb") as f:
+            metric_cache: MetricCache = pickle.load(f)
+
+        return metric_cache
+
+    def to_pickle(self, path: Path) -> None:
+        full_metric_cache = {}
+        for token in tqdm(self.tokens):
+            full_metric_cache[token] = self.get_from_token(token)
+        with open(path, "wb") as f:
+            pickle.dump(full_metric_cache, f)
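A usage sketch for SceneLoader; the directory layout under OPENSCENE_DATA_ROOT is an assumption mirroring OpenScene conventions, and SceneFilter/SensorConfig come from navsim.common.dataclasses as in the imports above:

import os
from pathlib import Path

from navsim.common.dataclasses import SceneFilter, SensorConfig
from det_map.data.datasets.dataloader import SceneLoader

data_root = Path(os.environ["OPENSCENE_DATA_ROOT"])  # layout below is assumed
loader = SceneLoader(
    data_path=data_root / "navsim_logs" / "mini",
    sensor_blobs_path=data_root / "sensor_blobs" / "mini",
    scene_filter=SceneFilter(max_scenes=10),
    sensor_config=SensorConfig.build_all_sensors(),
)
scene = loader.get_scene_from_token(loader.tokens[0])
print(len(loader), scene.scene_metadata.map_name)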
det_map/data/datasets/dataset.py
ADDED
@@ -0,0 +1,41 @@
+from typing import Dict, List, Tuple
+import torch
+
+from det_map.data.datasets.dataloader import SceneLoader
+from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
+
+class Dataset(torch.utils.data.Dataset):
+    def __init__(
+        self,
+        pipelines, is_train,
+        scene_loader: SceneLoader,
+        feature_builders: List[AbstractFeatureBuilder],
+        target_builders: List[AbstractTargetBuilder]
+    ):
+        super().__init__()
+        self._scene_loader = scene_loader
+        self._feature_builders = feature_builders
+        self._target_builders = target_builders
+        self.pipelines = pipelines
+        self.is_train = is_train
+
+    def __len__(self):
+        return len(self._scene_loader)
+
+    def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
+        scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
+        features: Dict[str, torch.Tensor] = {}
+        for builder in self._feature_builders:
+            features.update(builder.compute_features(scene.get_agent_input()))
+        targets: Dict[str, torch.Tensor] = {}
+        for builder in self._target_builders:
+            targets.update(builder.compute_targets(scene))
+        # aug for four frames respectively
+        features, targets = self.pipelines['lidar_aug'](features, targets)
+        # project lidar at frame i to image i
+        features, targets = self.pipelines['depth'](features, targets)
+        # concat all lidar points, remove points too far/close
+        features, targets = self.pipelines['lidar_filter'](features, targets)
+        # shuffle all lidar points
+        features, targets = self.pipelines['point_shuffle'](features, targets)
+        return (features, targets)
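Dataset treats pipelines as a dict of four callables, each mapping a (features, targets) pair to a transformed (features, targets) pair. A minimal sketch of that contract, with identity placeholders standing in for the real transforms under det_map/data/pipelines:

from typing import Dict, Tuple

import torch

def identity_stage(
    features: Dict[str, torch.Tensor], targets: Dict[str, torch.Tensor]
) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
    # placeholder: a real stage augments or filters the dicts before returning them
    return features, targets

pipelines = {
    'lidar_aug': identity_stage,      # per-frame geometric augmentation
    'depth': identity_stage,          # lidar-to-image depth projection
    'lidar_filter': identity_stage,   # range filtering + concatenation
    'point_shuffle': identity_stage,  # random permutation of points
}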
det_map/data/datasets/dataset_det.py
ADDED
@@ -0,0 +1,28 @@
+from typing import Dict, List, Tuple
+import torch
+
+from det_map.data.datasets.dataloader import SceneLoader
+from det_map.data.datasets.dataset import Dataset
+from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
+
+class DetDataset(Dataset):
+    def __init__(
+        self, **kwargs
+    ):
+        super().__init__(**kwargs)
+
+    def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
+        scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
+        features: Dict[str, torch.Tensor] = {}
+        for builder in self._feature_builders:
+            features.update(builder.compute_features(scene.get_agent_input()))
+        targets: Dict[str, torch.Tensor] = {}
+        for builder in self._target_builders:
+            targets.update(builder.compute_targets(scene))
+        # todo sampler
+        features, targets = self.pipelines['lidar_aug'](features, targets)
+        features, targets = self.pipelines['depth'](features, targets)
+        features, targets = self.pipelines['lidar_filter'](features, targets)
+        features, targets = self.pipelines['point_shuffle'](features, targets)
+
+        return (features, targets)
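Since DetDataset only overrides __getitem__ and forwards construction to Dataset via **kwargs, it is built the same way as its parent. A sketch, reusing the pipelines dict and SceneLoader from the earlier sketches (both are carried-over assumptions, not objects defined in this commit):

from det_map.data.datasets.dataset_det import DetDataset

dataset = DetDataset(
    pipelines=pipelines,    # stage dict from the sketch after dataset.py
    is_train=True,
    scene_loader=loader,    # SceneLoader from the sketch after dataloader.py
    feature_builders=[],    # e.g. [LiDARCameraFeatureBuilder(pipelines)]
    target_builders=[],
)
features, targets = dataset[0]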
det_map/data/datasets/feature_builders.py
ADDED
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from typing import Dict
+
+import numpy as np
+import torch
+
+from det_map.data.datasets.dataclasses import AgentInput, Camera
+from det_map.data.datasets.lidar_utils import transform_points, render_image
+from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder
+from mmcv.parallel import DataContainer as DC
+
+class LiDARCameraFeatureBuilder(AbstractFeatureBuilder):
+    def __init__(self, pipelines):
+        super().__init__()
+        self.pipelines = pipelines
+
+    def compute_features(self, agent_input: AgentInput) -> Dict[str, torch.Tensor]:
+        img_pipeline = self.pipelines['img']
+        timestamps_ori = agent_input.timestamps
+        timestamps = [(timestamps_ori[-1] - tmp) / 1e6 for tmp in timestamps_ori]
+
+        lidars = [np.copy(tmp.lidar_pc) for tmp in agent_input.lidars]
+        ego2globals = [tmp for tmp in agent_input.ego2globals]
+
+        # last frame is the key frame
+        global2ego_key = np.linalg.inv(ego2globals[-1])
+        # warp each history frame: ego(t) -> global, then global -> ego(key frame)
+        lidars_warped = [transform_points(transform_points(pts, mat), global2ego_key)
+                         for pts, mat in zip(lidars[:-1], ego2globals[:-1])]
+        lidars_warped.append(lidars[-1])
+        for i, l in enumerate(lidars_warped):
+            # rows: x, y, z, intensity, timestamp (the ring channel is overwritten with the time offset)
+            l[4] = timestamps[i]
+            lidars_warped[i] = torch.from_numpy(l[:5]).t()
+
+
+        # debug visualize lidar pc
+        # for idx, lidar in enumerate(lidars_warped):
+        #     render_image(lidar, str('warped' + str(idx)))
+        # for idx, lidar in enumerate([tmp.lidar_pc for tmp in agent_input.lidars]):
+        #     render_image(lidar, str('ori' + str(idx)))
+
+        cams_all_frames = [[
+            tmp.cam_f0,
+            # tmp.cam_l0,
+            # tmp.cam_l1,
+            # tmp.cam_l2,
+            # tmp.cam_r0,
+            # tmp.cam_r1,
+            # tmp.cam_r2,
+            tmp.cam_b0
+        ] for tmp in agent_input.cameras]
+
+        image, canvas, sensor2lidar_rotation, sensor2lidar_translation, intrinsics, distortion, post_rot, post_tran = [], [], [], [], [], [], [], []
+        for cams_frame_t in cams_all_frames:
+            image_t, canvas_t, sensor2lidar_rotation_t, sensor2lidar_translation_t, intrinsics_t, distortion_t, post_rot_t, post_tran_t = [], [], [], [], [], [], [], []
+            for cam in cams_frame_t:
+                cam_processed: Camera = img_pipeline(cam)
+                image_t.append(cam_processed.image)
+                canvas_t.append(cam_processed.canvas)
+                sensor2lidar_rotation_t.append(cam_processed.sensor2lidar_rotation)
+                sensor2lidar_translation_t.append(cam_processed.sensor2lidar_translation)
+                intrinsics_t.append(cam_processed.intrinsics)
+                distortion_t.append(cam_processed.distortion)
+                post_rot_t.append(cam_processed.post_rot)
+                post_tran_t.append(cam_processed.post_tran)
+            image.append(torch.stack(image_t))
+            canvas.append(torch.stack(canvas_t))
+            sensor2lidar_rotation.append(torch.stack(sensor2lidar_rotation_t))
+            sensor2lidar_translation.append(torch.stack(sensor2lidar_translation_t))
+            intrinsics.append(torch.stack(intrinsics_t))
+            distortion.append(torch.stack(distortion_t))
+            post_rot.append(torch.stack(post_rot_t))
+            post_tran.append(torch.stack(post_tran_t))
+
+
+        # img: T, N_CAM, C, H, W
+        # imgs = DC(torch.stack(image), cpu_only=False, stack=True)
+        # combine = torch.matmul(sensor2lidar_rotation, torch.inverse(intrinsics))
+        # coords = torch.matmul(combine, coords)
+        # coords += sensor2lidar_translation
+        imgs = torch.stack(image)
+        return {
+            "image": imgs,
+            'canvas': torch.stack(canvas).to(imgs),
+            'sensor2lidar_rotation': torch.stack(sensor2lidar_rotation).to(imgs),
+            'sensor2lidar_translation': torch.stack(sensor2lidar_translation).to(imgs),
+            'intrinsics': torch.stack(intrinsics).to(imgs),
+            'distortion': torch.stack(distortion).to(imgs),
+            'post_rot': torch.stack(post_rot).to(imgs),
+            'post_tran': torch.stack(post_tran).to(imgs),
+            "lidars_warped": lidars_warped
+        }
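For orientation, a shape sketch of the feature dict this builder returns, assuming T history frames, the two cameras left uncommented above (cam_f0, cam_b0), and agent_input/pipelines objects like those built earlier; the assertions only restate what the stacking code above produces:

from det_map.data.datasets.feature_builders import LiDARCameraFeatureBuilder

feats = LiDARCameraFeatureBuilder(pipelines).compute_features(agent_input)
assert feats["image"].ndim == 5                 # (T, N_CAM=2, C, H, W)
assert feats["lidars_warped"][0].shape[1] == 5  # (n_points, x/y/z/intensity/dt)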
det_map/data/datasets/lidar_utils.py
ADDED
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+from typing import Tuple
+
+import numpy as np
+import numpy.typing as npt
+from PIL import Image
+from matplotlib import cm
+from nuplan.database.utils.geometry import view_points
+
+
+def transform_points(points, transf_matrix: npt.NDArray[np.float64]):
+    """
+    Applies a homogeneous transform.
+    :param transf_matrix: <np.float: 4, 4>. Homogeneous transformation matrix.
+    """
+    transf_matrix = transf_matrix.astype(np.float32)
+    points[:3, :] = transf_matrix[:3, :3] @ points[:3] + transf_matrix[:3, 3].reshape((-1, 1))
+    return points
+
+
+def render_image(
+    points, name,
+    canvas_size: Tuple[int, int] = (1001, 1001),
+    view: npt.NDArray[np.float64] = np.array([[10, 0, 0, 500], [0, 10, 0, 500], [0, 0, 10, 0]]),
+    color_dim: int = 2,
+):
+    """
+    Renders a point cloud to an array with 3 channels appropriate for viewing as an image. The image is
+    color coded according to the color_dim dimension of points (typically the height).
+    :param canvas_size: (width, height). Size of the canvas on which to render the image.
+    :param view: <np.float: n, n>. Defines an arbitrary projection (n <= 4).
+    :param color_dim: The dimension of the points to be visualized as color. Default is 2 for height.
+    :return: None; the rendered image is saved to the debug directory.
+    """
+    # Apply desired transformation to the point cloud (height is here considered independent of the view).
+    heights = points[2, :]
+    points = view_points(points[:3, :], view, normalize=False)
+    points[2, :] = heights
+
+    # Remove points that fall outside the canvas.
+    mask = np.ones(points.shape[1], dtype=bool)  # type: ignore
+    mask = np.logical_and(mask, points[0, :] < canvas_size[0] - 1)
+    mask = np.logical_and(mask, points[0, :] > 0)
+    mask = np.logical_and(mask, points[1, :] < canvas_size[1] - 1)
+    mask = np.logical_and(mask, points[1, :] > 0)
+    points = points[:, mask]
+
+    # Scale color_values to be between 0 and 255.
+    color_values = points[color_dim, :]
+    color_values = 255.0 * (color_values - np.amin(color_values)) / (np.amax(color_values) - np.amin(color_values))
+
+    # Round to ints and generate colors that will be used in the image.
+    points = np.int16(np.round(points[:2, :]))
+    color_values = np.int16(np.round(color_values))
+    cmap = [cm.jet(i / 255, bytes=True)[:3] for i in range(256)]
+
+    # Populate canvas, use maximum color_value for each bin
+    render = np.tile(np.expand_dims(np.zeros(canvas_size, dtype=np.uint8), axis=2), [1, 1, 3])  # type: ignore
+    color_value_array: npt.NDArray[np.float64] = -1 * np.ones(canvas_size, dtype=float)  # type: ignore
+    for (col, row), color_value in zip(points.T, color_values.T):
+        if color_value > color_value_array[row, col]:
+            color_value_array[row, col] = color_value
+            render[row, col] = cmap[color_value]
+
+    Image.fromarray(render).save(f'/mnt/f/e2e/navsim_ours/debug/{name}.png')
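A small numeric check of transform_points, mirroring the warp composition used in feature_builders.py: a point is first lifted into the global frame with its own frame's ego2global, then mapped into the key frame with the key frame's inverse transform (values chosen for illustration):

import numpy as np

from det_map.data.datasets.lidar_utils import transform_points

ego2global_prev = np.eye(4)          # previous ego frame coincides with global
ego2global_key = np.eye(4)
ego2global_key[0, 3] = 2.0           # key ego frame sits 2 m ahead along global x
global2ego_key = np.linalg.inv(ego2global_key)

pts = np.zeros((6, 1), dtype=np.float32)  # one point at the previous ego origin
warped = transform_points(transform_points(pts, ego2global_prev), global2ego_key)
print(warped[:3, 0])  # -> [-2.  0.  0.], the point expressed in key-frame coords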
det_map/data/pipelines/__init__.py
ADDED
File without changes
det_map/data/pipelines/color_utils.py
ADDED
@@ -0,0 +1,357 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Callable, Union
+
+import cv2
+import numpy as np
+import torch
+
+def imnormalize_(img, mean, std, to_rgb=True):
+    """Inplace normalize an image with mean and std.
+
+    Args:
+        img (ndarray): Image to be normalized.
+        mean (ndarray): The mean to be used for normalize.
+        std (ndarray): The std to be used for normalize.
+        to_rgb (bool): Whether to convert to rgb.
+
+    Returns:
+        ndarray: The normalized image.
+    """
+    # cv2 inplace normalization does not accept uint8
+    assert img.dtype != np.uint8
+    mean = np.float64(mean.reshape(1, -1))
+    stdinv = 1 / np.float64(std.reshape(1, -1))
+    if to_rgb:
+        cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)  # inplace
+    cv2.subtract(img, mean, img)  # inplace
+    cv2.multiply(img, stdinv, img)  # inplace
+    return img
+
+
+def imnormalize(img, mean, std, to_rgb=True):
+    """Normalize an image with mean and std.
+
+    Args:
+        img (ndarray): Image to be normalized.
+        mean (ndarray): The mean to be used for normalize.
+        std (ndarray): The std to be used for normalize.
+        to_rgb (bool): Whether to convert to rgb.
+
+    Returns:
+        ndarray: The normalized image.
+    """
+    img = img.copy().astype(np.float32)
+    return imnormalize_(img, mean, std, to_rgb)
+
+
+def mmlabNormalize(img):
+    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
+    std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
+    to_rgb = True
+    img = imnormalize(np.array(img), mean, std, to_rgb)
+    img = torch.tensor(img).float().permute(2, 0, 1).contiguous()
+    return img
+
+
+def imconvert(img: np.ndarray, src: str, dst: str) -> np.ndarray:
+    """Convert an image from the src colorspace to dst colorspace.
+
+    Args:
+        img (ndarray): The input image.
+        src (str): The source colorspace, e.g., 'rgb', 'hsv'.
+        dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
+
+    Returns:
+        ndarray: The converted image.
+    """
+    code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
+    out_img = cv2.cvtColor(img, code)
+    return out_img
+
+
+def bgr2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
+    """Convert a BGR image to grayscale image.
+
+    Args:
+        img (ndarray): The input image.
+        keepdim (bool): If False (by default), then return the grayscale image
+            with 2 dims, otherwise 3 dims.
+
+    Returns:
+        ndarray: The converted grayscale image.
+    """
+    out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    if keepdim:
+        out_img = out_img[..., None]
+    return out_img
+
+
+def rgb2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
+    """Convert a RGB image to grayscale image.
+
+    Args:
+        img (ndarray): The input image.
+        keepdim (bool): If False (by default), then return the grayscale image
+            with 2 dims, otherwise 3 dims.
+
+    Returns:
+        ndarray: The converted grayscale image.
+    """
+    out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+    if keepdim:
+        out_img = out_img[..., None]
+    return out_img
+
+
+def gray2bgr(img: np.ndarray) -> np.ndarray:
+    """Convert a grayscale image to BGR image.
+
+    Args:
+        img (ndarray): The input image.
+
+    Returns:
+        ndarray: The converted BGR image.
+    """
+    img = img[..., None] if img.ndim == 2 else img
+    out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    return out_img
+
+
+def gray2rgb(img: np.ndarray) -> np.ndarray:
+    """Convert a grayscale image to RGB image.
+
+    Args:
+        img (ndarray): The input image.
+
+    Returns:
+        ndarray: The converted RGB image.
+    """
+    img = img[..., None] if img.ndim == 2 else img
+    out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+    return out_img
+
+
+def _convert_input_type_range(img: np.ndarray) -> np.ndarray:
+    """Convert the type and range of the input image.
+
+    It converts the input image to np.float32 type and range of [0, 1].
+    It is mainly used for pre-processing the input image in colorspace
+    conversion functions such as rgb2ycbcr and ycbcr2rgb.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+
+    Returns:
+        (ndarray): The converted image with type of np.float32 and range of
+            [0, 1].
+    """
+    img_type = img.dtype
+    img = img.astype(np.float32)
+    if img_type == np.float32:
+        pass
+    elif img_type == np.uint8:
+        img /= 255.
+    else:
+        raise TypeError('The img type should be np.float32 or np.uint8, '
+                        f'but got {img_type}')
+    return img
+
+
+def _convert_output_type_range(
+        img: np.ndarray, dst_type: Union[np.uint8, np.float32]) -> np.ndarray:
+    """Convert the type and range of the image according to dst_type.
+
+    It converts the image to desired type and range. If `dst_type` is np.uint8,
+    images will be converted to np.uint8 type with range [0, 255]. If
+    `dst_type` is np.float32, it converts the image to np.float32 type with
+    range [0, 1].
+    It is mainly used for post-processing images in colorspace conversion
+    functions such as rgb2ycbcr and ycbcr2rgb.
+
+    Args:
+        img (ndarray): The image to be converted with np.float32 type and
+            range [0, 255].
+        dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
+            converts the image to np.uint8 type with range [0, 255]. If
+            dst_type is np.float32, it converts the image to np.float32 type
+            with range [0, 1].
+
+    Returns:
+        (ndarray): The converted image with desired type and range.
+    """
+    if dst_type not in (np.uint8, np.float32):
+        raise TypeError('The dst_type should be np.float32 or np.uint8, '
+                        f'but got {dst_type}')
+    if dst_type == np.uint8:
+        img = img.round()
+    else:
+        img /= 255.
+    return img.astype(dst_type)
+
+
+def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
+    """Convert a RGB image to YCbCr image.
+
+    This function produces the same results as Matlab's `rgb2ycbcr` function.
+    It implements the ITU-R BT.601 conversion for standard-definition
+    television. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+    It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
+    In OpenCV, it implements a JPEG conversion. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+        y_only (bool): Whether to only return Y channel. Default: False.
+
+    Returns:
+        ndarray: The converted YCbCr image. The output image has the same type
+            and range as input image.
+    """
+    img_type = img.dtype
+    img = _convert_input_type_range(img)
+    if y_only:
+        out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
+    else:
+        out_img = np.matmul(
+            img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
+                  [24.966, 112.0, -18.214]]) + [16, 128, 128]
+    out_img = _convert_output_type_range(out_img, img_type)
+    return out_img
+
+
+def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
+    """Convert a BGR image to YCbCr image.
+
+    The bgr version of rgb2ycbcr.
+    It implements the ITU-R BT.601 conversion for standard-definition
+    television. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+    It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
+    In OpenCV, it implements a JPEG conversion. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+        y_only (bool): Whether to only return Y channel. Default: False.
+
+    Returns:
+        ndarray: The converted YCbCr image. The output image has the same type
+            and range as input image.
+    """
+    img_type = img.dtype
+    img = _convert_input_type_range(img)
+    if y_only:
+        out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
+    else:
+        out_img = np.matmul(
+            img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
+                  [65.481, -37.797, 112.0]]) + [16, 128, 128]
+    out_img = _convert_output_type_range(out_img, img_type)
+    return out_img
+
+
+def ycbcr2rgb(img: np.ndarray) -> np.ndarray:
+    """Convert a YCbCr image to RGB image.
+
+    This function produces the same results as Matlab's ycbcr2rgb function.
+    It implements the ITU-R BT.601 conversion for standard-definition
+    television. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+    It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
+    In OpenCV, it implements a JPEG conversion. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+
+    Returns:
+        ndarray: The converted RGB image. The output image has the same type
+            and range as input image.
+    """
+    img_type = img.dtype
+    img = _convert_input_type_range(img) * 255
+    out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
+                              [0, -0.00153632, 0.00791071],
+                              [0.00625893, -0.00318811, 0]]) * 255.0 + [
+                                  -222.921, 135.576, -276.836
+                              ]
+    out_img = _convert_output_type_range(out_img, img_type)
+    return out_img
+
+
+def ycbcr2bgr(img: np.ndarray) -> np.ndarray:
+    """Convert a YCbCr image to BGR image.
+
+    The bgr version of ycbcr2rgb.
+    It implements the ITU-R BT.601 conversion for standard-definition
+    television. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+    It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
+    In OpenCV, it implements a JPEG conversion. See more details in
+    https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+    Args:
+        img (ndarray): The input image. It accepts:
+            1. np.uint8 type with range [0, 255];
+            2. np.float32 type with range [0, 1].
+
+    Returns:
+        ndarray: The converted BGR image. The output image has the same type
+            and range as input image.
+    """
+    img_type = img.dtype
+    img = _convert_input_type_range(img) * 255
+    out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
+                              [0.00791071, -0.00153632, 0],
+                              [0, -0.00318811, 0.00625893]]) * 255.0 + [
+                                  -276.836, 135.576, -222.921
+                              ]
+    out_img = _convert_output_type_range(out_img, img_type)
+    return out_img
+
+
+def convert_color_factory(src: str, dst: str) -> Callable:
+
+    code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
+
+    def convert_color(img: np.ndarray) -> np.ndarray:
+        out_img = cv2.cvtColor(img, code)
+        return out_img
+
+    convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
+        image.
+
+    Args:
+        img (ndarray or str): The input image.
+
+    Returns:
+        ndarray: The converted {dst.upper()} image.
+    """
+
+    return convert_color
+
+
+bgr2rgb = convert_color_factory('bgr', 'rgb')
+
+rgb2bgr = convert_color_factory('rgb', 'bgr')
+
+bgr2hsv = convert_color_factory('bgr', 'hsv')
+
+hsv2bgr = convert_color_factory('hsv', 'bgr')
+
|
355 |
+
bgr2hls = convert_color_factory('bgr', 'hls')
|
356 |
+
|
357 |
+
hls2bgr = convert_color_factory('hls', 'bgr')
|
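A quick round-trip check of the BT.601 conversions above; a minimal sketch assuming the module is importable as det_map.data.pipelines.color_utils:

import numpy as np

from det_map.data.pipelines.color_utils import rgb2ycbcr, ycbcr2rgb

rng = np.random.default_rng(0)
img = rng.integers(0, 256, size=(4, 4, 3), dtype=np.uint8)
ycbcr = rgb2ycbcr(img)       # uint8 in -> uint8 out (same type/range as input)
restored = ycbcr2rgb(ycbcr)  # BT.601 inverse
# the round trip is lossy only through uint8 quantisation of Y/Cb/Cr
assert np.abs(restored.astype(np.int16) - img.astype(np.int16)).max() <= 3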
det_map/data/pipelines/filter_lidar.py
ADDED
@@ -0,0 +1,74 @@
import numpy as np
from typing import Tuple

import torch


class LiDARFilter(object):
    def __init__(self,
                 close_radius=1.0,
                 x_range='(-50.0, 50.0)',
                 y_range='(-50.0, 50.0)',
                 z_range='(-5, 20)',
                 ):
        self.radius = close_radius
        self.x_range = eval(x_range)
        self.y_range = eval(y_range)
        self.z_range = eval(z_range)

    def _remove_close(self, points, radius=1.0):
        """Remove points closer than a certain radius to the origin.

        Args:
            points (torch.Tensor): Sweep points with shape (N, >=2).
            radius (float, optional): Radius below which points are removed.
                Defaults to 1.0.

        Returns:
            torch.Tensor: Points after removal.
        """
        x_filt = torch.abs(points[:, 0]) < radius
        y_filt = torch.abs(points[:, 1]) < radius
        not_close = torch.logical_not(torch.logical_and(x_filt, y_filt))
        return points[not_close]

    def range_filter(
            self,
            points,
            xrange: Tuple[float, float] = (-np.inf, np.inf),
            yrange: Tuple[float, float] = (-np.inf, np.inf),
            zrange: Tuple[float, float] = (-np.inf, np.inf),
    ) -> torch.Tensor:
        """
        Restricts points to the specified ranges.
        :param xrange: (xmin, xmax).
        :param yrange: (ymin, ymax).
        :param zrange: (zmin, zmax).
        :return: The points that fall inside all three ranges.
        """
        # Figure out which points to keep.
        keep_x = torch.logical_and(xrange[0] <= points[:, 0], points[:, 0] <= xrange[1])
        keep_y = torch.logical_and(yrange[0] <= points[:, 1], points[:, 1] <= yrange[1])
        keep_z = torch.logical_and(zrange[0] <= points[:, 2], points[:, 2] <= zrange[1])
        keep = torch.logical_and(keep_x, torch.logical_and(keep_y, keep_z))
        return points[keep]

    def __call__(self, features, targets):
        """Concatenate the warped lidar sweeps and filter them.

        Args:
            features (dict): Feature dict containing 'lidars_warped', a list
                of per-sweep point tensors.
            targets (dict): Target dict, passed through unchanged.

        Returns:
            tuple: The (features, targets) pair, where features['lidar'] now
            holds the concatenated, close-radius- and range-filtered points.
        """
        points = torch.cat(features['lidars_warped'], 0)
        points = self._remove_close(points, self.radius)
        points = self.range_filter(points, self.x_range, self.y_range, self.z_range)
        features['lidar'] = points
        return features, targets
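A minimal sketch of how LiDARFilter is called in the pipeline; the features layout ('lidars_warped' as a list of (N, >=3) point tensors) follows __call__ above, and the numbers are illustrative:

import torch

from det_map.data.pipelines.filter_lidar import LiDARFilter

filt = LiDARFilter(close_radius=1.0,
                   x_range='(-50.0, 50.0)',
                   y_range='(-50.0, 50.0)',
                   z_range='(-5, 20)')
pts = torch.tensor([[0.2, 0.3, 0.0, 0.5],    # too close to the sensor -> dropped
                    [10.0, -4.0, 1.0, 0.5],  # kept
                    [80.0, 0.0, 1.0, 0.5]])  # outside x_range -> dropped
features, _ = filt({'lidars_warped': [pts]}, targets={})
print(features['lidar'].shape)  # torch.Size([1, 4])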
det_map/data/pipelines/lidar_aug.py
ADDED
@@ -0,0 +1,151 @@
import numpy as np
import torch
from nuplan.common.actor_state.tracked_objects_types import (
    TrackedObjectType,
)

OBJECT_TYPE_DICT = {
    "vehicle": TrackedObjectType.VEHICLE,
    "pedestrian": TrackedObjectType.PEDESTRIAN,
    "bicycle": TrackedObjectType.BICYCLE,
    "traffic_cone": TrackedObjectType.TRAFFIC_CONE,
    "barrier": TrackedObjectType.BARRIER,
    "czone_sign": TrackedObjectType.CZONE_SIGN,
    "generic_object": TrackedObjectType.GENERIC_OBJECT,
}


def limit_period(val, offset=0.5, period=2 * np.pi):
    """Limit the value into a period for a periodic function.

    Args:
        val (torch.Tensor | np.ndarray): The value to be converted.
        offset (float, optional): Offset to set the value range.
            Defaults to 0.5.
        period (float, optional): Period of the value. Defaults to 2 * np.pi.

    Returns:
        (torch.Tensor | np.ndarray): Value in the range of
        [-offset * period, (1 - offset) * period].
    """
    limited_val = val - torch.floor(val / period + offset) * period
    return limited_val


class LiDARAug(object):
    def __init__(self,
                 bda_aug_conf, is_train,
                 x_range='(-50.0, 50.0)',
                 y_range='(-50.0, 50.0)',
                 z_range='(-5, 20)',
                 ):
        for k in ['rot_lim', 'scale_lim', 'tran_lim']:
            bda_aug_conf[k] = eval(bda_aug_conf[k])
        self.bda_aug_conf = bda_aug_conf
        # NOTE: augmentation is currently hard-disabled: the is_train argument
        # is accepted but overridden here, so sample_bda_augmentation() always
        # returns identity parameters.
        self.is_train = False
        self.x_range = eval(x_range)
        self.y_range = eval(y_range)
        self.z_range = eval(z_range)

    def sample_bda_augmentation(self):
        """Generate bda augmentation values based on bda_config."""
        if self.is_train:
            rotate_bda = np.random.uniform(*self.bda_aug_conf['rot_lim'])
            scale_bda = np.random.uniform(*self.bda_aug_conf['scale_lim'])
            flip_dx = np.random.uniform() < self.bda_aug_conf['flip_dx_ratio']
            flip_dy = np.random.uniform() < self.bda_aug_conf['flip_dy_ratio']
            translation_std = self.bda_aug_conf.get('tran_lim', [0.0, 0.0, 0.0])
            tran_bda = np.random.normal(scale=translation_std, size=3).T
        else:
            rotate_bda = 0
            scale_bda = 1.0
            flip_dx = False
            flip_dy = False
            tran_bda = np.zeros((1, 3), dtype=np.float32)
        return rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda

    def bev_transform(self, gt_boxes, rotate_angle, scale_ratio, flip_dx,
                      flip_dy, tran_bda, rot_mat):
        if gt_boxes.shape[0] > 0:
            gt_boxes[:, :3] = (
                rot_mat @ gt_boxes[:, :3].unsqueeze(-1)).squeeze(-1)
            gt_boxes[:, 3:6] *= scale_ratio
            gt_boxes[:, 6] += rotate_angle
            if flip_dx:
                # mirror the yaw angle: yaw -> pi - yaw
                gt_boxes[:, 6] = 2 * torch.asin(torch.tensor(1.0)) - gt_boxes[:, 6]
            if flip_dy:
                gt_boxes[:, 6] = -gt_boxes[:, 6]
            gt_boxes[:, 7:] = (
                rot_mat[:2, :2] @ gt_boxes[:, 7:].unsqueeze(-1)).squeeze(-1)
            gt_boxes[:, :3] = gt_boxes[:, :3] + tran_bda
        return gt_boxes

    def __call__(self, features, targets):
        # 1. filter boxes based on ranges
        # 2. filter labels based on classes
        if 'dets' in targets and 'labels' in targets:
            boxes = targets['dets']
            labels = targets['labels']

            for t, (box, label) in enumerate(zip(boxes, labels)):
                label_mask = np.array([n in OBJECT_TYPE_DICT for n in label], dtype=np.bool_)
                label_mask = torch.from_numpy(label_mask)
                range_mask = ((box[:, 0] > self.x_range[0]) &
                              (box[:, 0] < self.x_range[1]) &
                              (box[:, 1] > self.y_range[0]) &
                              (box[:, 1] < self.y_range[1]))
                mask = range_mask & label_mask
                box_of_interest = box[mask]
                box_of_interest[:, 6] = limit_period(box_of_interest[:, 6])
                boxes[t] = box_of_interest.float()

                # map class names to integer ids; names outside
                # OBJECT_TYPE_DICT get a placeholder that label_mask drops
                label_ids = np.array(
                    [OBJECT_TYPE_DICT[x].value if x in OBJECT_TYPE_DICT else -1
                     for x in label], dtype=np.int64)
                labels[t] = torch.from_numpy(label_ids)[mask]
            targets['dets'] = boxes
            targets['labels'] = labels

        rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda = \
            self.sample_bda_augmentation()
        bda_mat = torch.zeros(4, 4)
        bda_mat[3, 3] = 1
        rotate_angle = torch.tensor(rotate_bda / 180 * np.pi)
        rot_sin = torch.sin(rotate_angle)
        rot_cos = torch.cos(rotate_angle)
        rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0],
                                [0, 0, 1]])
        scale_mat = torch.Tensor([[scale_bda, 0, 0], [0, scale_bda, 0],
                                  [0, 0, scale_bda]])
        flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        if flip_dx:
            flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0],
                                                [0, 0, 1]])
        if flip_dy:
            flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0],
                                                [0, 0, 1]])
        bda_rot = flip_mat @ (scale_mat @ rot_mat)

        if 'dets' in targets:
            for idx, boxes in enumerate(targets['dets']):
                targets['dets'][idx] = self.bev_transform(boxes, rotate_bda, scale_bda,
                                                          flip_dx, flip_dy, tran_bda, bda_rot)
        for idx, points in enumerate(features['lidars_warped']):
            points_aug = (bda_rot @ points[:, :3].unsqueeze(-1)).squeeze(-1)
            points[:, :3] = points_aug + tran_bda
            features['lidars_warped'][idx] = points

        bda_mat[:3, :3] = bda_rot
        bda_mat[:3, 3] = torch.from_numpy(tran_bda)
        features['bda'] = bda_mat
        return features, targets
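A standalone sketch of how the BDA matrix above composes, flip @ (scale @ rot), and how it is applied to point coordinates (plain torch; the parameters are illustrative):

import math

import torch

rotate_bda, scale_bda, flip_dx, flip_dy = 22.5, 0.95, True, False
c = math.cos(math.radians(rotate_bda))
s = math.sin(math.radians(rotate_bda))
rot_mat = torch.tensor([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]])
scale_mat = scale_bda * torch.eye(3)
flip_mat = torch.eye(3)
if flip_dx:
    flip_mat = flip_mat @ torch.tensor([[-1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
if flip_dy:
    flip_mat = flip_mat @ torch.tensor([[1., 0., 0.], [0., -1., 0.], [0., 0., 1.]])
bda_rot = flip_mat @ (scale_mat @ rot_mat)

points = torch.randn(5, 3)
points_aug = (bda_rot @ points.unsqueeze(-1)).squeeze(-1)
print(points_aug.shape)  # torch.Size([5, 3])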
det_map/data/pipelines/point_shuffle.py
ADDED
@@ -0,0 +1,17 @@
import torch


class PointShuffle(object):
    def __init__(self, is_train):
        self.is_train = is_train

    def __call__(self, features, targets):
        if self.is_train:
            points = features['lidar']
            cnt = points.shape[0]
            idx = torch.randperm(cnt, device=points.device)
            features['lidar'] = points[idx]
        return features, targets
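PointShuffle is a no-op at evaluation time; a minimal usage sketch of the training-time behaviour, assuming the module import path below:

import torch

from det_map.data.pipelines.point_shuffle import PointShuffle

features = {'lidar': torch.arange(12.).reshape(4, 3)}
features, _ = PointShuffle(is_train=True)(features, targets={})
print(features['lidar'].shape)  # still torch.Size([4, 3]), rows permuted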
det_map/data/pipelines/prepare_depth.py
ADDED
@@ -0,0 +1,76 @@
import torch


class LiDAR2Depth(object):

    def __init__(self,
                 grid_config,
                 ):
        self.x = eval(grid_config['x'])
        self.y = eval(grid_config['y'])
        self.z = eval(grid_config['z'])
        self.depth = eval(grid_config['depth'])

    def points2depthmap(self, points, height, width):
        depth_map = torch.zeros((height, width), dtype=torch.float32)
        coor = torch.round(points[:, :2])
        depth = points[:, 2]
        # keep points that project inside the image and lie in the depth range
        kept1 = (coor[:, 0] >= 0) & (coor[:, 0] < width) & (
                coor[:, 1] >= 0) & (coor[:, 1] < height) & (
                depth < self.depth[1]) & (
                depth >= self.depth[0])
        coor, depth = coor[kept1], depth[kept1]
        # sort by pixel rank, then by depth, so that deduplication below
        # keeps the nearest point for each pixel
        ranks = coor[:, 0] + coor[:, 1] * width
        sort = (ranks + depth / 100.).argsort()
        coor, depth, ranks = coor[sort], depth[sort], ranks[sort]

        kept2 = torch.ones(coor.shape[0], device=coor.device, dtype=torch.bool)
        kept2[1:] = (ranks[1:] != ranks[:-1])
        coor, depth = coor[kept2], depth[kept2]
        coor = coor.to(torch.long)
        depth_map[coor[:, 1], coor[:, 0]] = depth
        return depth_map

    def __call__(self, features, targets):
        # points, img, sensor2lidar_rotation, sensor2lidar_translation,
        # intrinsics, post_rot, post_tran
        # List: length = frames
        lidar_all_frames = features['lidars_warped']
        # image: T, N_CAMS, C, H, W
        T, N, _, H, W = features['image'].shape
        rots, trans, intrinsics = (features['sensor2lidar_rotation'],
                                   features['sensor2lidar_translation'],
                                   features['intrinsics'])
        post_rot, post_tran, bda = (features['post_rot'],
                                    features['post_tran'], features['bda'])

        t = -1  # only the current (last) frame is used for depth supervision
        depth_t = []
        lidar_t = lidar_all_frames[t][:, :3]
        # undo the BEV data augmentation so points live in the raw lidar frame
        lidar_t = lidar_t - bda[:3, 3].view(1, 3)
        lidar_t = lidar_t.matmul(torch.inverse(bda[:3, :3]).T)

        for n in range(N):
            points_img = lidar_t - trans[t, n:n + 1, :]
            lidar2cam_rot = torch.inverse(rots[t, n])
            # lidar2cam, then cam2img
            points_img = points_img.matmul(lidar2cam_rot.T).matmul(intrinsics[t, n].T)
            points_img = torch.cat(
                [points_img[:, :2] / points_img[:, 2:3], points_img[:, 2:3]],
                1)
            # apply the image-view augmentation homography
            points_img = points_img.matmul(
                post_rot[t, n].T) + post_tran[t, n:n + 1, :]
            depth_curr = self.points2depthmap(points_img,
                                              features['canvas'][-1, n].shape[0],
                                              features['canvas'][-1, n].shape[1])
            depth_t.append(depth_curr)
        features['gt_depth'] = torch.stack(depth_t)
        return features, targets
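The heart of points2depthmap is the rank/sort/deduplicate trick; a standalone sketch (plain torch, no repo dependencies) that reproduces it on three hand-made points:

import torch

H, W = 4, 4
pts = torch.tensor([[1.2, 2.4, 10.0],   # rounds to pixel (1, 2)
                    [0.9, 2.1, 7.0],    # rounds to the same pixel, but nearer
                    [3.0, 0.0, 30.0]])  # pixel (3, 0)
coor = torch.round(pts[:, :2])
depth = pts[:, 2]
ranks = coor[:, 0] + coor[:, 1] * W
order = (ranks + depth / 100.).argsort()       # pixel rank first, depth second
coor, depth, ranks = coor[order], depth[order], ranks[order]
keep = torch.ones(coor.shape[0], dtype=torch.bool)
keep[1:] = ranks[1:] != ranks[:-1]             # first (nearest) point per pixel
coor, depth = coor[keep].long(), depth[keep]
depth_map = torch.zeros((H, W))
depth_map[coor[:, 1], coor[:, 0]] = depth
print(depth_map[2, 1])  # tensor(7.) -- the nearer of the two candidates wins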
det_map/data/pipelines/prepare_img.py
ADDED
@@ -0,0 +1,218 @@
import cv2
import numpy as np
import torch
from PIL import Image

from det_map.data.datasets.dataclasses import Camera
from det_map.data.pipelines.color_utils import bgr2hsv, hsv2bgr, mmlabNormalize


class PrepareImageInputs(object):
    """Resize, crop, flip, rotate, photometrically distort and normalize a
    camera image, tracking the augmentation as a 2D homography.

    Args:
        data_config (dict): Input-size, resize, crop, flip and rotation
            configuration for the image-view augmentation.
        is_train (bool): Whether to sample random augmentations.
            Defaults to False.
        opencv_pp (bool): Whether to apply the augmentation with a single
            cv2.warpAffine instead of successive PIL operations.
            Defaults to False.
    """

    def __init__(
            self,
            data_config,
            is_train=False,
            opencv_pp=False,
    ):
        self.is_train = is_train
        self.data_config = data_config
        self.normalize_img = mmlabNormalize
        self.opencv_pp = opencv_pp

    def get_rot(self, h):
        return torch.Tensor([
            [np.cos(h), np.sin(h)],
            [-np.sin(h), np.cos(h)],
        ])

    def img_transform(self, img, post_rot, post_tran, resize, resize_dims,
                      crop, flip, rotate):
        # adjust image
        if not self.opencv_pp:
            img = self.img_transform_core(img, resize_dims, crop, flip, rotate)

        # post-homography transformation
        post_rot *= resize
        post_tran -= torch.Tensor(crop[:2])
        if flip:
            A = torch.Tensor([[-1, 0], [0, 1]])
            b = torch.Tensor([crop[2] - crop[0], 0])
            post_rot = A.matmul(post_rot)
            post_tran = A.matmul(post_tran) + b
        A = self.get_rot(rotate / 180 * np.pi)
        b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
        b = A.matmul(-b) + b
        post_rot = A.matmul(post_rot)
        post_tran = A.matmul(post_tran) + b
        if self.opencv_pp:
            img = self.img_transform_core_opencv(img, post_rot, post_tran, crop)
        return img, post_rot, post_tran

    def img_transform_core_opencv(self, img, post_rot, post_tran,
                                  crop):
        img = np.array(img).astype(np.float32)
        img = cv2.warpAffine(img,
                             np.concatenate([post_rot,
                                             post_tran.reshape(2, 1)],
                                            axis=1),
                             (crop[2] - crop[0], crop[3] - crop[1]),
                             flags=cv2.INTER_LINEAR)
        return img

    def img_transform_core(self, img, resize_dims, crop, flip, rotate):
        # adjust image
        img = img.resize(resize_dims)
        img = img.crop(crop)
        if flip:
            img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
        img = img.rotate(rotate)
        return img

    def sample_augmentation(self, H, W, flip=None, scale=None):
        fH, fW = eval(self.data_config['input_size'])
        if self.is_train:
            resize = float(fW) / float(W)
            resize += np.random.uniform(*eval(self.data_config['resize']))
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            random_crop_height = \
                self.data_config.get('random_crop_height', False)
            if random_crop_height:
                crop_h = int(np.random.uniform(max(0.3 * newH, newH - fH),
                                               newH - fH))
            else:
                crop_h = \
                    int((1 - np.random.uniform(*eval(self.data_config['crop_h']))) *
                        newH) - fH
            crop_w = int(np.random.uniform(0, max(0, newW - fW)))
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = self.data_config['flip'] and np.random.choice([0, 1])
            rotate = np.random.uniform(*eval(self.data_config['rot']))
            if self.data_config.get('vflip', False) and np.random.choice([0, 1]):
                rotate += 180
        else:
            resize = float(fW) / float(W)
            if scale is not None:
                resize += scale
            else:
                resize += self.data_config.get('resize_test', 0.0)
            resize_dims = (int(W * resize), int(H * resize))
            newW, newH = resize_dims
            crop_h = int((1 - np.mean(eval(self.data_config['crop_h']))) * newH) - fH
            crop_w = int(max(0, newW - fW) / 2)
            crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
            flip = False if flip is None else flip
            rotate = 0
        return resize, resize_dims, crop, flip, rotate

    def photo_metric_distortion(self, img, pmd):
        """Apply photometric distortion to an image.

        Args:
            img (PIL.Image): The input image.
            pmd (dict): Distortion config with brightness / contrast /
                saturation / hue parameters and an application rate.

        Returns:
            PIL.Image: The distorted image.
        """
        if np.random.rand() > pmd.get('rate', 1.0):
            return img

        img = np.array(img).astype(np.float32)
        # random brightness
        if np.random.randint(2):
            delta = np.random.uniform(-pmd['brightness_delta'],
                                      pmd['brightness_delta'])
            img += delta

        # mode == 0 --> do random contrast first
        # mode == 1 --> do random contrast last
        mode = np.random.randint(2)
        if mode == 1:
            if np.random.randint(2):
                alpha = np.random.uniform(pmd['contrast_lower'],
                                          pmd['contrast_upper'])
                img *= alpha

        # convert color from BGR to HSV
        img = bgr2hsv(img)

        # random saturation
        if np.random.randint(2):
            img[..., 1] *= np.random.uniform(pmd['saturation_lower'],
                                             pmd['saturation_upper'])

        # random hue
        if np.random.randint(2):
            img[..., 0] += np.random.uniform(-pmd['hue_delta'], pmd['hue_delta'])
            img[..., 0][img[..., 0] > 360] -= 360
            img[..., 0][img[..., 0] < 0] += 360

        # convert color from HSV to BGR
        img = hsv2bgr(img)

        # random contrast
        if mode == 0:
            if np.random.randint(2):
                alpha = np.random.uniform(pmd['contrast_lower'],
                                          pmd['contrast_upper'])
                img *= alpha

        # randomly swap channels
        if np.random.randint(2):
            img = img[..., np.random.permutation(3)]
        return Image.fromarray(img.astype(np.uint8))

    def get_inputs(self, cam: Camera, flip=None, scale=None):

        img = Image.fromarray(cam.image)
        # keep an unmodified copy of the image
        cam.canvas = torch.tensor(np.array(img))

        post_rot = torch.eye(2)
        post_tran = torch.zeros(2)

        # image view augmentation (resize, crop, horizontal flip, rotate)
        img_augs = self.sample_augmentation(
            H=img.height, W=img.width, flip=flip, scale=scale)
        resize, resize_dims, crop, flip, rotate = img_augs
        img, post_rot2, post_tran2 = \
            self.img_transform(img, post_rot,
                               post_tran,
                               resize=resize,
                               resize_dims=resize_dims,
                               crop=crop,
                               flip=flip,
                               rotate=rotate)

        # for convenience, make augmentation matrices 3x3
        post_tran = torch.zeros(3)
        post_rot = torch.eye(3)
        post_tran[:2] = post_tran2
        post_rot[:2, :2] = post_rot2

        if self.is_train and self.data_config.get('pmd', None) is not None:
            img = self.photo_metric_distortion(img, self.data_config['pmd'])

        # normalized image
        cam.image = self.normalize_img(img)
        cam.post_rot = post_rot
        cam.post_tran = post_tran
        cam.sensor2lidar_rotation = torch.tensor(cam.sensor2lidar_rotation)
        cam.sensor2lidar_translation = torch.tensor(cam.sensor2lidar_translation)
        cam.intrinsics = torch.tensor(cam.intrinsics)
        cam.distortion = torch.tensor(cam.distortion)
        return cam

    def __call__(self, results):
        return self.get_inputs(results)
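The post_rot/post_tran bookkeeping above encodes the image augmentation as an affine map on pixel coordinates. A standalone sketch (plain torch; the numbers are made up) of the resize-then-crop case:

import torch

resize = 0.5
crop = (10, 20, 138, 116)            # (x0, y0, x1, y1) in the resized image
post_rot = torch.eye(2) * resize     # post_rot *= resize
post_tran = -torch.Tensor(crop[:2])  # post_tran -= crop offset

u = torch.Tensor([100.0, 80.0])      # pixel in the original image
v = post_rot @ u + post_tran
print(v)  # tensor([40., 20.]) -> (100 * 0.5 - 10, 80 * 0.5 - 20)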
det_map/det/__init__.py
ADDED
File without changes
det_map/det/dal/__init__.py
ADDED
File without changes
det_map/det/dal/dal.py
ADDED
@@ -0,0 +1,159 @@
# Copyright (c) Phigent Robotics. All rights reserved.
import torch

from det_map.det.dal.mmdet3d.models.detectors.bevdet import BEVDet
from det_map.det.dal.mmdet3d.models.utils import FFN
from det_map.det.dal.mmdet3d.models.utils.spconv_voxelize import SPConvVoxelization
from det_map.det.dal.mmdet3d.models import *  # noqa: F401, F403
from det_map.det.dal.mmdet3d.core import *  # noqa: F401, F403


class DAL(BEVDet):
    def __init__(self, **kwargs):
        super(DAL, self).__init__(**kwargs)

        # image-view auxiliary task heads
        self.num_cls = self.pts_bbox_head.num_classes
        heads = dict(heatmap=(self.num_cls, 2))
        input_feat_dim = kwargs['pts_bbox_head']['hidden_channel']
        self.auxiliary_heads = FFN(
            input_feat_dim,
            heads,
            conv_cfg=dict(type="Conv1d"),
            norm_cfg=dict(type="BN1d"),
            bias=True)
        self.auxiliary_heads.init_weights()

        pts_voxel_cfg = kwargs.get('pts_voxel_layer', None)
        if pts_voxel_cfg:
            pts_voxel_cfg['num_point_features'] = 5
            self.pts_voxel_layer = SPConvVoxelization(**pts_voxel_cfg)

    def extract_img_feat(self, img, img_metas):
        """Extract features of images."""
        img = self.prepare_inputs(img)
        x, _ = self.image_encoder(img[0])
        return [x] + img[1:]

    def extract_feat(self, points, img, img_metas):
        """Extract features from images and points."""
        img_feats = self.extract_img_feat(img, img_metas)
        pts_feats = self.extract_pts_feat(points, img_feats, img_metas)
        return (img_feats, pts_feats)

    def forward_img_auxiliary_train(self,
                                    x,
                                    img_metas,
                                    gt_bboxes,
                                    gt_labels,
                                    gt_bboxes_ignore=None,
                                    proposals=None,
                                    **kwargs):
        max_instance = 150
        num_pos = 0
        centers_augego = x[0].new_zeros((len(gt_bboxes), max_instance, 3))
        box_targets_all = x[0].new_zeros((len(gt_bboxes), max_instance, 10))
        valid_mask = x[0].new_zeros((len(gt_bboxes), max_instance, 1))
        label = x[0].new_zeros((len(gt_bboxes), max_instance, 1)).to(torch.long)
        for sid in range(len(gt_bboxes)):
            centers_augego_tmp = gt_bboxes[sid].gravity_center.to(x[0])
            box_targets_tmp = self.pts_bbox_head.bbox_coder.encode(gt_bboxes[sid].tensor)
            if gt_bboxes_ignore is not None:
                centers_augego_tmp = centers_augego_tmp[gt_bboxes_ignore[sid], :]
                box_targets_tmp = box_targets_tmp[gt_bboxes_ignore[sid], :]
            num_valid_samples = centers_augego_tmp.shape[0]
            num_pos += num_valid_samples
            valid_mask[sid, :num_valid_samples, :] = 1.0
            centers_augego[sid, :num_valid_samples, :] = centers_augego_tmp
            box_targets_all[sid, :num_valid_samples, :] = box_targets_tmp
            label_tmp = gt_labels[sid].unsqueeze(-1)
            if gt_bboxes_ignore is not None:
                label_tmp = label_tmp[gt_bboxes_ignore[sid], :]
            label[sid, :num_valid_samples, :] = label_tmp
        img_feats = self.pts_bbox_head.extract_img_feat_from_3dpoints(
            centers_augego, x, fuse=False)
        heatmap = self.auxiliary_heads.heatmap(img_feats)
        loss_cls_img = self.pts_bbox_head.loss_cls(
            heatmap.permute(0, 2, 1).reshape(-1, self.num_cls),
            label.flatten(),
            valid_mask.flatten(),
            avg_factor=max(num_pos, 1))
        return dict(loss_cls_img=loss_cls_img)

    def forward_train(self,
                      points=None,
                      img_metas=None,
                      gt_bboxes_3d=None,
                      gt_labels_3d=None,
                      gt_labels=None,
                      gt_bboxes=None,
                      img_inputs=None,
                      proposals=None,
                      gt_bboxes_ignore=None,
                      **kwargs):
        """Forward training function.

        Args:
            points (list[torch.Tensor], optional): Points of each sample.
                Defaults to None.
            img_metas (list[dict], optional): Meta information of each sample.
                Defaults to None.
            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):
                Ground truth 3D boxes. Defaults to None.
            gt_labels_3d (list[torch.Tensor], optional): Ground truth labels
                of 3D boxes. Defaults to None.
            gt_labels (list[torch.Tensor], optional): Ground truth labels
                of 2D boxes in images. Defaults to None.
            gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in
                images. Defaults to None.
            img_inputs (torch.Tensor, optional): Images of each sample with
                shape (N, C, H, W). Defaults to None.
            proposals (list[torch.Tensor], optional): Predicted proposals
                used for training Fast RCNN. Defaults to None.
            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
                2D boxes in images to be ignored. Defaults to None.

        Returns:
            dict: Losses of different branches.
        """
        img_feats, pts_feats = self.extract_feat(
            points, img=img_inputs, img_metas=img_metas)
        img_feats_bev = \
            self.img_view_transformer(img_feats + img_inputs[1:7],
                                      depth_from_lidar=kwargs['gt_depth'])

        losses = dict()
        losses_pts = \
            self.forward_pts_train([img_feats, pts_feats, img_feats_bev],
                                   gt_bboxes_3d, gt_labels_3d, img_metas,
                                   gt_bboxes_ignore)
        losses.update(losses_pts)
        losses_img_auxiliary = \
            self.forward_img_auxiliary_train(img_feats, img_metas,
                                             gt_bboxes_3d, gt_labels_3d,
                                             gt_bboxes_ignore,
                                             **kwargs)
        losses.update(losses_img_auxiliary)
        return losses

    def simple_test(self,
                    points,
                    img_metas,
                    img_inputs=None,
                    rescale=False,
                    **kwargs):
        """Test function without augmentation."""
        img_feats, pts_feats = self.extract_feat(
            points, img=img_inputs, img_metas=img_metas)
        img_feats_bev = \
            self.img_view_transformer(img_feats + img_inputs[1:7],
                                      depth_from_lidar=kwargs['gt_depth'][0])

        bbox_list = [dict() for _ in range(len(img_metas))]
        bbox_pts = self.simple_test_pts([img_feats, pts_feats, img_feats_bev],
                                        img_metas, rescale=rescale)
        for result_dict, pts_bbox in zip(bbox_list, bbox_pts):
            result_dict['pts_bbox'] = pts_bbox
        return bbox_list
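forward_img_auxiliary_train pads per-sample ground truth into fixed-size buffers before querying image features; a dependency-free sketch of just that padding-and-mask step (max_instance and the shapes follow the code above, the random tensors are stand-ins for real GT centres):

import torch

max_instance = 150
gt_centers = [torch.randn(3, 3), torch.randn(7, 3)]  # two samples in a batch
centers = torch.zeros(len(gt_centers), max_instance, 3)
valid = torch.zeros(len(gt_centers), max_instance, 1)
for sid, c in enumerate(gt_centers):
    n = c.shape[0]
    centers[sid, :n] = c   # real instances at the front of the buffer
    valid[sid, :n] = 1.0   # mask marks which slots carry real GT
print(valid.sum())  # tensor(10.) -- used as avg_factor in the loss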
det_map/det/dal/mmdet3d/__init__.py
ADDED
File without changes
det_map/det/dal/mmdet3d/core/__init__.py
ADDED
@@ -0,0 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .bbox import *  # noqa: F401, F403
from .points import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403
from .samplers import *  # noqa: F401, F403
det_map/det/dal/mmdet3d/core/bbox/__init__.py
ADDED
@@ -0,0 +1,24 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
# from .bbox_target import bbox_target
from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
                              axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d)
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
                         Coord3DMode, DepthInstance3DBoxes,
                         LiDARInstance3DBoxes, get_box_type, limit_period,
                         mono_cam_box2vis, points_cam2img, points_img2cam,
                         xywhr2xyxyr)
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
from .coders import *

__all__ = [
    'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 'TransFusionBBoxCoder',
    'bbox_overlaps_3d',
    'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode',
    'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
    'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
    'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
    'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'
]
det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py
ADDED
@@ -0,0 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
from .hungarian_assigner_3d import HungarianAssigner3D

__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
           'HungarianAssigner3D']
det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py
ADDED
@@ -0,0 +1,148 @@
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult, BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.iou_calculators import build_iou_calculator
import torch

try:
    from scipy.optimize import linear_sum_assignment
except ImportError:
    linear_sum_assignment = None


@MATCH_COST.register_module()
class BBoxBEVL1Cost(object):
    def __init__(self, weight):
        self.weight = weight

    def __call__(self, bboxes, gt_bboxes, train_cfg):
        pc_start = bboxes.new(train_cfg['point_cloud_range'][0:2])
        pc_range = bboxes.new(train_cfg['point_cloud_range'][3:5]) - bboxes.new(train_cfg['point_cloud_range'][0:2])
        # normalize the box center to [0, 1]
        normalized_bboxes_xy = (bboxes[:, :2] - pc_start) / pc_range
        normalized_gt_bboxes_xy = (gt_bboxes[:, :2] - pc_start) / pc_range
        reg_cost = torch.cdist(normalized_bboxes_xy, normalized_gt_bboxes_xy, p=1)
        return reg_cost * self.weight


@MATCH_COST.register_module()
class IoU3DCost(object):
    def __init__(self, weight):
        self.weight = weight

    def __call__(self, iou):
        iou_cost = -iou
        return iou_cost * self.weight


@BBOX_ASSIGNERS.register_module()
class HeuristicAssigner3D(BaseAssigner):
    def __init__(self,
                 dist_thre=100,
                 iou_calculator=dict(type='BboxOverlaps3D')
                 ):
        self.dist_thre = dist_thre  # distance in meters
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None, query_labels=None):
        dist_thre = self.dist_thre
        num_gts, num_bboxes = len(gt_bboxes), len(bboxes)

        bev_dist = torch.norm(bboxes[:, 0:2][None, :, :] - gt_bboxes[:, 0:2][:, None, :], dim=-1)  # [num_gts, num_bboxes]
        if query_labels is not None:
            # only match a gt box to queries of the same category
            not_same_class = (query_labels[None] != gt_labels[:, None])
            bev_dist += not_same_class * dist_thre

        # for each gt box, assign it to the nearest pred box
        nearest_values, nearest_indices = bev_dist.min(1)  # [num_gts]
        assigned_gt_inds = torch.zeros([num_bboxes, ]).to(bboxes)
        assigned_gt_vals = torch.ones([num_bboxes, ]).to(bboxes) * 10000
        assigned_gt_labels = torch.ones([num_bboxes, ]).to(bboxes) * -1
        for idx_gts in range(num_gts):
            # each gt only matches the nearest pred box
            idx_pred = nearest_indices[idx_gts]
            if bev_dist[idx_gts, idx_pred] <= dist_thre:
                # if this pred box is already assigned, keep the closer gt
                if bev_dist[idx_gts, idx_pred] < assigned_gt_vals[idx_pred]:
                    assigned_gt_vals[idx_pred] = bev_dist[idx_gts, idx_pred]
                    # for AssignResult, 0 is negative, -1 is ignore,
                    # 1-based indices are positive
                    assigned_gt_inds[idx_pred] = idx_gts + 1
                    assigned_gt_labels[idx_pred] = gt_labels[idx_gts]

        max_overlaps = torch.zeros([num_bboxes, ]).to(bboxes)
        matched_indices = torch.where(assigned_gt_inds > 0)
        matched_iou = self.iou_calculator(gt_bboxes[assigned_gt_inds[matched_indices].long() - 1], bboxes[matched_indices]).diag()
        max_overlaps[matched_indices] = matched_iou

        return AssignResult(
            num_gts, assigned_gt_inds.long(), max_overlaps, labels=assigned_gt_labels
        )


@BBOX_ASSIGNERS.register_module()
class HungarianAssigner3D(BaseAssigner):
    def __init__(self,
                 cls_cost=dict(type='ClassificationCost', weight=1.),
                 reg_cost=dict(type='BBoxBEVL1Cost', weight=1.0),
                 iou_cost=dict(type='IoU3DCost', weight=1.0),
                 iou_calculator=dict(type='BboxOverlaps3D'),
                 ):
        self.cls_cost = build_match_cost(cls_cost)
        self.reg_cost = build_match_cost(reg_cost)
        self.iou_cost = build_match_cost(iou_cost)
        self.iou_calculator = build_iou_calculator(iou_calculator)

    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)

        # 1. assign -1 by default
        assigned_gt_inds = bboxes.new_full((num_bboxes,),
                                           -1,
                                           dtype=torch.long)
        assigned_labels = bboxes.new_full((num_bboxes,),
                                          -1,
                                          dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)

        # 2. compute the weighted costs
        # see mmdetection/mmdet/core/bbox/match_costs/match_cost.py
        cls_cost = self.cls_cost(cls_pred[0].T, gt_labels)
        reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)

        iou = self.iou_calculator(bboxes, gt_bboxes)
        iou_cost = self.iou_cost(iou)

        # weighted sum of the above three costs
        cost = cls_cost + reg_cost + iou_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        try:
            matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        except ValueError:
            # the cost matrix is infeasible (e.g. contains inf/nan);
            # fall back to assigning everything to background
            assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)
        matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)
        matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]

        max_overlaps = torch.zeros_like(iou.max(1).values)
        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]
        return AssignResult(
            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
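A minimal end-to-end sketch of the matching recipe above: BBoxBEVL1Cost's normalised BEV-centre L1 distance fed to scipy's linear_sum_assignment (requires scipy; point_cloud_range and the boxes are illustrative):

import torch
from scipy.optimize import linear_sum_assignment

point_cloud_range = [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]
pc_start = torch.tensor(point_cloud_range[0:2])
pc_range = torch.tensor(point_cloud_range[3:5]) - pc_start

pred_xy = torch.tensor([[0.0, 0.0], [10.0, 10.0], [-20.0, 5.0]])
gt_xy = torch.tensor([[9.0, 11.0], [0.5, -0.5]])
# normalise centres to [0, 1], then L1 distance between every pred/gt pair
cost = torch.cdist((pred_xy - pc_start) / pc_range,
                   (gt_xy - pc_start) / pc_range, p=1)
rows, cols = linear_sum_assignment(cost.numpy())
print(list(zip(rows.tolist(), cols.tolist())))  # [(0, 1), (1, 0)]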
det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py
ADDED
@@ -0,0 +1,827 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
2 |
+
# TODO: clean the functions in this file and move the APIs into box structures
|
3 |
+
# in the future
|
4 |
+
# NOTICE: All functions in this file are valid for LiDAR or depth boxes only
|
5 |
+
# if we use default parameters.
|
6 |
+
|
7 |
+
import numba
|
8 |
+
import numpy as np
|
9 |
+
|
10 |
+
from .structures.utils import limit_period, points_cam2img, rotation_3d_in_axis
|
11 |
+
|
12 |
+
|
13 |
+
def camera_to_lidar(points, r_rect, velo2cam):
|
14 |
+
"""Convert points in camera coordinate to lidar coordinate.
|
15 |
+
|
16 |
+
Note:
|
17 |
+
This function is for KITTI only.
|
18 |
+
|
19 |
+
Args:
|
20 |
+
points (np.ndarray, shape=[N, 3]): Points in camera coordinate.
|
21 |
+
r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
|
22 |
+
specific camera coordinate (e.g. CAM2) to CAM0.
|
23 |
+
velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
|
24 |
+
camera coordinate to lidar coordinate.
|
25 |
+
|
26 |
+
Returns:
|
27 |
+
np.ndarray, shape=[N, 3]: Points in lidar coordinate.
|
28 |
+
"""
|
29 |
+
points_shape = list(points.shape[0:-1])
|
30 |
+
if points.shape[-1] == 3:
|
31 |
+
points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)
|
32 |
+
lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)
|
33 |
+
return lidar_points[..., :3]
|
34 |
+
|
35 |
+
|
36 |
+
def box_camera_to_lidar(data, r_rect, velo2cam):
|
37 |
+
"""Convert boxes in camera coordinate to lidar coordinate.
|
38 |
+
|
39 |
+
Note:
|
40 |
+
This function is for KITTI only.
|
41 |
+
|
42 |
+
Args:
|
43 |
+
data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
|
44 |
+
r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
|
45 |
+
specific camera coordinate (e.g. CAM2) to CAM0.
|
46 |
+
velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
|
47 |
+
camera coordinate to lidar coordinate.
|
48 |
+
|
49 |
+
Returns:
|
50 |
+
np.ndarray, shape=[N, 3]: Boxes in lidar coordinate.
|
51 |
+
"""
|
52 |
+
xyz = data[:, 0:3]
|
53 |
+
x_size, y_size, z_size = data[:, 3:4], data[:, 4:5], data[:, 5:6]
|
54 |
+
r = data[:, 6:7]
|
55 |
+
xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)
|
56 |
+
# yaw and dims also needs to be converted
|
57 |
+
r_new = -r - np.pi / 2
|
58 |
+
r_new = limit_period(r_new, period=np.pi * 2)
|
59 |
+
return np.concatenate([xyz_lidar, x_size, z_size, y_size, r_new], axis=1)
|
60 |
+
|
61 |
+
|
62 |
+
def corners_nd(dims, origin=0.5):
|
63 |
+
"""Generate relative box corners based on length per dim and origin point.
|
64 |
+
|
65 |
+
Args:
|
66 |
+
dims (np.ndarray, shape=[N, ndim]): Array of length per dim
|
67 |
+
origin (list or array or float, optional): origin point relate to
|
68 |
+
smallest point. Defaults to 0.5
|
69 |
+
|
70 |
+
Returns:
|
71 |
+
np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.
|
72 |
+
point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
|
73 |
+
(3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
|
74 |
+
where x0 < x1, y0 < y1, z0 < z1.
|
75 |
+
"""
|
76 |
+
ndim = int(dims.shape[1])
|
77 |
+
corners_norm = np.stack(
|
78 |
+
np.unravel_index(np.arange(2**ndim), [2] * ndim),
|
79 |
+
axis=1).astype(dims.dtype)
|
80 |
+
# now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
|
81 |
+
# (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
|
82 |
+
# so need to convert to a format which is convenient to do other computing.
|
83 |
+
# for 2d boxes, format is clockwise start with minimum point
|
84 |
+
# for 3d boxes, please draw lines by your hand.
|
85 |
+
if ndim == 2:
|
86 |
+
# generate clockwise box corners
|
87 |
+
corners_norm = corners_norm[[0, 1, 3, 2]]
|
88 |
+
elif ndim == 3:
|
89 |
+
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
|
90 |
+
corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
|
91 |
+
corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
|
92 |
+
[1, 2**ndim, ndim])
|
93 |
+
return corners
|
94 |
+
|
95 |
+
|
96 |
+
def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
|
97 |
+
"""Convert kitti locations, dimensions and angles to corners.
|
98 |
+
format: center(xy), dims(xy), angles(counterclockwise when positive)
|
99 |
+
|
100 |
+
Args:
|
101 |
+
centers (np.ndarray): Locations in kitti label file with shape (N, 2).
|
102 |
+
dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).
|
103 |
+
angles (np.ndarray, optional): Rotation_y in kitti label file with
|
104 |
+
shape (N). Defaults to None.
|
105 |
+
origin (list or array or float, optional): origin point relate to
|
106 |
+
smallest point. Defaults to 0.5.
|
107 |
+
|
108 |
+
Returns:
|
109 |
+
np.ndarray: Corners with the shape of (N, 4, 2).
|
110 |
+
"""
|
111 |
+
# 'length' in kitti format is in x axis.
|
112 |
+
# xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
|
113 |
+
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
|
114 |
+
corners = corners_nd(dims, origin=origin)
|
115 |
+
# corners: [N, 4, 2]
|
116 |
+
if angles is not None:
|
117 |
+
corners = rotation_3d_in_axis(corners, angles)
|
118 |
+
corners += centers.reshape([-1, 1, 2])
|
119 |
+
return corners
|
120 |
+
|
121 |
+
|
122 |
+
@numba.jit(nopython=True)
|
123 |
+
def depth_to_points(depth, trunc_pixel):
|
124 |
+
"""Convert depth map to points.
|
125 |
+
|
126 |
+
Args:
|
127 |
+
depth (np.array, shape=[H, W]): Depth map which
|
128 |
+
the row of [0~`trunc_pixel`] are truncated.
|
129 |
+
trunc_pixel (int): The number of truncated row.
|
130 |
+
|
131 |
+
Returns:
|
132 |
+
np.ndarray: Points in camera coordinates.
|
133 |
+
"""
|
134 |
+
num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)
|
135 |
+
points = np.zeros((num_pts, 3), dtype=depth.dtype)
|
136 |
+
x = np.array([0, 0, 1], dtype=depth.dtype)
|
137 |
+
k = 0
|
138 |
+
for i in range(trunc_pixel, depth.shape[0]):
|
139 |
+
for j in range(depth.shape[1]):
|
140 |
+
if depth[i, j] > 0.1:
|
141 |
+
x = np.array([j, i, 1], dtype=depth.dtype)
|
142 |
+
points[k] = x * depth[i, j]
|
143 |
+
k += 1
|
144 |
+
return points
|
145 |
+
|
146 |
+
|
147 |
+
def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):
|
148 |
+
"""Convert depth map to points in lidar coordinate.
|
149 |
+
|
150 |
+
Args:
|
151 |
+
depth (np.array, shape=[H, W]): Depth map which
|
152 |
+
the row of [0~`trunc_pixel`] are truncated.
|
153 |
+
trunc_pixel (int): The number of truncated row.
|
154 |
+
P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.
|
155 |
+
r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
|
156 |
+
specific camera coordinate (e.g. CAM2) to CAM0.
|
157 |
+
velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
|
158 |
+
camera coordinate to lidar coordinate.
|
159 |
+
|
160 |
+
Returns:
|
161 |
+
np.ndarray: Points in lidar coordinates.
|
162 |
+
"""
|
163 |
+
pts = depth_to_points(depth, trunc_pixel)
|
164 |
+
points_shape = list(pts.shape[0:-1])
|
165 |
+
points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)
|
166 |
+
points = points @ np.linalg.inv(P2.T)
|
167 |
+
lidar_points = camera_to_lidar(points, r_rect, velo2cam)
|
168 |
+
return lidar_points
|
169 |
+
|
170 |
+
|
171 |
+
def center_to_corner_box3d(centers,
|
172 |
+
dims,
|
173 |
+
angles=None,
|
174 |
+
origin=(0.5, 1.0, 0.5),
|
175 |
+
axis=1):
|
176 |
+
"""Convert kitti locations, dimensions and angles to corners.
|
177 |
+
|
178 |
+
Args:
|
179 |
+
centers (np.ndarray): Locations in kitti label file with shape (N, 3).
|
180 |
+
dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).
|
181 |
+
angles (np.ndarray, optional): Rotation_y in kitti label file with
|
182 |
+
shape (N). Defaults to None.
|
183 |
+
origin (list or array or float, optional): Origin point relate to
|
184 |
+
smallest point. Use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0)
|
185 |
+
in lidar. Defaults to (0.5, 1.0, 0.5).
|
186 |
+
axis (int, optional): Rotation axis. 1 for camera and 2 for lidar.
|
187 |
+
Defaults to 1.
|
188 |
+
|
189 |
+
Returns:
|
190 |
+
np.ndarray: Corners with the shape of (N, 8, 3).
|
191 |
+
"""
|
192 |
+
# 'length' in kitti format is in x axis.
|
193 |
+
# yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(lwh)(lidar)
|
194 |
+
# center in kitti format is [0.5, 1.0, 0.5] in xyz.
|
195 |
+
corners = corners_nd(dims, origin=origin)
|
196 |
+
# corners: [N, 8, 3]
|
197 |
+
if angles is not None:
|
198 |
+
corners = rotation_3d_in_axis(corners, angles, axis=axis)
|
199 |
+
corners += centers.reshape([-1, 1, 3])
|
200 |
+
return corners


@numba.jit(nopython=True)
def box2d_to_corner_jit(boxes):
    """Convert box2d to corner.

    Args:
        boxes (np.ndarray, shape=[N, 5]): Boxes2d with rotation.

    Returns:
        box_corners (np.ndarray, shape=[N, 4, 2]): Box corners.
    """
    num_box = boxes.shape[0]
    corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
    corners_norm[1, 1] = 1.0
    corners_norm[2] = 1.0
    corners_norm[3, 0] = 1.0
    corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
    corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(
        1, 4, 2)
    rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
    box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)
    for i in range(num_box):
        rot_sin = np.sin(boxes[i, -1])
        rot_cos = np.cos(boxes[i, -1])
        rot_mat_T[0, 0] = rot_cos
        rot_mat_T[0, 1] = rot_sin
        rot_mat_T[1, 0] = -rot_sin
        rot_mat_T[1, 1] = rot_cos
        box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]
    return box_corners


@numba.njit
def corner_to_standup_nd_jit(boxes_corner):
    """Convert boxes_corner to aligned (min-max) boxes.

    Args:
        boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.

    Returns:
        np.ndarray, shape=[N, dim*2]: Aligned (min-max) boxes.
    """
    num_boxes = boxes_corner.shape[0]
    ndim = boxes_corner.shape[-1]
    result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)
    for i in range(num_boxes):
        for j in range(ndim):
            result[i, j] = np.min(boxes_corner[i, :, j])
        for j in range(ndim):
            result[i, j + ndim] = np.max(boxes_corner[i, :, j])
    return result
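
Together with `box2d_to_corner_jit` above, this gives the usual rotated-box to axis-aligned-box reduction; a small sketch with illustrative values:

```python
import numpy as np

boxes2d = np.array([[0.0, 0.0, 4.0, 2.0, np.pi / 2]])  # x, y, xdim, ydim, rad
corners = box2d_to_corner_jit(boxes2d)       # (1, 4, 2) rotated corners
standup = corner_to_standup_nd_jit(corners)  # (1, 4): xmin, ymin, xmax, ymax
print(standup)  # approx [[-1., -2., 1., 2.]] after the 90-degree rotation
```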


@numba.jit(nopython=True)
def corner_to_surfaces_3d_jit(corners):
    """Convert 3d box corners from the corner functions in this module to
    surfaces whose normal vectors all point inward.

    Args:
        corners (np.ndarray): 3d box corners with the shape of (N, 8, 3).

    Returns:
        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
    """
    # box_corners: [N, 8, 3], must come from corner functions in this module
    num_boxes = corners.shape[0]
    surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)
    corner_idxes = np.array([
        0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7
    ]).reshape(6, 4)
    for i in range(num_boxes):
        for j in range(6):
            for k in range(4):
                surfaces[i, j, k] = corners[i, corner_idxes[j, k]]
    return surfaces


def rotation_points_single_angle(points, angle, axis=0):
    """Rotate points with a single angle.

    Args:
        points (np.ndarray, shape=[N, 3]): Points to rotate.
        angle (np.ndarray, shape=[1]): Rotation angle.
        axis (int, optional): Axis to rotate at. Defaults to 0.

    Returns:
        tuple[np.ndarray]: Rotated points and the rotation matrix.
    """
    # points: [N, 3]
    rot_sin = np.sin(angle)
    rot_cos = np.cos(angle)
    if axis == 1:
        rot_mat_T = np.array(
            [[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]],
            dtype=points.dtype)
    elif axis == 2 or axis == -1:
        rot_mat_T = np.array(
            [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]],
            dtype=points.dtype)
    elif axis == 0:
        rot_mat_T = np.array(
            [[1, 0, 0], [0, rot_cos, rot_sin], [0, -rot_sin, rot_cos]],
            dtype=points.dtype)
    else:
        raise ValueError('axis should be in range')

    return points @ rot_mat_T, rot_mat_T


def box3d_to_bbox(box3d, P2):
    """Convert box3d in camera coordinates to bbox in image coordinates.

    Args:
        box3d (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
        P2 (np.ndarray, shape=[4, 4]): Intrinsics of Camera2.

    Returns:
        np.ndarray, shape=[N, 4]: Boxes 2d in image coordinates.
    """
    box_corners = center_to_corner_box3d(
        box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
    box_corners_in_image = points_cam2img(box_corners, P2)
    # box_corners_in_image: [N, 8, 2]
    minxy = np.min(box_corners_in_image, axis=1)
    maxxy = np.max(box_corners_in_image, axis=1)
    bbox = np.concatenate([minxy, maxxy], axis=1)
    return bbox


def corner_to_surfaces_3d(corners):
    """Convert 3d box corners from the corner functions in this module to
    surfaces whose normal vectors all point inward.

    Args:
        corners (np.ndarray): 3D box corners with shape of (N, 8, 3).

    Returns:
        np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
    """
    # box_corners: [N, 8, 3], must come from corner functions in this module
    surfaces = np.array([
        [corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],
        [corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],
        [corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],
        [corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],
        [corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],
        [corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],
    ]).transpose([2, 0, 1, 3])
    return surfaces


def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
    """Check points in rotated bbox and return indices.

    Note:
        This function is for counterclockwise boxes.

    Args:
        points (np.ndarray, shape=[N, 3+dim]): Points to query.
        rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.
        z_axis (int, optional): Indicate which axis is height.
            Defaults to 2.
        origin (tuple[int], optional): Indicate the position of
            box center. Defaults to (0.5, 0.5, 0).

    Returns:
        np.ndarray, shape=[N, M]: Indices of points in each box.
    """
    # TODO: this function is different from PointCloud3D, be careful
    # when start to use nuscene, check the input
    rbbox_corners = center_to_corner_box3d(
        rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)
    surfaces = corner_to_surfaces_3d(rbbox_corners)
    indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)
    return indices
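
A typical use is cropping the points that fall inside ground-truth boxes; a minimal sketch with made-up data (assumes the module's functions are imported):

```python
import numpy as np

points = np.random.rand(1000, 4) * 20 - 10   # x, y, z plus one extra feature
# One box: center (0, 0, -1), size 4 x 2 x 1.5, yaw 0.3 rad.
boxes = np.array([[0.0, 0.0, -1.0, 4.0, 2.0, 1.5, 0.3]])
mask = points_in_rbbox(points, boxes)        # (1000, 1) boolean matrix
inside = points[mask[:, 0]]                  # points inside the first box
```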


def minmax_to_corner_2d(minmax_box):
    """Convert minmax box to corners2d.

    Args:
        minmax_box (np.ndarray, shape=[N, dims]): minmax boxes.

    Returns:
        np.ndarray: 2d corners of boxes.
    """
    ndim = minmax_box.shape[-1] // 2
    center = minmax_box[..., :ndim]
    dims = minmax_box[..., ndim:] - center
    return center_to_corner_box2d(center, dims, origin=0.0)


def create_anchors_3d_range(feature_size,
                            anchor_range,
                            sizes=((3.9, 1.6, 1.56), ),
                            rotations=(0, np.pi / 2),
                            dtype=np.float32):
    """Create anchors 3d by range.

    Args:
        feature_size (list[float] | tuple[float]): Feature map size. It is
            either a list or a tuple of [D, H, W] (in order of z, y and x).
        anchor_range (torch.Tensor | list[float]): Range of anchors with
            shape [6]. The order is consistent with that of anchors, i.e.,
            (x_min, y_min, z_min, x_max, y_max, z_max).
        sizes (list[list] | np.ndarray | torch.Tensor, optional):
            Anchor size with shape [N, 3], in order of x, y, z.
            Defaults to ((3.9, 1.6, 1.56), ).
        rotations (list[float] | np.ndarray | torch.Tensor, optional):
            Rotations of anchors in a single feature grid.
            Defaults to (0, np.pi / 2).
        dtype (type, optional): Data type. Defaults to np.float32.

    Returns:
        np.ndarray: Range based anchors with shape of
            (*feature_size, num_sizes, num_rots, 7).
    """
    anchor_range = np.array(anchor_range, dtype)
    z_centers = np.linspace(
        anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
    y_centers = np.linspace(
        anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
    x_centers = np.linspace(
        anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
    sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
    rotations = np.array(rotations, dtype=dtype)
    rets = np.meshgrid(
        x_centers, y_centers, z_centers, rotations, indexing='ij')
    tile_shape = [1] * 5
    tile_shape[-2] = int(sizes.shape[0])
    for i in range(len(rets)):
        rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
        rets[i] = rets[i][..., np.newaxis]  # for concat
    sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
    tile_size_shape = list(rets[0].shape)
    tile_size_shape[3] = 1
    sizes = np.tile(sizes, tile_size_shape)
    rets.insert(3, sizes)
    ret = np.concatenate(rets, axis=-1)
    return np.transpose(ret, [2, 1, 0, 3, 4, 5])
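
For example, a classic SECOND/KITTI-style car head uses values like these (illustrative; not taken from this repo's configs):

```python
import numpy as np

anchors = create_anchors_3d_range(
    feature_size=[1, 200, 176],                       # D, H, W
    anchor_range=[0, -40.0, -1.78, 70.4, 40.0, -1.78],
    sizes=((3.9, 1.6, 1.56), ),
    rotations=(0, np.pi / 2))
print(anchors.shape)  # (1, 200, 176, 1, 2, 7): one size, two rotations
```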


def center_to_minmax_2d(centers, dims, origin=0.5):
    """Center to minmax.

    Args:
        centers (np.ndarray): Center points.
        dims (np.ndarray): Dimensions.
        origin (list or array or float, optional): Origin point relative
            to the smallest point. Defaults to 0.5.

    Returns:
        np.ndarray: Minmax points.
    """
    if origin == 0.5:
        return np.concatenate([centers - dims / 2, centers + dims / 2],
                              axis=-1)
    corners = center_to_corner_box2d(centers, dims, origin=origin)
    return corners[:, [0, 2]].reshape([-1, 4])


def rbbox2d_to_near_bbox(rbboxes):
    """Convert rotated bbox to the nearest 'standing' or 'lying' bbox.

    Args:
        rbboxes (np.ndarray): Rotated bboxes with shape of
            (N, 5(x, y, xdim, ydim, rad)).

    Returns:
        np.ndarray: Bounding boxes with the shape of
            (N, 4(xmin, ymin, xmax, ymax)).
    """
    rots = rbboxes[..., -1]
    rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))
    cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]
    bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
    bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
    return bboxes


@numba.jit(nopython=True)
def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
    """Calculate box iou. Note that the jit version runs ~10x faster than the
    box_overlaps function in mmdet3d.core.evaluation.

    Note:
        This function is for counterclockwise boxes.

    Args:
        boxes (np.ndarray): Input bounding boxes with shape of (N, 4).
        query_boxes (np.ndarray): Query boxes with shape of (K, 4).
        mode (str, optional): IoU mode. Defaults to 'iou'.
        eps (float, optional): Value added to denominator. Defaults to 0.

    Returns:
        np.ndarray: Overlap between boxes and query_boxes
            with the shape of [N, K].
    """
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=boxes.dtype)
    for k in range(K):
        box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *
                    (query_boxes[k, 3] - query_boxes[k, 1] + eps))
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + eps)
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + eps)
                if ih > 0:
                    if mode == 'iou':
                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
                              (boxes[n, 3] - boxes[n, 1] + eps) + box_area -
                              iw * ih)
                    else:
                        ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
                              (boxes[n, 3] - boxes[n, 1] + eps))
                    overlaps[n, k] = iw * ih / ua
    return overlaps
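
A small sanity check (illustrative): two 10x10 boxes offset by 5 in each direction overlap on a 5x5 patch, so IoU = 25 / (100 + 100 - 25) = 1/7.

```python
import numpy as np

boxes = np.array([[0.0, 0.0, 10.0, 10.0]])
query_boxes = np.array([[5.0, 5.0, 15.0, 15.0]])
print(iou_jit(boxes, query_boxes))  # [[0.1428...]]
```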


def projection_matrix_to_CRT_kitti(proj):
    """Split the projection matrix of KITTI.

    Note:
        This function is for KITTI only.

        P = C @ [R|T]
        C is an upper triangular matrix, so we invert CR and use a QR
        decomposition, which is stable for all KITTI camera projection
        matrices.

    Args:
        proj (np.ndarray, shape=[4, 4]): Intrinsics of camera.

    Returns:
        tuple[np.ndarray]: Split matrices of C, R and T.
    """

    CR = proj[0:3, 0:3]
    CT = proj[0:3, 3]
    RinvCinv = np.linalg.inv(CR)
    Rinv, Cinv = np.linalg.qr(RinvCinv)
    C = np.linalg.inv(Cinv)
    R = np.linalg.inv(Rinv)
    T = Cinv @ CT
    return C, R, T
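
A sketch of the decomposition on a KITTI-like P2 (numbers approximate, for illustration only):

```python
import numpy as np

P2 = np.array([[721.5, 0.0, 609.6, 44.9],
               [0.0, 721.5, 172.9, 0.2],
               [0.0, 0.0, 1.0, 0.003],
               [0.0, 0.0, 0.0, 1.0]])
C, R, T = projection_matrix_to_CRT_kitti(P2)
# C: 3x3 upper-triangular intrinsics, R: 3x3 rotation, T: 3-vector; they
# satisfy P2[:3, :3] == C @ R and P2[:3, 3] == C @ T up to float error.
```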


def remove_outside_points(points, rect, Trv2c, P2, image_shape):
    """Remove points which are outside of the image.

    Note:
        This function is for KITTI only.

    Args:
        points (np.ndarray, shape=[N, 3+dims]): Total points.
        rect (np.ndarray, shape=[4, 4]): Matrix to project points in
            specific camera coordinate (e.g. CAM2) to CAM0.
        Trv2c (np.ndarray, shape=[4, 4]): Matrix to project points in
            camera coordinate to lidar coordinate.
        P2 (np.ndarray, shape=[4, 4]): Intrinsics of Camera2.
        image_shape (list[int]): Shape of the image.

    Returns:
        np.ndarray, shape=[N, 3+dims]: Filtered points.
    """
    # 5x faster than remove_outside_points_v1 (2ms vs 10ms)
    C, R, T = projection_matrix_to_CRT_kitti(P2)
    image_bbox = [0, 0, image_shape[1], image_shape[0]]
    frustum = get_frustum(image_bbox, C)
    frustum -= T
    frustum = np.linalg.inv(R) @ frustum.T
    frustum = camera_to_lidar(frustum.T, rect, Trv2c)
    frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])
    indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)
    points = points[indices.reshape([-1])]
    return points


def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
    """Get frustum corners in camera coordinates.

    Args:
        bbox_image (list[int]): box in image coordinates.
        C (np.ndarray): Intrinsics.
        near_clip (float, optional): Nearest distance of frustum.
            Defaults to 0.001.
        far_clip (float, optional): Farthest distance of frustum.
            Defaults to 100.

    Returns:
        np.ndarray, shape=[8, 3]: coordinates of frustum corners.
    """
    fku = C[0, 0]
    fkv = -C[1, 1]
    u0v0 = C[0:2, 2]
    z_points = np.array(
        [near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]
    b = bbox_image
    box_corners = np.array(
        [[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],
        dtype=C.dtype)
    near_box_corners = (box_corners - u0v0) / np.array(
        [fku / near_clip, -fkv / near_clip], dtype=C.dtype)
    far_box_corners = (box_corners - u0v0) / np.array(
        [fku / far_clip, -fkv / far_clip], dtype=C.dtype)
    ret_xy = np.concatenate([near_box_corners, far_box_corners],
                            axis=0)  # [8, 2]
    ret_xyz = np.concatenate([ret_xy, z_points], axis=1)
    return ret_xyz


def surface_equ_3d(polygon_surfaces):
    """Compute the normal vectors and plane offsets of polygon surfaces.

    Args:
        polygon_surfaces (np.ndarray): Polygon surfaces with shape of
            [num_polygon, max_num_surfaces, max_num_points_of_surface, 3].
            All surfaces' normal vectors must point inward.
            Max_num_points_of_surface must be at least 3.

    Returns:
        tuple: Normal vector and its direction.
    """
    # return [a, b, c], d in ax+by+cz+d=0
    # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
    surface_vec = polygon_surfaces[:, :, :2, :] - \
        polygon_surfaces[:, :, 1:3, :]
    # normal_vec: [..., 3]
    normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
    # print(normal_vec.shape, points[..., 0, :].shape)
    # d = -np.inner(normal_vec, points[..., 0, :])
    d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
    return normal_vec, -d


@numba.njit
def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
                                     num_surfaces):
    """
    Args:
        points (np.ndarray): Input points with shape of (num_points, 3).
        polygon_surfaces (np.ndarray): Polygon surfaces with shape of
            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
            All surfaces' normal vectors must point inward.
            Max_num_points_of_surface must be at least 3.
        normal_vec (np.ndarray): Normal vectors of polygon_surfaces.
        d (np.ndarray): Plane offsets of the surfaces.
        num_surfaces (np.ndarray): Number of surfaces a polygon contains
            with shape of (num_polygon).

    Returns:
        np.ndarray: Result matrix with the shape of [num_points, num_polygon].
    """
    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
    num_points = points.shape[0]
    num_polygons = polygon_surfaces.shape[0]
    ret = np.ones((num_points, num_polygons), dtype=np.bool_)
    sign = 0.0
    for i in range(num_points):
        for j in range(num_polygons):
            for k in range(max_num_surfaces):
                if k > num_surfaces[j]:
                    break
                sign = (
                    points[i, 0] * normal_vec[j, k, 0] +
                    points[i, 1] * normal_vec[j, k, 1] +
                    points[i, 2] * normal_vec[j, k, 2] + d[j, k])
                if sign >= 0:
                    ret[i, j] = False
                    break
    return ret


def points_in_convex_polygon_3d_jit(points,
                                    polygon_surfaces,
                                    num_surfaces=None):
    """Check whether points are in 3d convex polygons.

    Args:
        points (np.ndarray): Input points with shape of (num_points, 3).
        polygon_surfaces (np.ndarray): Polygon surfaces with shape of
            (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
            All surfaces' normal vectors must point inward.
            Max_num_points_of_surface must be at least 3.
        num_surfaces (np.ndarray, optional): Number of surfaces a polygon
            contains with shape of (num_polygon). Defaults to None.

    Returns:
        np.ndarray: Result matrix with the shape of [num_points, num_polygon].
    """
    max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
    # num_points = points.shape[0]
    num_polygons = polygon_surfaces.shape[0]
    if num_surfaces is None:
        num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)
    normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])
    # normal_vec: [num_polygon, max_num_surfaces, 3]
    # d: [num_polygon, max_num_surfaces]
    return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,
                                            normal_vec, d, num_surfaces)


@numba.njit
def points_in_convex_polygon_jit(points, polygon, clockwise=False):
    """Check whether points are in 2d convex polygons. True when a point is
    in the polygon.

    Args:
        points (np.ndarray): Input points with the shape of [num_points, 2].
        polygon (np.ndarray): Input polygon with the shape of
            [num_polygon, num_points_of_polygon, 2].
        clockwise (bool, optional): Indicate whether the polygon is
            clockwise. Defaults to False.

    Returns:
        np.ndarray: Result matrix with the shape of [num_points, num_polygon].
    """
    # first convert polygon to directed lines
    num_points_of_polygon = polygon.shape[1]
    num_points = points.shape[0]
    num_polygons = polygon.shape[0]
    # vec for all the polygons
    if clockwise:
        vec1 = polygon - polygon[:,
                                 np.array([num_points_of_polygon - 1] + list(
                                     range(num_points_of_polygon - 1))), :]
    else:
        vec1 = polygon[:,
                       np.array([num_points_of_polygon - 1] +
                                list(range(num_points_of_polygon -
                                           1))), :] - polygon
    ret = np.zeros((num_points, num_polygons), dtype=np.bool_)
    success = True
    cross = 0.0
    for i in range(num_points):
        for j in range(num_polygons):
            success = True
            for k in range(num_points_of_polygon):
                vec = vec1[j, k]
                cross = vec[1] * (polygon[j, k, 0] - points[i, 0])
                cross -= vec[0] * (polygon[j, k, 1] - points[i, 1])
                if cross >= 0:
                    success = False
                    break
            ret[i, j] = success
    return ret


def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
    """Convert kitti center boxes to corners.

        7 -------- 4
       /|         /|
      6 -------- 5 .
      | |        | |
      . 3 -------- 0
      |/         |/
      2 -------- 1

    Note:
        This function is for LiDAR boxes only.

    Args:
        boxes3d (np.ndarray): Boxes with shape of (N, 7)
            [x, y, z, x_size, y_size, z_size, ry] in LiDAR coords,
            see the definition of ry in KITTI dataset.
        bottom_center (bool, optional): Whether z is on the bottom center
            of the object. Defaults to True.

    Returns:
        np.ndarray: Box corners with the shape of [N, 8, 3].
    """
    boxes_num = boxes3d.shape[0]
    x_size, y_size, z_size = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
    x_corners = np.array([
        x_size / 2., -x_size / 2., -x_size / 2., x_size / 2., x_size / 2.,
        -x_size / 2., -x_size / 2., x_size / 2.
    ],
                         dtype=np.float32).T
    y_corners = np.array([
        -y_size / 2., -y_size / 2., y_size / 2., y_size / 2., -y_size / 2.,
        -y_size / 2., y_size / 2., y_size / 2.
    ],
                         dtype=np.float32).T
    if bottom_center:
        z_corners = np.zeros((boxes_num, 8), dtype=np.float32)
        z_corners[:, 4:8] = z_size.reshape(boxes_num, 1).repeat(
            4, axis=1)  # (N, 8)
    else:
        z_corners = np.array([
            -z_size / 2., -z_size / 2., -z_size / 2., -z_size / 2.,
            z_size / 2., z_size / 2., z_size / 2., z_size / 2.
        ],
                             dtype=np.float32).T

    ry = boxes3d[:, 6]
    zeros, ones = np.zeros(
        ry.size, dtype=np.float32), np.ones(
            ry.size, dtype=np.float32)
    rot_list = np.array([[np.cos(ry), np.sin(ry), zeros],
                         [-np.sin(ry), np.cos(ry), zeros],
                         [zeros, zeros, ones]])  # (3, 3, N)
    R_list = np.transpose(rot_list, (2, 0, 1))  # (N, 3, 3)

    temp_corners = np.concatenate((x_corners.reshape(
        -1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),
                                  axis=2)  # (N, 8, 3)
    rotated_corners = np.matmul(temp_corners, R_list)  # (N, 8, 3)
    x_corners = rotated_corners[:, :, 0]
    y_corners = rotated_corners[:, :, 1]
    z_corners = rotated_corners[:, :, 2]

    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]

    x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
    y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
    z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)

    corners = np.concatenate(
        (x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),
        axis=2)

    return corners.astype(np.float32)
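
A usage sketch for the corner conversion (values illustrative; assumes the module's functions are imported):

```python
import numpy as np

boxes3d = np.array([[0.0, 0.0, -1.0, 4.0, 2.0, 1.5, 0.5]], dtype=np.float32)
corners = boxes3d_to_corners3d_lidar(boxes3d)  # (1, 8, 3), z starts at -1.0
# With bottom_center=False the same z is treated as the gravity center:
corners_gc = boxes3d_to_corners3d_lidar(boxes3d, bottom_center=False)
```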
det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py
ADDED
@@ -0,0 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import build_bbox_coder
from .transfusion_bbox_coder import TransFusionBBoxCoder

__all__ = [
    'build_bbox_coder', 'TransFusionBBoxCoder'
]
det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py
ADDED
@@ -0,0 +1,124 @@
import torch

from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS


@BBOX_CODERS.register_module()
class TransFusionBBoxCoder(BaseBBoxCoder):
    def __init__(self,
                 pc_range,
                 out_size_factor,
                 voxel_size,
                 post_center_range=None,
                 score_threshold=None,
                 code_size=8,
                 ):
        self.pc_range = pc_range
        self.out_size_factor = out_size_factor
        self.voxel_size = voxel_size
        self.post_center_range = post_center_range
        self.score_threshold = score_threshold
        self.code_size = code_size

    def encode(self, dst_boxes):
        targets = torch.zeros([dst_boxes.shape[0], self.code_size]).to(dst_boxes.device)
        targets[:, 0] = (dst_boxes[:, 0] - self.pc_range[0]) / (self.out_size_factor * self.voxel_size[0])
        targets[:, 1] = (dst_boxes[:, 1] - self.pc_range[1]) / (self.out_size_factor * self.voxel_size[1])
        # targets[:, 2] = (dst_boxes[:, 2] - self.post_center_range[2]) / (self.post_center_range[5] - self.post_center_range[2])
        targets[:, 3] = dst_boxes[:, 3].log()
        targets[:, 4] = dst_boxes[:, 4].log()
        targets[:, 5] = dst_boxes[:, 5].log()
        targets[:, 2] = dst_boxes[:, 2] + dst_boxes[:, 5] * 0.5  # bottom center to gravity center
        targets[:, 6] = torch.sin(dst_boxes[:, 6])
        targets[:, 7] = torch.cos(dst_boxes[:, 6])
        if self.code_size == 10:
            targets[:, 8:10] = dst_boxes[:, 7:]
        return targets

    def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
        """Decode bboxes.

        Args:
            heatmap (torch.Tensor): Heatmap with the shape of
                [B, num_cls, num_proposals].
            rot (torch.Tensor): Rotation (sin, cos) with the shape of
                [B, 2, num_proposals].
            dim (torch.Tensor): Dim of the boxes with the shape of
                [B, 3, num_proposals].
            center (torch.Tensor): BEV center of the boxes with the shape of
                [B, 2, num_proposals]. (in feature map metric)
            height (torch.Tensor): Height of the boxes with the shape of
                [B, 1, num_proposals]. (in real world metric)
            vel (torch.Tensor): Velocity with the shape of
                [B, 2, num_proposals].
            filter (bool): If False, return all boxes without checking score
                and center_range.

        Returns:
            list[dict]: Decoded boxes.
        """
        # class label
        final_preds = heatmap.max(1, keepdims=False).indices
        final_scores = heatmap.max(1, keepdims=False).values

        # change size to real world metric
        center[:, 0, :] = center[:, 0, :] * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]
        center[:, 1, :] = center[:, 1, :] * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]
        # center[:, 2, :] = center[:, 2, :] * (self.post_center_range[5] - self.post_center_range[2]) + self.post_center_range[2]
        dim[:, 0, :] = dim[:, 0, :].exp()
        dim[:, 1, :] = dim[:, 1, :].exp()
        dim[:, 2, :] = dim[:, 2, :].exp()
        height = height - dim[:, 2:3, :] * 0.5  # gravity center to bottom center
        rots, rotc = rot[:, 0:1, :], rot[:, 1:2, :]
        rot = torch.atan2(rots, rotc)

        if vel is None:
            final_box_preds = torch.cat([center, height, dim, rot], dim=1).permute(0, 2, 1)
        else:
            final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1)

        predictions_dicts = []
        for i in range(heatmap.shape[0]):
            boxes3d = final_box_preds[i]
            scores = final_scores[i]
            labels = final_preds[i]
            predictions_dict = {
                'bboxes': boxes3d,
                'scores': scores,
                'labels': labels
            }
            predictions_dicts.append(predictions_dict)

        if filter is False:
            return predictions_dicts

        # use score threshold
        if self.score_threshold is not None:
            thresh_mask = final_scores > self.score_threshold

        if self.post_center_range is not None:
            self.post_center_range = torch.tensor(
                self.post_center_range, device=heatmap.device)
            mask = (final_box_preds[..., :3] >=
                    self.post_center_range[:3]).all(2)
            mask &= (final_box_preds[..., :3] <=
                     self.post_center_range[3:]).all(2)

            predictions_dicts = []
            for i in range(heatmap.shape[0]):
                cmask = mask[i, :]
                if self.score_threshold:
                    cmask &= thresh_mask[i]

                boxes3d = final_box_preds[i, cmask]
                scores = final_scores[i, cmask]
                labels = final_preds[i, cmask]
                predictions_dict = {
                    'bboxes': boxes3d,
                    'scores': scores,
                    'labels': labels
                }

                predictions_dicts.append(predictions_dict)
        else:
            raise NotImplementedError(
                'Need to reorganize output as a batch, only '
                'support post_center_range is not None for now!')

        return predictions_dicts
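
A minimal sketch of the encoder side (nuScenes-like values chosen purely for illustration, assuming mmdet is installed so the module imports; `encode` expects bottom-center boxes and returns grid-normalized targets):

```python
import torch
from det_map.det.dal.mmdet3d.core.bbox.coders.transfusion_bbox_coder import (
    TransFusionBBoxCoder)  # assumed import path

coder = TransFusionBBoxCoder(
    pc_range=[-54.0, -54.0],   # only the x/y minimums are used by encode
    out_size_factor=8,
    voxel_size=[0.075, 0.075],
    code_size=10)
# One box: x, y, z (bottom), dx, dy, dz, yaw, vx, vy -- illustrative numbers.
boxes = torch.tensor([[10.0, -5.0, -1.0, 1.9, 4.6, 1.7, 0.3, 1.0, 0.0]])
targets = coder.encode(boxes)
print(targets.shape)           # torch.Size([1, 10])
```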
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py
ADDED
@@ -0,0 +1,10 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
                               axis_aligned_bbox_overlaps_3d,
                               bbox_overlaps_3d)

__all__ = [
    'BboxOverlaps3D', 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',
    'axis_aligned_bbox_overlaps_3d'
]
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
ADDED
@@ -0,0 +1,232 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.core.bbox import bbox_overlaps
from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS

from ..structures import get_box_type


@IOU_CALCULATORS.register_module()
class BboxOverlaps3D(object):
    """3D IoU Calculator.

    Args:
        coordinate (str): The coordinate system, valid options are
            'camera', 'lidar', and 'depth'.
    """

    def __init__(self, coordinate):
        assert coordinate in ['camera', 'lidar', 'depth']
        self.coordinate = coordinate

    def __call__(self, bboxes1, bboxes2, mode='iou'):
        """Calculate 3D IoU using cuda implementation.

        Note:
            This function calculates the IoU of 3D boxes based on their
            volumes. IoU calculator :class:`BboxOverlaps3D` uses this
            function to calculate the actual 3D IoUs of boxes.

        Args:
            bboxes1 (torch.Tensor): with shape (N, 7+C),
                (x, y, z, x_size, y_size, z_size, ry, v*).
            bboxes2 (torch.Tensor): with shape (M, 7+C),
                (x, y, z, x_size, y_size, z_size, ry, v*).
            mode (str): "iou" (intersection over union) or
                "iof" (intersection over foreground).

        Return:
            torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
                with shape (M, N) (aligned mode is not supported currently).
        """
        return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)

    def __repr__(self):
        """str: Return a string that describes the module."""
        repr_str = self.__class__.__name__
        repr_str += f'(coordinate={self.coordinate})'
        return repr_str


def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
    """Calculate 3D IoU using cuda implementation.

    Note:
        This function calculates the IoU of 3D boxes based on their volumes.
        IoU calculator :class:`BboxOverlaps3D` uses this function to
        calculate the actual IoUs of boxes.

    Args:
        bboxes1 (torch.Tensor): with shape (N, 7+C),
            (x, y, z, x_size, y_size, z_size, ry, v*).
        bboxes2 (torch.Tensor): with shape (M, 7+C),
            (x, y, z, x_size, y_size, z_size, ry, v*).
        mode (str): "iou" (intersection over union) or
            "iof" (intersection over foreground).
        coordinate (str): 'camera' or 'lidar' coordinate system.

    Return:
        torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
            with shape (M, N) (aligned mode is not supported currently).
    """
    assert bboxes1.size(-1) == bboxes2.size(-1) >= 7

    box_type, _ = get_box_type(coordinate)

    bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
    bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])

    return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)


@IOU_CALCULATORS.register_module()
class AxisAlignedBboxOverlaps3D(object):
    """Axis-aligned 3D Overlaps (IoU) Calculator."""

    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
        """Calculate IoU between axis-aligned 3D bboxes.

        Args:
            bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
                format or empty.
            bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
                format or empty.
                B indicates the batch dim, in shape (B1, B2, ..., Bn).
                If ``is_aligned`` is ``True``, then m and n must be equal.
            mode (str): "iou" (intersection over union) or "giou"
                (generalized intersection over union).
            is_aligned (bool, optional): If True, then m and n must be equal.
                Defaults to False.

        Returns:
            Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,).
        """
        assert bboxes1.size(-1) == bboxes2.size(-1) == 6
        return axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, mode,
                                             is_aligned)

    def __repr__(self):
        """str: A string describing the module."""
        repr_str = self.__class__.__name__ + '()'
        return repr_str


def axis_aligned_bbox_overlaps_3d(bboxes1,
                                  bboxes2,
                                  mode='iou',
                                  is_aligned=False,
                                  eps=1e-6):
    """Calculate overlap between two sets of axis-aligned 3D bboxes. If
    ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
            format or empty.
        bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
            format or empty.
            B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or "giou" (generalized
            intersection over union).
        is_aligned (bool, optional): If True, then m and n must be equal.
            Defaults to False.
        eps (float, optional): A value added to the denominator for numerical
            stability. Defaults to 1e-6.

    Returns:
        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,).

    Example:
        >>> bboxes1 = torch.FloatTensor([
        >>>     [0, 0, 0, 10, 10, 10],
        >>>     [10, 10, 10, 20, 20, 20],
        >>>     [32, 32, 32, 38, 40, 42],
        >>> ])
        >>> bboxes2 = torch.FloatTensor([
        >>>     [0, 0, 0, 10, 20, 20],
        >>>     [0, 10, 10, 10, 19, 20],
        >>>     [10, 10, 10, 20, 20, 20],
        >>> ])
        >>> overlaps = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)
        >>> assert overlaps.shape == (3, 3)
        >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)
        >>> assert overlaps.shape == (3, )

    Example:
        >>> empty = torch.empty(0, 6)
        >>> nonempty = torch.FloatTensor([[0, 0, 0, 10, 9, 10]])
        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
    """

    assert mode in ['iou', 'giou'], f'Unsupported mode {mode}'
    # Either the boxes are empty or the length of boxes's last dimension is 6
    assert (bboxes1.size(-1) == 6 or bboxes1.size(0) == 0)
    assert (bboxes2.size(-1) == 6 or bboxes2.size(0) == 0)

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.size(-2)
    cols = bboxes2.size(-2)
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        if is_aligned:
            return bboxes1.new(batch_shape + (rows, ))
        else:
            return bboxes1.new(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 3] -
             bboxes1[..., 0]) * (bboxes1[..., 4] - bboxes1[..., 1]) * (
                 bboxes1[..., 5] - bboxes1[..., 2])
    area2 = (bboxes2[..., 3] -
             bboxes2[..., 0]) * (bboxes2[..., 4] - bboxes2[..., 1]) * (
                 bboxes2[..., 5] - bboxes2[..., 2])

    if is_aligned:
        lt = torch.max(bboxes1[..., :3], bboxes2[..., :3])  # [B, rows, 3]
        rb = torch.min(bboxes1[..., 3:], bboxes2[..., 3:])  # [B, rows, 3]

        wh = (rb - lt).clamp(min=0)  # [B, rows, 3]
        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]

        if mode in ['iou', 'giou']:
            union = area1 + area2 - overlap
        else:
            union = area1
        if mode == 'giou':
            enclosed_lt = torch.min(bboxes1[..., :3], bboxes2[..., :3])
            enclosed_rb = torch.max(bboxes1[..., 3:], bboxes2[..., 3:])
    else:
        lt = torch.max(bboxes1[..., :, None, :3],
                       bboxes2[..., None, :, :3])  # [B, rows, cols, 3]
        rb = torch.min(bboxes1[..., :, None, 3:],
                       bboxes2[..., None, :, 3:])  # [B, rows, cols, 3]

        wh = (rb - lt).clamp(min=0)  # [B, rows, cols, 3]
        overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]

        if mode in ['iou', 'giou']:
            union = area1[..., None] + area2[..., None, :] - overlap
        if mode == 'giou':
            enclosed_lt = torch.min(bboxes1[..., :, None, :3],
                                    bboxes2[..., None, :, :3])
            enclosed_rb = torch.max(bboxes1[..., :, None, 3:],
                                    bboxes2[..., None, :, 3:])

    eps = union.new_tensor([eps])
    union = torch.max(union, eps)
    ious = overlap / union
    if mode in ['iou']:
        return ious
    # calculate gious
    enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] * enclose_wh[..., 2]
    enclose_area = torch.max(enclose_area, eps)
    gious = ious - (enclose_area - union) / enclose_area
    return gious
det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py
ADDED
@@ -0,0 +1,18 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base_box3d import BaseInstance3DBoxes
from .box_3d_mode import Box3DMode
from .cam_box3d import CameraInstance3DBoxes
from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
                    mono_cam_box2vis, points_cam2img, points_img2cam,
                    rotation_3d_in_axis, xywhr2xyxyr)

__all__ = [
    'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
    'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
    'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',
    'get_proj_mat_by_coord_type'
]
det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py
ADDED
@@ -0,0 +1,578 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import abstractmethod

import numpy as np
import torch
from mmcv.ops import box_iou_rotated, points_in_boxes_all, points_in_boxes_part

from .utils import limit_period


class BaseInstance3DBoxes(object):
    """Base class for 3D Boxes.

    Note:
        The box is bottom centered, i.e. the relative position of origin in
        the box is (0.5, 0.5, 0).

    Args:
        tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.
        box_dim (int): Number of the dimension of a box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw).
            Defaults to 7.
        with_yaw (bool): Whether the box is with yaw rotation.
            If False, the value of yaw will be set to 0 as minmax boxes.
            Defaults to True.
        origin (tuple[float], optional): Relative position of the box origin.
            Defaults to (0.5, 0.5, 0). This will guide the box to be
            converted to (0.5, 0.5, 0) mode.

    Attributes:
        tensor (torch.Tensor): Float matrix of N x box_dim.
        box_dim (int): Integer indicating the dimension of a box.
            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
        with_yaw (bool): Whether the box is with yaw rotation. If False,
            the value of yaw will be set to 0 as minmax boxes.
    """

    def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
        if isinstance(tensor, torch.Tensor):
            device = tensor.device
        else:
            device = torch.device('cpu')
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
        if tensor.numel() == 0:
            # Use reshape, so we don't end up creating a new tensor that
            # does not depend on the inputs (and consequently confuses jit)
            tensor = tensor.reshape((0, box_dim)).to(
                dtype=torch.float32, device=device)
        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()

        if tensor.shape[-1] == 6:
            # If the dimension of boxes is 6, we expand box_dim by padding
            # 0 as a fake yaw and set with_yaw to False.
            assert box_dim == 6
            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
            tensor = torch.cat((tensor, fake_rot), dim=-1)
            self.box_dim = box_dim + 1
            self.with_yaw = False
        else:
            self.box_dim = box_dim
            self.with_yaw = with_yaw
        self.tensor = tensor.clone()

        if origin != (0.5, 0.5, 0):
            dst = self.tensor.new_tensor((0.5, 0.5, 0))
            src = self.tensor.new_tensor(origin)
            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)

    @property
    def volume(self):
        """torch.Tensor: A vector with volume of each box."""
        return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]

    @property
    def dims(self):
        """torch.Tensor: Size dimensions of each box in shape (N, 3)."""
        return self.tensor[:, 3:6]

    @property
    def yaw(self):
        """torch.Tensor: A vector with yaw of each box in shape (N, )."""
        return self.tensor[:, 6]

    @property
    def height(self):
        """torch.Tensor: A vector with height of each box in shape (N, )."""
        return self.tensor[:, 5]

    @property
    def top_height(self):
        """torch.Tensor:
        A vector with the top height of each box in shape (N, )."""
        return self.bottom_height + self.height

    @property
    def bottom_height(self):
        """torch.Tensor:
        A vector with bottom's height of each box in shape (N, )."""
        return self.tensor[:, 2]

    @property
    def center(self):
        """Calculate the center of all the boxes.

        Note:
            In MMDetection3D's convention, the bottom center is
            usually taken as the default center.

            The relative position of the centers in different kinds of
            boxes are different, e.g., the relative center of a box is
            (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
            It is recommended to use ``bottom_center`` or ``gravity_center``
            for clearer usage.

        Returns:
            torch.Tensor: A tensor with center of each box in shape (N, 3).
        """
        return self.bottom_center

    @property
    def bottom_center(self):
        """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
        return self.tensor[:, :3]

    @property
    def gravity_center(self):
        """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
        pass

    @property
    def corners(self):
        """torch.Tensor:
        a tensor with 8 corners of each box in shape (N, 8, 3)."""
        pass

    @property
    def bev(self):
        """torch.Tensor: 2D BEV box of each box with rotation
        in XYWHR format, in shape (N, 5)."""
        return self.tensor[:, [0, 1, 3, 4, 6]]

    @property
    def nearest_bev(self):
        """torch.Tensor: A tensor of 2D BEV box of each box
        without rotation."""
        # Obtain BEV boxes with rotation in XYWHR format
        bev_rotated_boxes = self.bev
        # convert the rotation to a valid range
        rotations = bev_rotated_boxes[:, -1]
        normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))

        # find the center of boxes
        conditions = (normed_rotations > np.pi / 4)[..., None]
        bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
                                                                [0, 1, 3, 2]],
                                  bev_rotated_boxes[:, :4])

        centers = bboxes_xywh[:, :2]
        dims = bboxes_xywh[:, 2:]
        bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
        return bev_boxes

    def in_range_bev(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): The range of box
                (x_min, y_min, x_max, y_max).

        Note:
            The original implementation of SECOND checks whether boxes are
            in a range by checking whether the points are in a convex
            polygon; we reduce the burden for simpler cases.

        Returns:
            torch.Tensor: Whether each box is inside the reference range.
        """
        in_range_flags = ((self.bev[:, 0] > box_range[0])
                          & (self.bev[:, 1] > box_range[1])
                          & (self.bev[:, 0] < box_range[2])
                          & (self.bev[:, 1] < box_range[3]))
        return in_range_flags

    @abstractmethod
    def rotate(self, angle, points=None):
        """Rotate boxes with points (optional) with the given angle or
        rotation matrix.

        Args:
            angle (float | torch.Tensor | np.ndarray):
                Rotation angle or rotation matrix.
            points (torch.Tensor | numpy.ndarray |
                :obj:`BasePoints`, optional):
                Points to rotate. Defaults to None.
        """
        pass

    @abstractmethod
    def flip(self, bev_direction='horizontal'):
        """Flip the boxes in BEV along the given BEV direction.

        Args:
            bev_direction (str, optional): Direction by which to flip.
                Can be chosen from 'horizontal' and 'vertical'.
                Defaults to 'horizontal'.
        """
        pass

    def translate(self, trans_vector):
        """Translate boxes with the given translation vector.

        Args:
            trans_vector (torch.Tensor): Translation vector of size (1, 3).
        """
        if not isinstance(trans_vector, torch.Tensor):
            trans_vector = self.tensor.new_tensor(trans_vector)
        self.tensor[:, :3] += trans_vector

    def in_range_3d(self, box_range):
        """Check whether the boxes are in the given range.

        Args:
            box_range (list | torch.Tensor): The range of box
                (x_min, y_min, z_min, x_max, y_max, z_max).

        Note:
            In the original implementation of SECOND, checking whether
            a box is in the range checks whether the points are in a convex
            polygon; we try to reduce the burden for simpler cases.

        Returns:
            torch.Tensor: A binary vector indicating whether each box is
                inside the reference range.
        """
        in_range_flags = ((self.tensor[:, 0] > box_range[0])
                          & (self.tensor[:, 1] > box_range[1])
                          & (self.tensor[:, 2] > box_range[2])
                          & (self.tensor[:, 0] < box_range[3])
                          & (self.tensor[:, 1] < box_range[4])
                          & (self.tensor[:, 2] < box_range[5]))
        return in_range_flags

    @abstractmethod
    def convert_to(self, dst, rt_mat=None):
        """Convert self to ``dst`` mode.

        Args:
            dst (:obj:`Box3DMode`): The target Box mode.
            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.

        Returns:
            :obj:`BaseInstance3DBoxes`: The converted box of the same type
                in the `dst` mode.
        """
        pass

    def scale(self, scale_factor):
        """Scale the box with horizontal and vertical scaling factors.

        Args:
            scale_factor (float): Scale factor to scale the boxes.
        """
        self.tensor[:, :6] *= scale_factor
        self.tensor[:, 7:] *= scale_factor  # velocity

    def limit_yaw(self, offset=0.5, period=np.pi):
        """Limit the yaw to a given period and offset.

        Args:
            offset (float, optional): The offset of the yaw. Defaults to 0.5.
            period (float, optional): The expected period. Defaults to np.pi.
        """
        self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)

    def nonempty(self, threshold=0.0):
        """Find boxes that are non-empty.

        A box is considered empty if any of its sides is no larger than
        the threshold.

        Args:
            threshold (float, optional): The threshold of minimal sizes.
                Defaults to 0.0.

        Returns:
            torch.Tensor: A binary vector which represents whether each
                box is empty (False) or non-empty (True).
        """
        box = self.tensor
        size_x = box[..., 3]
        size_y = box[..., 4]
        size_z = box[..., 5]
        keep = ((size_x > threshold)
                & (size_y > threshold) & (size_z > threshold))
        return keep

    def __getitem__(self, item):
        """
        Note:
            The following usages are allowed:
            1. `new_boxes = boxes[3]`:
                return a `Boxes` that contains only one box.
            2. `new_boxes = boxes[2:10]`:
                return a slice of boxes.
            3. `new_boxes = boxes[vector]`:
                where vector is a torch.BoolTensor with `length = len(boxes)`.
                Nonzero elements in the vector will be selected.
            Note that the returned Boxes might share storage with this Boxes,
            subject to Pytorch's indexing semantics.

        Returns:
            :obj:`BaseInstance3DBoxes`: A new object of
                :class:`BaseInstance3DBoxes` after indexing.
        """
        original_type = type(self)
        if isinstance(item, int):
            return original_type(
                self.tensor[item].view(1, -1),
                box_dim=self.box_dim,
                with_yaw=self.with_yaw)
        b = self.tensor[item]
        assert b.dim() == 2, \
            f'Indexing on Boxes with {item} failed to return a matrix!'
        return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)

    def __len__(self):
        """int: Number of boxes in the current object."""
        return self.tensor.shape[0]

    def __repr__(self):
        """str: Return a string that describes the object."""
        return self.__class__.__name__ + '(\n    ' + str(self.tensor) + ')'

    @classmethod
    def cat(cls, boxes_list):
        """Concatenate a list of Boxes into a single Boxes.

        Args:
            boxes_list (list[:obj:`BaseInstance3DBoxes`]): List of boxes.

        Returns:
            :obj:`BaseInstance3DBoxes`: The concatenated Boxes.
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0))
        assert all(isinstance(box, cls) for box in boxes_list)

        # use torch.cat (v.s. layers.cat)
        # so the returned boxes never share storage with input
        cat_boxes = cls(
            torch.cat([b.tensor for b in boxes_list], dim=0),
            box_dim=boxes_list[0].tensor.shape[1],
            with_yaw=boxes_list[0].with_yaw)
        return cat_boxes

    def to(self, device):
        """Convert current boxes to a specific device.

        Args:
            device (str | :obj:`torch.device`): The name of the device.

        Returns:
            :obj:`BaseInstance3DBoxes`: A new boxes object on the
                specific device.
        """
        original_type = type(self)
        return original_type(
            self.tensor.to(device),
            box_dim=self.box_dim,
            with_yaw=self.with_yaw)

    def clone(self):
        """Clone the Boxes.

        Returns:
            :obj:`BaseInstance3DBoxes`: Box object with the same properties
                as self.
        """
        original_type = type(self)
        return original_type(
            self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)

    @property
    def device(self):
+
"""str: The device of the boxes are on."""
|
393 |
+
return self.tensor.device
|
394 |
+
|
395 |
+
+    def __iter__(self):
+        """Yield a box as a Tensor of shape (box_dim,) at a time.
+
+        Returns:
+            torch.Tensor: A box of shape (box_dim,).
+        """
+        yield from self.tensor
+
+    @classmethod
+    def height_overlaps(cls, boxes1, boxes2, mode='iou'):
+        """Calculate height overlaps of two boxes.
+
+        Note:
+            This function calculates the height overlaps between ``boxes1``
+            and ``boxes2``; ``boxes1`` and ``boxes2`` should be of the
+            same type.
+
+        Args:
+            boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
+            boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
+            mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
+
+        Returns:
+            torch.Tensor: Calculated height overlap of the boxes.
+        """
+        assert isinstance(boxes1, BaseInstance3DBoxes)
+        assert isinstance(boxes2, BaseInstance3DBoxes)
+        assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should ' \
+            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
+
+        boxes1_top_height = boxes1.top_height.view(-1, 1)
+        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
+        boxes2_top_height = boxes2.top_height.view(1, -1)
+        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
+
+        highest_of_bottom = torch.max(boxes1_bottom_height,
+                                      boxes2_bottom_height)
+        lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)
+        overlaps_h = torch.clamp(lowest_of_top - highest_of_bottom, min=0)
+        return overlaps_h
+
+    @classmethod
+    def overlaps(cls, boxes1, boxes2, mode='iou'):
+        """Calculate 3D overlaps of two boxes.
+
+        Note:
+            This function calculates the overlaps between ``boxes1`` and
+            ``boxes2``; ``boxes1`` and ``boxes2`` should be of the same type.
+
+        Args:
+            boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
+            boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
+            mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
+
+        Returns:
+            torch.Tensor: Calculated 3D overlaps of the boxes.
+        """
+        assert isinstance(boxes1, BaseInstance3DBoxes)
+        assert isinstance(boxes2, BaseInstance3DBoxes)
+        assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should ' \
+            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
+
+        assert mode in ['iou', 'iof']
+
+        rows = len(boxes1)
+        cols = len(boxes2)
+        if rows * cols == 0:
+            return boxes1.tensor.new(rows, cols)
+
+        # height overlap
+        overlaps_h = cls.height_overlaps(boxes1, boxes2)
+
+        # bev overlap
+        iou2d = box_iou_rotated(boxes1.bev, boxes2.bev)
+        areas1 = (boxes1.bev[:, 2] * boxes1.bev[:, 3]).unsqueeze(1).expand(
+            rows, cols)
+        areas2 = (boxes2.bev[:, 2] * boxes2.bev[:, 3]).unsqueeze(0).expand(
+            rows, cols)
+        # recover the BEV intersection area from the rotated IoU:
+        # I = iou * (A1 + A2) / (1 + iou)
+        overlaps_bev = iou2d * (areas1 + areas2) / (1 + iou2d)
+
+        # 3d overlaps
+        overlaps_3d = overlaps_bev.to(boxes1.device) * overlaps_h
+
+        volume1 = boxes1.volume.view(-1, 1)
+        volume2 = boxes2.volume.view(1, -1)
+
+        if mode == 'iou':
+            # the clamp func is used to avoid division by 0
+            iou3d = overlaps_3d / torch.clamp(
+                volume1 + volume2 - overlaps_3d, min=1e-8)
+        else:
+            iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)
+
+        return iou3d
+
+    def new_box(self, data):
+        """Create a new box object with data.
+
+        The new box and its tensor have the same properties
+        as self and self.tensor, respectively.
+
+        Args:
+            data (torch.Tensor | numpy.array | list): Data to be copied.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``,
+                whose other properties are similar to ``self``.
+        """
+        new_tensor = self.tensor.new_tensor(data) \
+            if not isinstance(data, torch.Tensor) else data.to(self.device)
+        original_type = type(self)
+        return original_type(
+            new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
+
+    def points_in_boxes_part(self, points, boxes_override=None):
+        """Find the box in which each point is.
+
+        Args:
+            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
+                3 dimensions are (x, y, z) in LiDAR or depth coordinate.
+            boxes_override (torch.Tensor, optional): Boxes to override
+                `self.tensor`. Defaults to None.
+
+        Returns:
+            torch.Tensor: The index of the first box that each point
+                is in, in shape (M, ). Default value is -1
+                (if the point is not enclosed by any box).
+
+        Note:
+            If a point is enclosed by multiple boxes, the index of the
+            first box will be returned.
+        """
+        if boxes_override is not None:
+            boxes = boxes_override
+        else:
+            boxes = self.tensor
+        if points.dim() == 2:
+            points = points.unsqueeze(0)
+        box_idx = points_in_boxes_part(points,
+                                       boxes.unsqueeze(0).to(
+                                           points.device)).squeeze(0)
+        return box_idx
+
+    def points_in_boxes_all(self, points, boxes_override=None):
+        """Find all boxes in which each point is.
+
+        Args:
+            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
+                3 dimensions are (x, y, z) in LiDAR or depth coordinate.
+            boxes_override (torch.Tensor, optional): Boxes to override
+                `self.tensor`. Defaults to None.
+
+        Returns:
+            torch.Tensor: A tensor indicating whether a point is in a box,
+                in shape (M, T). T is the number of boxes. Denote this
+                tensor as A; if the m^th point is in the t^th box, then
+                `A[m, t] == 1`, otherwise `A[m, t] == 0`.
+        """
+        if boxes_override is not None:
+            boxes = boxes_override
+        else:
+            boxes = self.tensor
+
+        points_clone = points.clone()[..., :3]
+        if points_clone.dim() == 2:
+            points_clone = points_clone.unsqueeze(0)
+        else:
+            assert points_clone.dim() == 3 and points_clone.shape[0] == 1
+
+        boxes = boxes.to(points_clone.device).unsqueeze(0)
+        box_idxs_of_pts = points_in_boxes_all(points_clone, boxes)
+
+        return box_idxs_of_pts.squeeze(0)
+
+    def points_in_boxes(self, points, boxes_override=None):
+        warnings.warn('DeprecationWarning: points_in_boxes is a '
+                      'deprecated method, please consider using '
+                      'points_in_boxes_part.')
+        return self.points_in_boxes_part(points, boxes_override)
+
+    def points_in_boxes_batch(self, points, boxes_override=None):
+        warnings.warn('DeprecationWarning: points_in_boxes_batch is a '
+                      'deprecated method, please consider using '
+                      'points_in_boxes_all.')
+        return self.points_in_boxes_all(points, boxes_override)
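A minimal usage sketch for the box-manipulation API above (illustrative, not part of the diff). It assumes the sibling lidar_box3d.py module shipped in this upload provides LiDARInstance3DBoxes, as the imports in box_3d_mode.py below indicate, and that the package root is on PYTHONPATH.

# Hypothetical example; box rows are (x, y, z, x_size, y_size, z_size, yaw).
import torch
from det_map.det.dal.mmdet3d.core.bbox.structures.lidar_box3d import \
    LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(torch.tensor(
    [[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3],
     [55.0, -4.0, -1.4, 4.5, 1.9, 1.7, 1.2]]))

boxes.translate(torch.tensor([1.0, 0.0, 0.0]))            # shift 1 m along x
in_range = boxes.in_range_3d([0, -40, -3, 70.4, 40, 1])   # center-based test
kept = boxes[in_range & boxes.nonempty()]                 # boolean indexing
merged = LiDARInstance3DBoxes.cat([kept, kept.clone()])   # never shares storage
print(len(merged), merged.device)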
det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py
ADDED
@@ -0,0 +1,197 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from enum import IntEnum, unique
+
+import numpy as np
+import torch
+
+from .base_box3d import BaseInstance3DBoxes
+from .cam_box3d import CameraInstance3DBoxes
+from .depth_box3d import DepthInstance3DBoxes
+from .lidar_box3d import LiDARInstance3DBoxes
+from .utils import limit_period
+
+
+@unique
+class Box3DMode(IntEnum):
+    r"""Enum of different ways to represent a box.
+
+    Coordinates in LiDAR:
+
+    .. code-block:: none
+
+                    up z
+                       ^   x front
+                       |  /
+                       | /
+        left y <------ 0
+
+    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
+    and the yaw is around the z axis, thus the rotation axis=2.
+
+    Coordinates in camera:
+
+    .. code-block:: none
+
+                z front
+               /
+              /
+             0 ------> x right
+             |
+             |
+             v
+        down y
+
+    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
+    and the yaw is around the y axis, thus the rotation axis=1.
+
+    Coordinates in Depth mode:
+
+    .. code-block:: none
+
+        up z
+           ^   y front
+           |  /
+           | /
+           0 ------> x right
+
+    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
+    and the yaw is around the z axis, thus the rotation axis=2.
+    """
+
+    LIDAR = 0
+    CAM = 1
+    DEPTH = 2
+
+    @staticmethod
+    def convert(box, src, dst, rt_mat=None, with_yaw=True):
+        """Convert boxes from `src` mode to `dst` mode.
+
+        Args:
+            box (tuple | list | np.ndarray |
+                torch.Tensor | :obj:`BaseInstance3DBoxes`):
+                Can be a k-tuple, k-list or an Nxk array/tensor,
+                where k >= 7.
+            src (:obj:`Box3DMode`): The src Box mode.
+            dst (:obj:`Box3DMode`): The target Box mode.
+            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
+                translation matrix between different coordinates.
+                Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along a change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+            with_yaw (bool, optional): If `box` is an instance of
+                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
+                Defaults to True.
+
+        Returns:
+            (tuple | list | np.ndarray | torch.Tensor |
+                :obj:`BaseInstance3DBoxes`):
+                The converted box of the same type.
+        """
+        if src == dst:
+            return box
+
+        is_numpy = isinstance(box, np.ndarray)
+        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
+        single_box = isinstance(box, (list, tuple))
+        if single_box:
+            assert len(box) >= 7, (
+                'Box3DMode.convert takes either a k-tuple/list or '
+                'an Nxk array/tensor, where k >= 7')
+            arr = torch.tensor(box)[None, :]
+        else:
+            # avoid modifying the input box
+            if is_numpy:
+                arr = torch.from_numpy(np.asarray(box)).clone()
+            elif is_Instance3DBoxes:
+                arr = box.tensor.clone()
+            else:
+                arr = box.clone()
+
+        if is_Instance3DBoxes:
+            with_yaw = box.with_yaw
+
+        # convert box from `src` mode to `dst` mode.
+        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
+        if with_yaw:
+            yaw = arr[..., 6:7]
+        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+            if with_yaw:
+                yaw = -yaw - np.pi / 2
+                yaw = limit_period(yaw, period=np.pi * 2)
+        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+            if with_yaw:
+                yaw = -yaw - np.pi / 2
+                yaw = limit_period(yaw, period=np.pi * 2)
+        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+            if with_yaw:
+                yaw = -yaw
+        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+            if with_yaw:
+                yaw = -yaw
+        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
+            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
+            if with_yaw:
+                yaw = yaw + np.pi / 2
+                yaw = limit_period(yaw, period=np.pi * 2)
+        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
+            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
+            if with_yaw:
+                yaw = yaw - np.pi / 2
+                yaw = limit_period(yaw, period=np.pi * 2)
+        else:
+            raise NotImplementedError(
+                f'Conversion from Box3DMode {src} to {dst} '
+                'is not supported yet')
+
+        if not isinstance(rt_mat, torch.Tensor):
+            rt_mat = arr.new_tensor(rt_mat)
+        if rt_mat.size(1) == 4:
+            extended_xyz = torch.cat(
+                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
+            xyz = extended_xyz @ rt_mat.t()
+        else:
+            xyz = arr[..., :3] @ rt_mat.t()
+
+        if with_yaw:
+            remains = arr[..., 7:]
+            arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
+        else:
+            remains = arr[..., 6:]
+            arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1)
+
+        # convert arr to the original type
+        original_type = type(box)
+        if single_box:
+            return original_type(arr.flatten().tolist())
+        if is_numpy:
+            return arr.numpy()
+        elif is_Instance3DBoxes:
+            if dst == Box3DMode.CAM:
+                target_type = CameraInstance3DBoxes
+            elif dst == Box3DMode.LIDAR:
+                target_type = LiDARInstance3DBoxes
+            elif dst == Box3DMode.DEPTH:
+                target_type = DepthInstance3DBoxes
+            else:
+                raise NotImplementedError(
+                    f'Conversion to {dst} through {original_type} '
+                    'is not supported yet')
+            return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)
+        else:
+            return arr
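A short conversion sketch for Box3DMode.convert above (illustrative, not part of the diff); with rt_mat=None, the default axis-permutation matrices defined in each branch are used:

# Hypothetical example: converting a 7-tuple and an Nx7 tensor between modes.
import torch
from det_map.det.dal.mmdet3d.core.bbox.structures.box_3d_mode import Box3DMode

# one box as a 7-tuple: (x, y, z, x_size, y_size, z_size, yaw)
cam_box = (1.0, 1.5, 10.0, 4.0, 1.6, 1.8, 0.2)
lidar_box = Box3DMode.convert(cam_box, Box3DMode.CAM, Box3DMode.LIDAR)

# an Nx7 tensor goes through the same entry point
boxes = torch.tensor([[1.0, 1.5, 10.0, 4.0, 1.6, 1.8, 0.2]])
depth_boxes = Box3DMode.convert(boxes, Box3DMode.CAM, Box3DMode.DEPTH)
print(lidar_box, depth_boxes.shape)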
det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py
ADDED
@@ -0,0 +1,354 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from ...points import BasePoints
+from .base_box3d import BaseInstance3DBoxes
+from .utils import rotation_3d_in_axis, yaw2local
+
+
+class CameraInstance3DBoxes(BaseInstance3DBoxes):
+    """3D boxes of instances in CAM coordinates.
+
+    Coordinates in camera:
+
+    .. code-block:: none
+
+                z front (yaw=-0.5*pi)
+               /
+              /
+             0 ------> x right (yaw=0)
+             |
+             |
+             v
+        down y
+
+    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
+    and the yaw is around the y axis, thus the rotation axis=1.
+    The yaw is 0 at the positive direction of x axis, and decreases from
+    the positive direction of x to the positive direction of z.
+
+    Attributes:
+        tensor (torch.Tensor): Float matrix in shape (N, box_dim).
+        box_dim (int): Integer indicating the dimension of a box.
+            Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
+        with_yaw (bool): If False, the value of yaw is set to 0 and the
+            boxes are treated as axis-aligned boxes tightly enclosing
+            the original boxes.
+    """
+    YAW_AXIS = 1
+
+    def __init__(self,
+                 tensor,
+                 box_dim=7,
+                 with_yaw=True,
+                 origin=(0.5, 1.0, 0.5)):
+        if isinstance(tensor, torch.Tensor):
+            device = tensor.device
+        else:
+            device = torch.device('cpu')
+        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
+        if tensor.numel() == 0:
+            # Use reshape, so we don't end up creating a new tensor that
+            # does not depend on the inputs (and consequently confuses jit)
+            tensor = tensor.reshape((0, box_dim)).to(
+                dtype=torch.float32, device=device)
+        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
+
+        if tensor.shape[-1] == 6:
+            # If the dimension of boxes is 6, we expand box_dim by padding
+            # 0 as a fake yaw and set with_yaw to False.
+            assert box_dim == 6
+            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
+            tensor = torch.cat((tensor, fake_rot), dim=-1)
+            self.box_dim = box_dim + 1
+            self.with_yaw = False
+        else:
+            self.box_dim = box_dim
+            self.with_yaw = with_yaw
+        self.tensor = tensor.clone()
+
+        if origin != (0.5, 1.0, 0.5):
+            dst = self.tensor.new_tensor((0.5, 1.0, 0.5))
+            src = self.tensor.new_tensor(origin)
+            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
+
+    @property
+    def height(self):
+        """torch.Tensor: A vector with height of each box in shape (N, )."""
+        return self.tensor[:, 4]
+
+    @property
+    def top_height(self):
+        """torch.Tensor:
+            A vector with the top height of each box in shape (N, )."""
+        # the positive direction is down rather than up
+        return self.bottom_height - self.height
+
+    @property
+    def bottom_height(self):
+        """torch.Tensor:
+            A vector with bottom's height of each box in shape (N, )."""
+        return self.tensor[:, 1]
+
+    @property
+    def local_yaw(self):
+        """torch.Tensor:
+            A vector with local yaw of each box in shape (N, ).
+            local_yaw equals alpha in KITTI, which is commonly used in
+            the monocular 3D object detection task, so only
+            :obj:`CameraInstance3DBoxes` has this property.
+        """
+        yaw = self.yaw
+        loc = self.gravity_center
+        local_yaw = yaw2local(yaw, loc)
+
+        return local_yaw
+
+    @property
+    def gravity_center(self):
+        """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
+        bottom_center = self.bottom_center
+        gravity_center = torch.zeros_like(bottom_center)
+        gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
+        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
+        return gravity_center
+
+    @property
+    def corners(self):
+        """torch.Tensor: Coordinates of corners of all the boxes in
+        shape (N, 8, 3).
+
+        Convert boxes to corners in clockwise order, in the form of
+        (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
+
+        .. code-block:: none
+
+                         front z
+                              /
+                             /
+               (x0, y0, z1) + ----------- + (x1, y0, z1)
+                           /|            /|
+                          / |           / |
+            (x0, y0, z0) + ----------- +  + (x1, y1, z1)
+                         |  /   .      | /
+                         | / origin    |/
+            (x0, y1, z0) + ----------- + -------> x right
+                         |             (x1, y1, z0)
+                         |
+                         v
+                    down y
+        """
+        if self.tensor.numel() == 0:
+            return torch.empty([0, 8, 3], device=self.tensor.device)
+
+        dims = self.dims
+        corners_norm = torch.from_numpy(
+            np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
+                device=dims.device, dtype=dims.dtype)
+
+        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
+        # use relative origin (0.5, 1, 0.5)
+        corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
+        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
+
+        corners = rotation_3d_in_axis(
+            corners, self.tensor[:, 6], axis=self.YAW_AXIS)
+        corners += self.tensor[:, :3].view(-1, 1, 3)
+        return corners
+
+    @property
+    def bev(self):
+        """torch.Tensor: 2D BEV box of each box with rotation
+            in XYWHR format, in shape (N, 5)."""
+        bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()
+        # positive direction of the gravity axis
+        # in cam coord system points to the earth
+        # so the bev yaw angle needs to be reversed
+        bev[:, -1] = -bev[:, -1]
+        return bev
+
+    def rotate(self, angle, points=None):
+        """Rotate boxes with points (optional) with the given angle or
+        rotation matrix.
+
+        Args:
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
+            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
+                Points to rotate. Defaults to None.
+
+        Returns:
+            tuple or None: When ``points`` is None, the function returns
+                None, otherwise it returns the rotated points and the
+                rotation matrix ``rot_mat_T``.
+        """
+        if not isinstance(angle, torch.Tensor):
+            angle = self.tensor.new_tensor(angle)
+
+        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
+            f'invalid rotation angle shape {angle.shape}'
+
+        if angle.numel() == 1:
+            self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(
+                self.tensor[:, 0:3],
+                angle,
+                axis=self.YAW_AXIS,
+                return_mat=True)
+        else:
+            rot_mat_T = angle
+            rot_sin = rot_mat_T[2, 0]
+            rot_cos = rot_mat_T[0, 0]
+            angle = np.arctan2(rot_sin, rot_cos)
+            self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T
+
+        self.tensor[:, 6] += angle
+
+        if points is not None:
+            if isinstance(points, torch.Tensor):
+                points[:, :3] = points[:, :3] @ rot_mat_T
+            elif isinstance(points, np.ndarray):
+                rot_mat_T = rot_mat_T.cpu().numpy()
+                points[:, :3] = np.dot(points[:, :3], rot_mat_T)
+            elif isinstance(points, BasePoints):
+                points.rotate(rot_mat_T)
+            else:
+                raise ValueError
+            return points, rot_mat_T
+
+    def flip(self, bev_direction='horizontal', points=None):
+        """Flip the boxes in BEV along the given BEV direction.
+
+        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
+
+        Args:
+            bev_direction (str): Flip direction (horizontal or vertical).
+            points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
+                Points to flip. Defaults to None.
+
+        Returns:
+            torch.Tensor, numpy.ndarray or None: Flipped points.
+        """
+        assert bev_direction in ('horizontal', 'vertical')
+        if bev_direction == 'horizontal':
+            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+        elif bev_direction == 'vertical':
+            self.tensor[:, 2::7] = -self.tensor[:, 2::7]
+            if self.with_yaw:
+                self.tensor[:, 6] = -self.tensor[:, 6]
+
+        if points is not None:
+            assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
+            if isinstance(points, (torch.Tensor, np.ndarray)):
+                if bev_direction == 'horizontal':
+                    points[:, 0] = -points[:, 0]
+                elif bev_direction == 'vertical':
+                    points[:, 2] = -points[:, 2]
+            elif isinstance(points, BasePoints):
+                points.flip(bev_direction)
+            return points
+
+    @classmethod
+    def height_overlaps(cls, boxes1, boxes2, mode='iou'):
+        """Calculate height overlaps of two boxes.
+
+        This function calculates the height overlaps between ``boxes1`` and
+        ``boxes2``, where ``boxes1`` and ``boxes2`` should be of the same
+        type.
+
+        Args:
+            boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.
+            boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.
+            mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
+
+        Returns:
+            torch.Tensor: Calculated iou of boxes' heights.
+        """
+        assert isinstance(boxes1, CameraInstance3DBoxes)
+        assert isinstance(boxes2, CameraInstance3DBoxes)
+
+        boxes1_top_height = boxes1.top_height.view(-1, 1)
+        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
+        boxes2_top_height = boxes2.top_height.view(1, -1)
+        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
+
+        # positive direction of the gravity axis
+        # in cam coord system points to the earth
+        highest_of_bottom = torch.min(boxes1_bottom_height,
+                                      boxes2_bottom_height)
+        lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
+        overlaps_h = torch.clamp(highest_of_bottom - lowest_of_top, min=0)
+        return overlaps_h
+
+    def convert_to(self, dst, rt_mat=None):
+        """Convert self to ``dst`` mode.
+
+        Args:
+            dst (:obj:`Box3DMode`): The target Box mode.
+            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
+                translation matrix between different coordinates.
+                Defaults to None.
+                The conversion from ``src`` coordinates to ``dst`` coordinates
+                usually comes along a change of sensors, e.g., from camera
+                to LiDAR. This requires a transformation matrix.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`:
+                The converted box of the same type in the ``dst`` mode.
+        """
+        from .box_3d_mode import Box3DMode
+        return Box3DMode.convert(
+            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
+
+    def points_in_boxes_part(self, points, boxes_override=None):
+        """Find the box in which each point is.
+
+        Args:
+            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
+                3 dimensions are (x, y, z) in LiDAR or depth coordinate.
+            boxes_override (torch.Tensor, optional): Boxes to override
+                `self.tensor`. Defaults to None.
+
+        Returns:
+            torch.Tensor: The index of the box in which
+                each point is, in shape (M, ). Default value is -1
+                (if the point is not enclosed by any box).
+        """
+        from .coord_3d_mode import Coord3DMode
+
+        points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
+                                           Coord3DMode.LIDAR)
+        if boxes_override is not None:
+            boxes_lidar = boxes_override
+        else:
+            boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
+                                              Coord3DMode.LIDAR)
+
+        box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar)
+        return box_idx
+
+    def points_in_boxes_all(self, points, boxes_override=None):
+        """Find all boxes in which each point is.
+
+        Args:
+            points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
+                3 dimensions are (x, y, z) in LiDAR or depth coordinate.
+            boxes_override (torch.Tensor, optional): Boxes to override
+                `self.tensor`. Defaults to None.
+
+        Returns:
+            torch.Tensor: The index of all boxes in which each point is,
+                in shape (M, T).
+        """
+        from .coord_3d_mode import Coord3DMode
+
+        points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
+                                           Coord3DMode.LIDAR)
+        if boxes_override is not None:
+            boxes_lidar = boxes_override
+        else:
+            boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
+                                              Coord3DMode.LIDAR)
+
+        box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar)
+        return box_idx
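A closing usage sketch for CameraInstance3DBoxes (illustrative, not part of the diff); it assumes the `dims`, `yaw` and `bottom_center` properties inherited from BaseInstance3DBoxes earlier in this upload:

# Hypothetical example: derived geometry and mode conversion.
import torch
from det_map.det.dal.mmdet3d.core.bbox.structures.box_3d_mode import Box3DMode
from det_map.det.dal.mmdet3d.core.bbox.structures.cam_box3d import \
    CameraInstance3DBoxes

boxes = CameraInstance3DBoxes(torch.tensor(
    [[1.0, 1.5, 10.0, 4.0, 1.6, 1.8, 0.2]]))  # y points down in CAM

print(boxes.gravity_center)  # bottom center raised by half the box height
print(boxes.corners.shape)   # (N, 8, 3)
print(boxes.bev)             # XYWHR in the x-z plane, yaw sign flipped

boxes.rotate(0.1)            # rotate around the y (yaw) axis
boxes.flip('horizontal')     # mirror the x axis in BEV
lidar_boxes = boxes.convert_to(Box3DMode.LIDAR)  # uses the default rt_mat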