lkllkl committed
Commit da2e2ac · verified · 1 parent: 73c776c

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.

Files changed (50)
  1. .gitattributes +2 -0
  2. .gitignore +27 -0
  3. Dockerfile +24 -0
  4. LICENSE +201 -0
  5. README.md +125 -0
  6. assets/ckpts.png +0 -0
  7. assets/navsim_transparent.png +3 -0
  8. det_map/__init__.py +0 -0
  9. det_map/agent_lightning.py +93 -0
  10. det_map/config/agent/det_agent.yaml +203 -0
  11. det_map/config/agent/map_agent.yaml +320 -0
  12. det_map/config/defaults/default_common.yaml +23 -0
  13. det_map/config/defaults/default_evaluation.yaml +7 -0
  14. det_map/config/defaults/ray_distributed_no_torch.yaml +8 -0
  15. det_map/config/scene_filter/det_all_scenes.yaml +12 -0
  16. det_map/config/scene_filter/navtiny.yaml +265 -0
  17. det_map/config/splits/default_train_val_test_log_split.yaml +0 -0
  18. det_map/config/train_det.yaml +48 -0
  19. det_map/data/__init__.py +0 -0
  20. det_map/data/datasets/__init__.py +0 -0
  21. det_map/data/datasets/dataclasses.py +521 -0
  22. det_map/data/datasets/dataloader.py +172 -0
  23. det_map/data/datasets/dataset.py +41 -0
  24. det_map/data/datasets/dataset_det.py +28 -0
  25. det_map/data/datasets/feature_builders.py +94 -0
  26. det_map/data/datasets/lidar_utils.py +66 -0
  27. det_map/data/pipelines/__init__.py +0 -0
  28. det_map/data/pipelines/color_utils.py +357 -0
  29. det_map/data/pipelines/filter_lidar.py +74 -0
  30. det_map/data/pipelines/lidar_aug.py +151 -0
  31. det_map/data/pipelines/point_shuffle.py +17 -0
  32. det_map/data/pipelines/prepare_depth.py +76 -0
  33. det_map/data/pipelines/prepare_img.py +218 -0
  34. det_map/det/__init__.py +0 -0
  35. det_map/det/dal/__init__.py +0 -0
  36. det_map/det/dal/dal.py +159 -0
  37. det_map/det/dal/mmdet3d/__init__.py +0 -0
  38. det_map/det/dal/mmdet3d/core/__init__.py +6 -0
  39. det_map/det/dal/mmdet3d/core/bbox/__init__.py +24 -0
  40. det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py +6 -0
  41. det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py +148 -0
  42. det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py +827 -0
  43. det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py +7 -0
  44. det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py +124 -0
  45. det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py +10 -0
  46. det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py +232 -0
  47. det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py +18 -0
  48. det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py +578 -0
  49. det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py +197 -0
  50. det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py +354 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/navsim_transparent.png filter=lfs diff=lfs merge=lfs -text
+ navsim/agents/backbones/ops_dcnv3/build/temp.linux-x86_64-cpython-39/zhenxinl_nuplan/navsim_workspace/navsim_ours/navsim/agents/backbones/ops_dcnv3/src/cuda/dcnv3_cuda.o filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,27 @@
+ # python
+ build/
+ vocab_score_local/
+ vocab_score_full/
+ vocab_score_full_8192/
+ vocab_score_local_8192/
+ models_local/
+ traj_local/
+ *.so
+ *.pyc
+ **/__pycache__/
+ dist/
+ .pytest_cache/*
+ .pydevproject
+ .idea/
+ debug/
+ # IDE
+ .vscode/*
+
+ # Pip
+ *.egg-info
+
+ # files
+ *.log
+
+ *.jpg
+ *.pcd
Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM nvcr.io/nvidia/pytorch:23.05-py3
+ RUN apt-get update
+ RUN apt-get install -y tmux htop
+
+ RUN git clone https://[email protected]/woxihuanjiangguo/navsim_ours.git /navsim_ours
+ WORKDIR /navsim_ours
+
+ ENV HYDRA_FULL_ERROR=1
+ ENV NUPLAN_MAP_VERSION="nuplan-maps-v1.0"
+ ENV NUPLAN_MAPS_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/maps"
+ ENV NAVSIM_EXP_ROOT="/zhenxinl_nuplan/navsim_workspace/exp"
+ ENV NAVSIM_DEVKIT_ROOT="/navsim_ours"
+ ENV NAVSIM_TRAJPDM_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/traj_pdm"
+ ENV OPENSCENE_DATA_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset"
+ ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
+ ENV CFLAGS="-I$CUDA_HOME/include $CFLAGS"
+
+ RUN pip uninstall torch torchvision torchaudio -y
+ RUN pip3 install torch torchvision torchaudio
+ RUN pip install openmim
+ RUN mim install mmdet==2.28.2
+ RUN pip install spconv-cu120
+ RUN pip install numba
+ RUN pip install -e .
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2024 autonomousvision
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,125 @@
+ <div id="top" align="center">
+
+ <p align="center">
+   <img src="assets/navsim_transparent.png" width="500">
+ </p>
+
+ **NAVSIM:** *Data-Driven **N**on-Reactive **A**utonomous **V**ehicle **Sim**ulation*
+
+ </div>
+
+
+ ## Highlights <a name="highlight"></a>
+
+ 🔥 NAVSIM gathers simulation-based metrics (such as progress and time to collision) for end-to-end driving by unrolling simplified bird's eye view abstractions of scenes for a short simulation horizon. It operates under the condition that the policy has no influence on the environment, which enables **efficient, open-loop metric computation** while being **better aligned with closed-loop** evaluations than traditional displacement errors.
+
+ > NAVSIM attempts to address some of the challenges faced by the community:
+ >
+ > 1. **Providing a principled evaluation** (by incorporating ideas + data from nuPlan)
+ >    - Key idea: the **PDM Score**, a multi-dimensional metric computed in open loop with strong correlation to closed-loop metrics
+ >    - Critical scenario sampling, focusing on situations with intention changes where the ego history cannot be extrapolated into a plan
+ >    - An official leaderboard on Hugging Face that remains open and prevents ambiguity in metric definitions across projects
+ >
+ > 2. **Maintaining ease of use** (by emulating nuScenes)
+ >    - Simple data format and a reasonably sized download (far smaller than nuPlan's 20+ TB)
+ >    - Large-scale publicly available test split for internal benchmarking
+ >    - Continually maintained devkit
+
+ 🏁 **NAVSIM** will serve as a main track in the **`CVPR 2024 Autonomous Grand Challenge`**. The leaderboard for the challenge is open! For further details, please [check the challenge website](https://opendrivelab.com/challenge2024/)!
+
+ <p align="center">
+   <img src="assets/navsim_cameras.gif" width="800">
+ </p>
+
+ ## Table of Contents
+ 1. [Highlights](#highlight)
+ 2. [Getting started](#gettingstarted)
+ 3. [Changelog](#changelog)
+ 4. [License and citation](#licenseandcitation)
+ 5. [Other resources](#otherresources)
+
+
+ ## Getting started <a name="gettingstarted"></a>
+
+ - [Download and installation](docs/install.md)
+ - [Understanding and creating agents](docs/agents.md)
+ - [Understanding the data format and classes](docs/cache.md)
+ - [Dataset splits vs. filtered training / test splits](docs/splits.md)
+ - [Understanding the PDM Score](docs/metrics.md)
+ - [Submitting to the Leaderboard](docs/submission.md)
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## Changelog <a name="changelog"></a>
+ - **`[2024/04/21]`** NAVSIM v1.0 release (official devkit version for [AGC 2024](https://opendrivelab.com/challenge2024/))
+   - **IMPORTANT NOTE**: The data split `competition_test` was renamed to `private_test_e2e`. Please adapt your directory name accordingly. For details see [installation](docs/install.md).
+   - Parallelization of metric caching / evaluation
+   - Adds [Transfuser](https://arxiv.org/abs/2205.15997) baseline (see [agents](docs/agents.md#Baselines))
+   - Adds standardized training and test filtered splits (see [splits](docs/splits.md))
+   - Visualization tools (see [tutorial_visualization.ipynb](tutorial/tutorial_visualization.ipynb))
+   - Refactoring
+ - **`[2024/04/03]`** NAVSIM v0.4 release
+   - Support for test-phase frames of the competition
+   - Download script for trainval
+   - EgoStatus MLP agent and training pipeline
+   - Refactoring, fixes, documentation
+ - **`[2024/03/25]`** NAVSIM v0.3 release (official devkit version for the warm-up phase)
+   - Changes env variable NUPLAN_EXP_ROOT to NAVSIM_EXP_ROOT
+   - Adds code for leaderboard submission
+   - Major refactoring of dataloading and configs
+ - **`[2024/03/11]`** NAVSIM v0.2 release
+   - Easier installation and download
+   - Mini and test data split integration
+   - Privileged `Human` agent
+ - **`[2024/02/20]`** NAVSIM v0.1 release (initial demo)
+   - OpenScene-mini sensor blobs and annotation logs
+   - Naive `ConstantVelocity` agent
+
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## License and citation <a name="licenseandcitation"></a>
+ All assets and code in this repository are under the [Apache 2.0 license](./LICENSE) unless specified otherwise. The datasets (including nuPlan and OpenScene) inherit their own distribution licenses. Please consider citing our paper and project if they help your research.
+
+ ```BibTeX
+ @misc{Contributors2024navsim,
+   title={NAVSIM: Data-Driven Non-Reactive Autonomous Vehicle Simulation},
+   author={NAVSIM Contributors},
+   howpublished={\url{https://github.com/autonomousvision/navsim}},
+   year={2024}
+ }
+ ```
+
+ ```BibTeX
+ @inproceedings{Dauner2023CORL,
+   title = {Parting with Misconceptions about Learning-based Vehicle Motion Planning},
+   author = {Daniel Dauner and Marcel Hallgarten and Andreas Geiger and Kashyap Chitta},
+   booktitle = {Conference on Robot Learning (CoRL)},
+   year = {2023}
+ }
+ ```
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## Other resources <a name="otherresources"></a>
+
+ <a href="https://twitter.com/AutoVisionGroup" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Awesome Vision Group?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/kashyap7x" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Kashyap Chitta?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/DanielDauner" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Daniel Dauner?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/MHallgarten0797" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Marcel Hallgarten?style=social&color=brightgreen&logo=twitter" />
+ </a>
+
+ - [SLEDGE](https://github.com/autonomousvision/sledge) | [tuPlan garage](https://github.com/autonomousvision/tuplan_garage) | [CARLA garage](https://github.com/autonomousvision/carla_garage) | [Survey on E2EAD](https://github.com/OpenDriveLab/End-to-end-Autonomous-Driving)
+ - [PlanT](https://github.com/autonomousvision/plant) | [KING](https://github.com/autonomousvision/king) | [TransFuser](https://github.com/autonomousvision/transfuser) | [NEAT](https://github.com/autonomousvision/neat)
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
assets/ckpts.png ADDED
assets/navsim_transparent.png ADDED

Git LFS Details

  • SHA256: 77619d3f762206401f7a1221e0999df257bd0b4f9c5793667ad21413ddd031b6
  • Pointer size: 132 Bytes
  • Size of remote file: 4.85 MB
det_map/__init__.py ADDED
File without changes
det_map/agent_lightning.py ADDED
@@ -0,0 +1,93 @@
+ from typing import Dict, Tuple, List
+
+ import pytorch_lightning as pl
+ import torch
+ from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
+ from torch import Tensor
+
+ from navsim.agents.abstract_agent import AbstractAgent
+ from navsim.agents.vadv2.vadv2_agent import Vadv2Agent
+ from navsim.common.dataclasses import Trajectory
+
+
+ class AgentLightningModuleMap(pl.LightningModule):
+     def __init__(
+         self,
+         agent: AbstractAgent,
+     ):
+         super().__init__()
+         self.agent = agent
+
+     def _step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor], List[str]],
+         logging_prefix: str,
+     ):
+         features, targets = batch
+         if logging_prefix in ['train', 'val'] and isinstance(self.agent, Vadv2Agent):
+             prediction = self.agent.forward_train(features, targets['interpolated_traj'])
+         else:
+             prediction = self.agent.forward(features)
+
+         loss, loss_dict = self.agent.compute_loss(features, targets, prediction)
+
+         for k, v in loss_dict.items():
+             self.log(f"{logging_prefix}/{k}", v, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
+         self.log(f"{logging_prefix}/loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
+         return loss
+
+     def training_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         return self._step(batch, "train")
+
+     def validation_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         return self._step(batch, "val")
+
+     def configure_optimizers(self):
+         return self.agent.get_optimizers()
+
+     def predict_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         features, targets, tokens = batch
+         self.agent.eval()
+         with torch.no_grad():
+             predictions = self.agent.forward(features)
+             poses = predictions["trajectory"].cpu().numpy()
+
+         imis = predictions["imi"].softmax(-1).log().cpu().numpy()
+         nocs = predictions["noc"].log().cpu().numpy()
+         das = predictions["da"].log().cpu().numpy()
+         ttcs = predictions["ttc"].log().cpu().numpy()
+         comforts = predictions["comfort"].log().cpu().numpy()
+         progresses = predictions["progress"].log().cpu().numpy()
+         if poses.shape[1] == 40:
+             interval_length = 0.1
+         else:
+             interval_length = 0.5
+
+         return {token: {
+             'trajectory': Trajectory(pose, TrajectorySampling(time_horizon=4, interval_length=interval_length)),
+             'imi': imi,
+             'noc': noc,
+             'da': da,
+             'ttc': ttc,
+             'comfort': comfort,
+             'progress': progress
+         } for pose, imi, noc, da, ttc, comfort, progress, token in zip(poses, imis, nocs, das, ttcs, comforts, progresses,
+                                                                        tokens)}
+     # def on_after_backward(self) -> None:
+     #     print("on_after_backward enter")
+     #     for name, param in self.named_parameters():
+     #         if param.grad is None:
+     #             print(name)
+     #     print("on_after_backward exit")
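The wrapper above only assumes an agent object that exposes `forward`, `compute_loss`, and `get_optimizers` (plus `forward_train` for `Vadv2Agent`). As a hedged, self-contained sketch of that contract, the toy agent and toy dataloader below are hypothetical placeholders (nothing like `DummyAgent` exists in this commit); only `AgentLightningModuleMap` and the standard PyTorch Lightning `Trainer` API are taken from the sources above. In the real pipeline the agent and dataloaders would instead be built from the Hydra configs under `det_map/config`.

```python
import pytorch_lightning as pl
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from det_map.agent_lightning import AgentLightningModuleMap


class DummyAgent(nn.Module):
    """Hypothetical stand-in implementing only what the wrapper calls:
    forward, compute_loss, and get_optimizers."""

    def __init__(self):
        super().__init__()
        self.head = nn.Linear(4, 2)

    def forward(self, features):
        return {"trajectory": self.head(features["state"])}

    def compute_loss(self, features, targets, prediction):
        loss = nn.functional.mse_loss(prediction["trajectory"], targets["trajectory"])
        return loss, {"traj_l2": loss.detach()}

    def get_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)


def collate(samples):
    # Produce the (features, targets) dict pair that _step unpacks.
    states, trajs = zip(*samples)
    return {"state": torch.stack(states)}, {"trajectory": torch.stack(trajs)}


dataset = TensorDataset(torch.randn(32, 4), torch.randn(32, 2))
loader = DataLoader(dataset, batch_size=8, collate_fn=collate)

module = AgentLightningModuleMap(agent=DummyAgent())
trainer = pl.Trainer(max_epochs=1, accelerator="cpu", logger=False, enable_checkpointing=False)
trainer.fit(module, train_dataloaders=loader)
```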
det_map/config/agent/det_agent.yaml ADDED
@@ -0,0 +1,203 @@
1
+ _target_: det_map.det.det_agent.DetAgent
2
+ _convert_: 'all'
3
+
4
+ is_train: &is_train
5
+ is_train: True
6
+
7
+ ranges: &ranges
8
+ x_range: (-54.0, 54.0)
9
+ y_range: (-54.0, 54.0)
10
+ z_range: (-10.0, 10.0)
11
+
12
+ point_cloud_range: &point_cloud_range
13
+ point_cloud_range: [ -54.0, -54.0, -10.0, 54.0, 54.0, 10.0 ]
14
+ voxel_size: &voxel_size
15
+ voxel_size: [0.075, 0.075, 0.2]
16
+
17
+
18
+ grid_config: &grid_config
19
+ grid_config:
20
+ x: (-54.0, 54.0, 0.6)
21
+ y: (-54.0, 54.0, 0.6)
22
+ z: (-10.0, 10.0, 20.0)
23
+ depth: (1.0, 60.0, 0.5)
24
+
25
+ model:
26
+ _target_: det_map.det.dal.dal.DAL
27
+ _convert_: 'all'
28
+ use_grid_mask: true
29
+ pts_voxel_layer:
30
+ max_num_points: 10
31
+ <<: *voxel_size
32
+ <<: *point_cloud_range
33
+ max_voxels: [ 120000, 160000 ]
34
+ pts_voxel_encoder:
35
+ type: HardSimpleVFE
36
+ num_features: 5
37
+ pts_middle_encoder:
38
+ type: SparseEncoder
39
+ in_channels: 5
40
+ base_channels: 24
41
+ sparse_shape: [ 41, 1440, 1440 ]
42
+ output_channels: 192
43
+ order: [ 'conv', 'norm', 'act' ]
44
+ encoder_channels: ((24, 24, 48), (48, 48, 96), (96, 96, 192), (192, 192))
45
+ encoder_paddings: ((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0))
46
+ block_type: basicblock
47
+ pts_backbone:
48
+ type: SECOND
49
+ in_channels: 384
50
+ out_channels: [ 192, 384 ]
51
+ layer_nums: [ 8, 8 ]
52
+ layer_strides: [ 1, 2 ]
53
+ norm_cfg:
54
+ type: BN
55
+ eps: 1e-3
56
+ momentum: 0.01
57
+ conv_cfg:
58
+ type: Conv2d
59
+ bias: false
60
+ pts_neck:
61
+ type: SECONDFPN
62
+ in_channels: [ 192, 384 ]
63
+ out_channels: [ 256, 256 ]
64
+ upsample_strides: [ 1, 2 ]
65
+ norm_cfg:
66
+ type: BN
67
+ eps: 1e-3
68
+ momentum: 0.01
69
+ upsample_cfg:
70
+ type: deconv
71
+ bias: false
72
+ use_conv_for_no_stride: true
73
+ img_backbone:
74
+ pretrained: 'torchvision://resnet18'
75
+ type: ResNet
76
+ depth: 18
77
+ num_stages: 4
78
+ out_indices: [ 1, 2, 3 ]
79
+ frozen_stages: -1
80
+ norm_cfg:
81
+ type: BN
82
+ requires_grad: true
83
+ norm_eval: false
84
+ with_cp: false
85
+ style: pytorch
86
+ img_neck:
87
+ type: CustomFPN
88
+ in_channels: [ 128, 256, 512 ]
89
+ out_channels: img_feat_dim
90
+ num_outs: 1
91
+ start_level: 0
92
+ out_ids: [ 0 ]
93
+ img_view_transformer:
94
+ type: LSSViewTransformer
95
+ <<: *grid_config
96
+ input_size: data_config['input_size']
97
+ in_channels: img_feat_dim
98
+ out_channels: feat_bev_img_dim
99
+ downsample: 8
100
+ with_depth_from_lidar: true
101
+ pts_bbox_head:
102
+ type: DALHead
103
+ feat_bev_img_dim: feat_bev_img_dim
104
+ img_feat_dim: img_feat_dim
105
+ sparse_fuse_layers: 2
106
+ dense_fuse_layers: 2
107
+ instance_attn: false
108
+ num_proposals: 200
109
+ in_channels: 512
110
+ hidden_channel: 128
111
+ num_classes: 10
112
+ num_decoder_layers: 1
113
+ num_heads: 8
114
+ nms_kernel_size: 3
115
+ ffn_channel: 256
116
+ dropout: 0.1
117
+ bn_momentum: 0.1
118
+ activation: relu
119
+ auxiliary: true
120
+ common_heads:
121
+ center: [ 2, 2 ]
122
+ height: [ 1, 2 ]
123
+ dim: [ 3, 2 ]
124
+ rot: [ 2, 2 ]
125
+ vel: [ 2, 2 ]
126
+ bbox_coder:
127
+ type: TransFusionBBoxCoder
128
+ pc_range: point_cloud_range[:2]
129
+ post_center_range: [ -61.2, -61.2, -10.0, 61.2, 61.2, 10.0 ]
130
+ score_threshold: 0.0
131
+ out_size_factor: 8
132
+ voxel_size: voxel_size[:2]
133
+ code_size: 10
134
+ loss_cls:
135
+ type: FocalLoss
136
+ use_sigmoid: true
137
+ gamma: 2.0
138
+ alpha: 0.25
139
+ reduction: mean
140
+ loss_weight: 1.0
141
+ loss_heatmap:
142
+ type: GaussianFocalLoss
143
+ reduction: mean
144
+
145
+ pipelines:
146
+ lidar_filter:
147
+ _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
148
+ _convert_: 'all'
149
+ close_radius: 1.0
150
+ <<: *ranges
151
+
152
+ # only include in training
153
+ point_shuffle:
154
+ _target_: det_map.data.pipelines.point_shuffle.PointShuffle
155
+ <<: *is_train
156
+
157
+ lidar_aug:
158
+ _target_: det_map.data.pipelines.lidar_aug.LiDARAug
159
+ bda_aug_conf:
160
+ rot_lim: (-22.5 * 2, 22.5 * 2)
161
+ scale_lim: (0.9, 1.1)
162
+ flip_dx_ratio: 0.5
163
+ flip_dy_ratio: 0.5
164
+ tran_lim: (0.5, 0.5, 0.5)
165
+ <<: *ranges
166
+ # if no aug for map, set this is_train to False
167
+ <<: *is_train
168
+
169
+ depth:
170
+ _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
171
+ <<: *grid_config
172
+
173
+ img:
174
+ _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
175
+ _convert_: 'all'
176
+ opencv_pp: True
177
+ # Flag should be False in Eval!!!!
178
+ <<: *is_train
179
+ data_config:
180
+ input_size: (256, 704)
181
+ src_size: (900, 1600)
182
+ # Augmentation
183
+ resize: (-0.06, 0.44)
184
+ rot: (-5.4, 5.4)
185
+ flip: True
186
+ crop_h: (0.0, 0.0)
187
+ random_crop_height: True
188
+ vflip: True
189
+ resize_test: 0.04
190
+ pmd:
191
+ brightness_delta: 32
192
+ contrast_lower: 0.5
193
+ contrast_upper: 1.5
194
+ saturation_lower: 0.5
195
+ saturation_upper: 1.5
196
+ hue_delta: 18
197
+ rate: 0.5
198
+
199
+
200
+ <<: *is_train
201
+ checkpoint_path: null
202
+ hidden_layer_dim: 512
203
+ lr: 1e-4
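The `_target_` / `_convert_` keys in this config follow Hydra's object-instantiation convention. The sketch below is only an illustration under assumptions: it loads this file directly with OmegaConf instead of composing it through `train_det.yaml` (which appears later in this diff), it presumes Hydra ≥ 1.1 is installed, and whether the nested pipeline nodes construct cleanly is not verified here.

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Load the agent config file directly; in the real pipeline Hydra composes it
# as the `agent` config group of train_det.yaml instead.
cfg = OmegaConf.load("det_map/config/agent/det_agent.yaml")

# instantiate() builds the object named by _target_ (here det_map.det.det_agent.DetAgent)
# and recursively instantiates nested nodes that carry their own _target_;
# _convert_: 'all' makes the remaining config nodes arrive as plain dicts/lists.
agent = instantiate(cfg)
```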
det_map/config/agent/map_agent.yaml ADDED
@@ -0,0 +1,320 @@
1
+ _target_: det_map.map.map_agent.MapAgent
2
+ _convert_: 'all'
3
+
4
+
5
+ is_train: &is_train
6
+ is_train: True
7
+
8
+ point_cloud_range: &point_cloud_range
9
+ pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
10
+
11
+ lidar_filter_ranges: &lidar_filter_ranges
12
+ x_range: (-15.0, 15.0)
13
+ y_range: (-30.0, 30.0)
14
+ z_range: (-10.0, 10.0)
15
+
16
+ voxel_size: &voxel_size
17
+ voxel_size: [0.075, 0.075, 20.0]
18
+
19
+ img_voxel_size: &img_voxel_size
20
+ voxel_size: [0.3, 0.3, 20.0]
21
+
22
+
23
+ dbound: &dbound
24
+ dbound: [1.0, 35.0, 0.5]
25
+
26
+ grid_config: &grid_config
27
+ grid_config:
28
+ x: (-15.0, 15.0, 0.6)
29
+ y: (-30.0, 30.0, 0.6)
30
+ z: (-10.0, 10.0, 20.0)
31
+ depth: (1.0, 60.0, 0.5)
32
+
33
+ img_norm_cfg : &img_norm_cfg
34
+ img_norm_cfg:
35
+ mean: [123.675, 116.28, 103.53]
36
+ std: [58.395, 57.12, 57.375]
37
+ to_rgb: True
38
+
39
+ map_classes: &map_classes
40
+ map_classes: ['divider', 'ped_crossing','boundary', 'centerline']
41
+
42
+ #fixed_ptsnum_per_gt_line: &fixed_ptsnum_per_gt_line
43
+ # fixed_ptsnum_per_gt_line: 20
44
+
45
+ #fixed_ptsnum_per_pred_line: &fixed_ptsnum_per_pred_line
46
+ # fixed_ptsnum_per_pred_line: 20
47
+
48
+ eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag
49
+ eval_use_same_gt_sample_num_flag: True
50
+
51
+
52
+
53
+ #_pos_dim_: &_pos_dim_
54
+ # _pos_dim_: 128
55
+
56
+ #_ffn_dim_: &_ffn_dim_
57
+ # _ffn_dim_: 512
58
+
59
+ #_num_levels_: &_num_levels_
60
+ # _num_levels_: 1
61
+
62
+ #bev_h_: &bev_h_
63
+ # bev_h_: 100
64
+
65
+ #bev_w_: &bev_w_
66
+ # bev_w_: 200
67
+
68
+ #queue_length: &queue_length
69
+ # queue_length: 1
70
+
71
+ aux_seg : &aux_seg_cfg
72
+ aux_seg:
73
+ use_aux_seg: False
74
+ bev_seg: False
75
+ pv_seg: False
76
+ seg_classes: 1
77
+ feat_down_sample: 32
78
+ pv_thickness: 1
79
+
80
+ #z_cfg : &z_cfg
81
+ #
82
+ # pred_z_flag: True
83
+ # gt_z_flag: True
84
+
85
+ model:
86
+ _target_: det_map.map.map_model.MapModel
87
+ _convert_: 'all'
88
+ use_grid_mask: True
89
+ video_test_mode: False
90
+ pretrained:
91
+ img: ckpts/resnet50-19c8e357.pth
92
+
93
+ img_backbone:
94
+ type: ResNet
95
+ depth: 50
96
+ num_stages: 4
97
+ out_indices: [3]
98
+ frozen_stages: 1
99
+ norm_cfg:
100
+ type: BN
101
+ requires_grad: False
102
+ norm_eval: True
103
+ style: pytorch
104
+ img_neck:
105
+ type: FPN
106
+ in_channels: [2048]
107
+ out_channels: 256
108
+ start_level: 0
109
+ add_extra_convs: on_output
110
+ num_outs: 1
111
+ relu_before_extra_convs: True
112
+ pts_bbox_head:
113
+ type: MapTRv2Head
114
+ <<: *point_cloud_range
115
+ bev_h: 100
116
+ bev_w: 50
117
+ num_query: 900
118
+ num_vec_one2one: 20
119
+ num_vec_one2many: 300
120
+ k_one2many: 6
121
+ num_pts_per_vec: 20
122
+ num_pts_per_gt_vec: 20
123
+ dir_interval: 1
124
+ query_embed_type: 'instance_pts'
125
+ transform_method: 'minmax'
126
+ gt_shift_pts_pattern: 'v2'
127
+ num_classes: 2
128
+ in_channels: 256
129
+ sync_cls_avg_factor: True
130
+ with_box_refine: True
131
+ as_two_stage: False
132
+ code_size: 2
133
+ code_weights: None
134
+ <<: *aux_seg_cfg
135
+ # z_cfg: *z_cfg
136
+ transformer:
137
+ type: MapTRPerceptionTransformer
138
+ bev_h: 100
139
+ bev_w: 50
140
+ # fuser:
141
+ # type: 'ConvFuser'
142
+ # in_channels: [256, 256]
143
+ # out_channels: 256
144
+ num_cams: 2
145
+ # z_cfg: *z_cfg
146
+ rotate_prev_bev: False
147
+ use_shift: True
148
+ use_can_bus: False
149
+ embed_dims: 256
150
+ encoder:
151
+ type: 'SpatialDecoder'
152
+ num_layers: 1
153
+ <<: *point_cloud_range
154
+ grid_config:
155
+ x: [-15.0, 15.0, 0.6]
156
+ y: [-30.0, 30.0, 0.6]
157
+ z: [ -10.0, 10.0, 20.0 ]
158
+ data_config:
159
+ input_size: [256, 704]
160
+ transformerlayers:
161
+ type: 'SpatialDecoderLayer'
162
+ attn_cfgs:
163
+ - type: 'SpatialCrossAttention'
164
+ <<: *point_cloud_range
165
+ num_cams: 2
166
+ dropout: 0.0
167
+ embed_dims: 256
168
+ deformable_attention:
169
+ type: 'MSDeformableAttention'
170
+ embed_dims: 256
171
+ num_points: 8
172
+ num_levels: 1
173
+ ffn_cfgs:
174
+ type: 'FFN'
175
+ embed_dims: 256
176
+ feedforward_channels: 1024
177
+ ffn_drop: 0.0
178
+ act_cfg:
179
+ type: 'ReLU'
180
+ inplace: True
181
+ feedforward_channels: 1024
182
+ ffn_dropout: 0.0
183
+ operation_order: ['cross_attn', 'norm' ,'ffn', 'norm']
184
+ decoder:
185
+ type: MapTRDecoder
186
+ num_layers: 6
187
+ return_intermediate: True
188
+ transformerlayers:
189
+ type: DecoupledDetrTransformerDecoderLayer
190
+ num_vec: 20
191
+ num_pts_per_vec: 20
192
+ attn_cfgs:
193
+ - type: MultiheadAttention
194
+ embed_dims: 256
195
+ num_heads: 8
196
+ dropout: 0.1
197
+ - type: MultiheadAttention
198
+ embed_dims: 256
199
+ num_heads: 8
200
+ dropout: 0.1
201
+ - type: CustomMSDeformableAttention
202
+ embed_dims: 256
203
+ num_levels: 1
204
+ feedforward_channels: 512
205
+ ffn_dropout: 0.1
206
+ operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']
207
+
208
+ positional_encoding:
209
+ type: LearnedPositionalEncoding
210
+ num_feats: 128
211
+ row_num_embed: 100
212
+ col_num_embed: 50
213
+ loss_cls:
214
+ type: FocalLoss
215
+ use_sigmoid: True
216
+ gamma: 2.0
217
+ alpha: 0.25
218
+ loss_weight: 2.0
219
+ loss_bbox:
220
+ type: L1Loss
221
+ loss_weight: 0.0
222
+ loss_iou:
223
+ type: GIoULoss
224
+ loss_weight: 0.0
225
+ loss_pts:
226
+ type: PtsL1Loss
227
+ loss_weight: 5.0
228
+ loss_dir:
229
+ type: PtsDirCosLoss
230
+ loss_weight: 0.005
231
+ loss_seg:
232
+ type: SimpleLoss
233
+ pos_weight: 4.0
234
+ loss_weight: 1.0
235
+ loss_pv_seg:
236
+ type: SimpleLoss
237
+ pos_weight: 1.0
238
+ loss_weight: 2.0
239
+ # train_cfg:
240
+ # pts:
241
+ # grid_size: [512, 512, 1]
242
+ # <<: *voxel_size
243
+ # point_cloud_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
244
+ # out_size_factor: 4
245
+ # assigner:
246
+ # type: MapTRAssigner
247
+ # cls_cost:
248
+ # type: FocalLossCost
249
+ # weight: 2.0
250
+ # reg_cost:
251
+ # type: BBoxL1Cost
252
+ # weight: 0.0
253
+ # box_format: 'xywh'
254
+ # iou_cost:
255
+ # type: IoUCost
256
+ # iou_mode: 'giou'
257
+ # weight: 0.0
258
+ # pts_cost:
259
+ # type: OrderedPtsL1Cost
260
+ # weight: 5
261
+ # pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
262
+
263
+ pipelines:
264
+ lidar_filter:
265
+ _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
266
+ _convert_: 'all'
267
+ close_radius: 1.0
268
+ <<: *lidar_filter_ranges
269
+
270
+ # only include in training
271
+ point_shuffle:
272
+ _target_: det_map.data.pipelines.point_shuffle.PointShuffle
273
+ <<: *is_train
274
+
275
+ lidar_aug:
276
+ _target_: det_map.data.pipelines.lidar_aug.LiDARAug
277
+ bda_aug_conf:
278
+ rot_lim: (-22.5 * 2, 22.5 * 2)
279
+ scale_lim: (0.9, 1.1)
280
+ flip_dx_ratio: 0.5
281
+ flip_dy_ratio: 0.5
282
+ tran_lim: (0.5, 0.5, 0.5)
283
+ <<: *lidar_filter_ranges
284
+ # if no aug for map, set this is_train to False
285
+ <<: *is_train
286
+
287
+ depth:
288
+ _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
289
+ <<: *grid_config
290
+
291
+ img:
292
+ _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
293
+ _convert_: 'all'
294
+ opencv_pp: True
295
+ # Flag should be False in Eval!!!!
296
+ <<: *is_train
297
+ data_config:
298
+ input_size: (256, 704)
299
+ src_size: (900, 1600)
300
+ # Augmentation
301
+ resize: (-0.06, 0.44)
302
+ rot: (-5.4, 5.4)
303
+ flip: True
304
+ crop_h: (0.0, 0.0)
305
+ random_crop_height: True
306
+ vflip: True
307
+ resize_test: 0.04
308
+ pmd:
309
+ brightness_delta: 32
310
+ contrast_lower: 0.5
311
+ contrast_upper: 1.5
312
+ saturation_lower: 0.5
313
+ saturation_upper: 1.5
314
+ hue_delta: 18
315
+ rate: 0.5
316
+
317
+ #<<: *is_train
318
+ checkpoint_path: null
319
+ hidden_layer_dim: 512
320
+ lr: 1e-4
det_map/config/defaults/default_common.yaml ADDED
@@ -0,0 +1,23 @@
+ # Default common configs
+
+ defaults:
+   # Worker that is used to run simulations
+   # - ray_distributed_no_torch
+   - ray_distributed_no_torch
+
+ split: ???
+
+ distributed_timeout_seconds: 7200  # Sets how long to wait while synchronizing across worker nodes in a distributed context.
+
+ selected_simulation_metrics: null
+
+ # Sets verbosity level, in particular determines if progress bars are shown or not.
+ verbose: false
+
+ # Logger
+ logger_level: info  # Level of logger
+ logger_format_string: null  # Logger format string, set null to use the default format string
+
+ # Execution
+ max_number_of_workers: null  # Set null to disable threading for simulation execution
+ gpu: true  # Whether to use available GPUs during training/simulation
det_map/config/defaults/default_evaluation.yaml ADDED
@@ -0,0 +1,7 @@
+ # Cache parameters
+ experiment_name: ???
+ navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}  # path to log annotations
+ sensor_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/sensor_blobs/${split}  # path to sensor blobs
+ date_format: '%Y.%m.%d.%H.%M.%S'
+ experiment_uid: ${now:${date_format}}
+ output_dir: ${oc.env:NAVSIM_EXP_ROOT}/${experiment_name}/${experiment_uid}  # path where the output csv is saved
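The `${...}` entries above are OmegaConf interpolations: `${oc.env:...}` reads an environment variable, `${split}` references another config key, and `${now:...}` is a resolver that Hydra registers at runtime. A minimal sketch of the mechanism, using only the built-in `oc.env` resolver (the path value is made up for the example):

```python
import os
from omegaconf import OmegaConf

os.environ.setdefault("OPENSCENE_DATA_ROOT", "/data/openscene")  # made-up value for the example

cfg = OmegaConf.create(
    {
        "split": "mini",
        "navsim_log_path": "${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}",
    }
)

# Interpolations are resolved lazily, on access rather than at creation time.
print(cfg.navsim_log_path)  # -> /data/openscene/navsim_logs/mini
```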
det_map/config/defaults/ray_distributed_no_torch.yaml ADDED
@@ -0,0 +1,8 @@
+ _target_: navsim.planning.utils.multithreading.worker_ray_no_torch.RayDistributedNoTorch
+ _convert_: 'all'
+ master_node_ip: null  # Set to a master node IP if you desire to connect to cluster remotely
+ threads_per_node: null  # Number of CPU threads to use per node, "null" means all threads available
+ debug_mode: false  # If true all tasks will be executed serially, mainly for testing
+ log_to_driver: true  # If true, all printouts from ray threads will be displayed in driver
+ logs_subdir: 'logs'  # Subdirectory to store logs inside the experiment directory
+ use_distributed: false  # Whether to use the built-in distributed mode of ray
det_map/config/scene_filter/det_all_scenes.yaml ADDED
@@ -0,0 +1,12 @@
+ _target_: det_map.data.datasets.dataloader.SceneFilter
+ _convert_: 'all'
+
+ num_history_frames: 4
+ num_future_frames: 10
+ # for the map agent, has_route may need to be set to True
+ has_route: False
+
+ max_scenes: Null
+ log_names: Null
+
+ tokens: Null
det_map/config/scene_filter/navtiny.yaml ADDED
@@ -0,0 +1,265 @@
1
+ _target_: navsim.common.dataclasses.SceneFilter
2
+ _convert_: 'all'
3
+ num_history_frames: 4
4
+ num_future_frames: 10
5
+ frame_interval: 1
6
+ has_route: true
7
+ max_scenes: null
8
+
9
+ log_names: null # list of log names to extract scenes from, if null, all logs are extracted
10
+ tokens:
11
+ - 'ed4ac2dad0fa584b'
12
+ - '2111b648fcba5bb7'
13
+ - '1fc1dd0dc3d157ae'
14
+ - '76a69c9e9e375670'
15
+ - '4d3a4cbc9efb5337'
16
+ - '06df05f607855dbf'
17
+ - 'c3856d49ecf453f0'
18
+ - '09d3f08395e05d1c'
19
+ - '0593ddf8a1bb5a57'
20
+ - 'c0b386ab15db56f9'
21
+ - '0ef0f369529e54a9'
22
+ - 'c754b1af814a5f23'
23
+ - 'b214f8e744075e96'
24
+ - '5cbacc029a9f5cb3'
25
+ - 'cb46ac2ddfdf506e'
26
+ - '108d77bad2275975'
27
+ - '3978246a10a25ab0'
28
+ - '41bb74b4738f5a8b'
29
+ - '3a8375c20b615fce'
30
+ - '82dc3fff070b5f80'
31
+ - '8bfb2d59b82057e6'
32
+ - 'e36d3626a55e54f9'
33
+ - '5b1c0e44a5505c06'
34
+ - '78e6ea95b854551c'
35
+ - '76af8c24431855c3'
36
+ - '1a84e817c1875ec6'
37
+ - 'e7ea3ed9a30e5444'
38
+ - '8c837572950a5ac0'
39
+ - 'c18f8cfc41385d8c'
40
+ - '11aa12f4e5715b08'
41
+ - '702bdcfabe0755fe'
42
+ - 'c11854507e515b05'
43
+ - '828f0769bf365504'
44
+ - '1d2d2ddbbd5450a4'
45
+ - '640423c4ff21538a'
46
+ - '93fa463a455857f6'
47
+ - '79214a9a65225eda'
48
+ - 'cd9d78a1011c555f'
49
+ - '2a3f7fbaa10b5627'
50
+ - '5abf2148971855ad'
51
+ - 'd9200709d73756c3'
52
+ - 'cf94200201a75af8'
53
+ - 'c97bad66929c58d1'
54
+ - 'e45b782c83a550c1'
55
+ - 'e869951de22f5ecc'
56
+ - '9610b02bc4ec529c'
57
+ - '70ed6ff1471f5d74'
58
+ - 'f8a971a1e94553ce'
59
+ - '91e77e1873d75afe'
60
+ - 'dc86b9a3e2e05466'
61
+ - 'a3efdab7285751a6'
62
+ - 'ecca4f25f1cd5a85'
63
+ - '3c09e960d73758eb'
64
+ - '58fb7f78e39451bc'
65
+ - '0ce0aa336fe751a4'
66
+ - '759d96676b965349'
67
+ - 'e3b1564e52cd52db'
68
+ - '48333fc684d454a2'
69
+ - '62cae48b4e445254'
70
+ - 'e97256ddafa85705'
71
+ - '568aee30ea2655e2'
72
+ - '2b8645e05e8854f0'
73
+ - '1ce8022305ba565c'
74
+ - 'fd3f8f3310255030'
75
+ - 'f0b74302312b5241'
76
+ - 'd74e1e5648e35864'
77
+ - '5bff4e6fa9c95deb'
78
+ - '97d3764b7be652cf'
79
+ - 'de681a4826e35220'
80
+ - 'be2540e76b10519d'
81
+ - 'c7e91cc3157b5937'
82
+ - '12a68a4c440c5396'
83
+ - 'ac0c803827d65b80'
84
+ - 'c18771a3868f5868'
85
+ - 'a6340d3e28b95701'
86
+ - '24fff541744b573f'
87
+ - 'e7165cb777e65dac'
88
+ - '7c1553e7080b5a70'
89
+ - '6dffb4d149eb5089'
90
+ - '0773a8971c5e5e5a'
91
+ - '72dac45a812f56fb'
92
+ - '75c16dc4849b5726'
93
+ - '523eab76cc4653bd'
94
+ - 'f246f785c3455caa'
95
+ - 'baf59d54fb78575a'
96
+ - 'b29743e5885f5514'
97
+ - 'd213c35fc6055569'
98
+ - '3ba8190534b1554c'
99
+ - '26e297939af25760'
100
+ - 'da643d2d70785c76'
101
+ - '2137a540b5f05b48'
102
+ - 'ed795a36682f5728'
103
+ - '000afad751a95adb'
104
+ - '7543fb2f2dcf5c7e'
105
+ - '9b5c00687d4e590b'
106
+ - '16d0a19acfcd5668'
107
+ - 'd91da3c6f79b53f6'
108
+ - '154694dd0f6c565c'
109
+ - '9b4b3a0261595a47'
110
+ - '0df3061f21f4502a'
111
+ - '7e0b549208c75322'
112
+ - '74678e95029e52a2'
113
+ - '49196fecbe9a549f'
114
+ - '0decaed8d0f45b26'
115
+ - 'b3671d0ef61e5391'
116
+ - '7b990d22090f5a21'
117
+ - '4fea3406427a52de'
118
+ - 'e7ac9da207d05a7f'
119
+ - '69b772bf2aa15e8b'
120
+ - '09300186157e51e9'
121
+ - 'c61c26797b2d52f8'
122
+ - 'eac8efd956975d88'
123
+ - 'ad0ca9004c1e56c6'
124
+ - '9c48c3a7714e5850'
125
+ - '1bac9ad3b5795fb9'
126
+ - '5dad11490b425565'
127
+ - '1f6cea56be625f10'
128
+ - 'f2fa70a966055b14'
129
+ - '68520950dcca56d2'
130
+ - 'e905af2fb80f5802'
131
+ - 'e5445523551c573a'
132
+ - '5a3b197e54495443'
133
+ - '35d813d8de5854f9'
134
+ - '25e0169687d659c0'
135
+ - '88f7863088bc593e'
136
+ - '06767022b8445e7f'
137
+ - '4fcdad926f4a5568'
138
+ - '8f5b483a5dd956d3'
139
+ - 'a64cd79798845d53'
140
+ - 'de864917fc075773'
141
+ - '50418b03a9345e7f'
142
+ - 'e991b5b1ef9d5fcd'
143
+ - 'ea75df402b6a5d37'
144
+ - '17b4e23eb78b547b'
145
+ - '79388c5790cf5b02'
146
+ - '7b9cc1b02566583e'
147
+ - 'a8b415f811cb5bfa'
148
+ - 'f4e49919c3d35a1a'
149
+ - '79ca73b34554570a'
150
+ - 'f9902a62c80c511a'
151
+ - '71057951bf9a5e81'
152
+ - '411cc15794895e1e'
153
+ - '7c4fca218b0854d7'
154
+ - '8498fd37028051b7'
155
+ - '27decc74a57b53ac'
156
+ - '50480a33ca215770'
157
+ - '47f300be059c5734'
158
+ - '70f2ea8358ed55f1'
159
+ - '471f7ca3148659cd'
160
+ - '4800f9f234c050fa'
161
+ - '64c71ae3532a5efb'
162
+ - '5e8f9f6ab5695769'
163
+ - '2d9168675ce355a2'
164
+ - '3c077c8da4615b33'
165
+ - 'c7e8c07beb135247'
166
+ - '2f8055010b905651'
167
+ - '340d245e2ee854fe'
168
+ - '70df39aae7b05204'
169
+ - '388782e615ec5bba'
170
+ - '7cb3886f8bb557d3'
171
+ - 'b37a0e95ac4055ba'
172
+ - '8be138812f1459d2'
173
+ - '3ff2c6494d63527b'
174
+ - '05fab28931d55ff9'
175
+ - '333189d65a42540d'
176
+ - '73bb3d277424505f'
177
+ - 'cbe6088df42d55dc'
178
+ - 'aa784b6564cb56a3'
179
+ - 'cd30af3a16945a92'
180
+ - 'c3a15b9f7dd55cce'
181
+ - '44b6e898e157569a'
182
+ - '4e4062c303565251'
183
+ - 'd74f9dfdb4125eaf'
184
+ - 'c0365ee92dec511d'
185
+ - '4e98aff61c5e57b1'
186
+ - '7200dcdd4ad05210'
187
+ - 'c8124080125a5278'
188
+ - '1586145ff7ae5b89'
189
+ - '6b7f1a53f7d3524c'
190
+ - '3bf37bad40c55175'
191
+ - 'bdde0c029ec25326'
192
+ - 'cd0a777bac035272'
193
+ - '67b76696aa305cdc'
194
+ - '614111a5d6045ae7'
195
+ - 'f383acca25ff59eb'
196
+ - 'cea15449dc0356bd'
197
+ - 'b80387b22e0c55b5'
198
+ - '065a0963a4125096'
199
+ - 'c9e06d789998518d'
200
+ - '4615024da7765d62'
201
+ - 'ef336e8b83245733'
202
+ - 'be4ec4d7ce745612'
203
+ - '5169ec4362225b58'
204
+ - 'c6f905906f9654a2'
205
+ - '194216a5f85d592d'
206
+ - '6529aed422f35336'
207
+ - '497ac853176d59b6'
208
+ - 'f280ba623a7f5321'
209
+ - 'b5fe876937af504a'
210
+ - 'c6b62c299ccc5274'
211
+ - 'dcb2a35ae605510a'
212
+ - 'd1c281e277d1532d'
213
+ - '8f3366be46c05d5f'
214
+ - 'af9f5f6fa1ad5182'
215
+ - '5054593a6d795256'
216
+ - '159b9b7451195c9c'
217
+ - '7687f25bf8845686'
218
+ - '560f3ccbaa5b53ef'
219
+ - 'e5a146299341551a'
220
+ - 'b794c616319352c3'
221
+ - 'fb68b32ec8a251da'
222
+ - '9fce6f03ef0351b0'
223
+ - '046fd63cb514581a'
224
+ - '0ce82a1caffc56af'
225
+ - '7cc94c33bbe052d7'
226
+ - 'b5126e9ddea25889'
227
+ - 'c123273de19d5c2f'
228
+ - 'df570b3785a95295'
229
+ - 'a5efa651fec451b5'
230
+ - '216f7065c13c5ec9'
231
+ - '4754eb209bc452e4'
232
+ - 'ce28728cdb6f50c9'
233
+ - '33461776a24d554f'
234
+ - '0920187661745605'
235
+ - '0633cb3809935cb7'
236
+ - 'f3e9317326955421'
237
+ - '1c371291fdc1551a'
238
+ - '37185bcf00de5be6'
239
+ - '224510571ce95a3f'
240
+ - 'e38a6e1fd4c55393'
241
+ - '3a0b00f0840658e5'
242
+ - '0d6abcbad24652c0'
243
+ - '4789245424875682'
244
+ - 'fba38dd9492a5341'
245
+ - 'b649dcb158a75dcd'
246
+ - '1a5182ccbf1b5955'
247
+ - '1ac622ff2d2e5210'
248
+ - 'f63cff56784d5cb9'
249
+ - '0ea876c450bb5aa6'
250
+ - '6fc06c6e4d1752a1'
251
+ - '88396ca47dcf5361'
252
+ - '7e1f829a0de95258'
253
+ - '5f9a9890f1a75602'
254
+ - '5a60c57493885588'
255
+ - '67be2615438d55fb'
256
+ - 'bda2fb6ea7735b5a'
257
+ - '55aa596e131d5734'
258
+ - 'd1a786625a885023'
259
+ - '8ec0cd02d7705766'
260
+ - 'e378bb756641598d'
261
+ - 'c853ae7a361f54d9'
262
+ - 'b1db6a099fea55f5'
263
+ - 'ca8bc031163a5765'
264
+ - 'eee8261221df5048'
265
+ - 'b33131090ada5f2d'
det_map/config/splits/default_train_val_test_log_split.yaml ADDED
The diff for this file is too large to render. See raw diff
 
det_map/config/train_det.yaml ADDED
@@ -0,0 +1,48 @@
+ hydra:
+   run:
+     dir: ${output_dir}
+   output_subdir: ${output_dir}/code/hydra  # Store hydra's config breakdown here for debugging
+   searchpath:  # Only <exp_dir> in these paths are discoverable
+     - det_map/config/defaults
+     - det_map/config
+     - det_map/config/splits
+     - det_map/config/agent
+     # - pkg://navsim.planning.script.config.training
+
+ defaults:
+   - default_common
+   - default_evaluation
+   - default_train_val_test_log_split
+   - agent: map_agent
+   - scene_filter: det_all_scenes
+
+ split: mini
+
+ dataloader:
+   params:
+     batch_size: 32  # number of samples per batch
+     num_workers: 4  # number of workers for data loading
+     pin_memory: true  # pin memory for faster GPU transfer
+     prefetch_factor: 1
+
+ trainer:
+   params:
+     max_epochs: 20  # maximum number of training epochs
+     check_val_every_n_epoch: 1  # run validation set every n training epochs
+     val_check_interval: 1.0  # [%] run validation set every X% of training set
+
+     limit_train_batches: 1.0  # how much of training dataset to check (float = fraction, int = num_batches)
+     limit_val_batches: 1.0  # how much of validation dataset to check (float = fraction, int = num_batches)
+
+     accelerator: gpu  # distribution method
+     strategy: ddp
+     precision: 32  # floating point precision
+     num_nodes: 1  # number of nodes used for training
+
+     num_sanity_val_steps: 0  # number of validation steps to run before training begins
+     fast_dev_run: false  # runs 1 batch of train/val/test for sanity
+
+     accumulate_grad_batches: 1  # accumulates gradients every n batches
+     # track_grad_norm: -1  # logs the p-norm for inspection
+     gradient_clip_val: 0.0  # value to clip gradients
+     gradient_clip_algorithm: norm  # [value, norm] method to clip gradients
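This commit does not include the training entry point itself, so the skeleton below is purely illustrative: the script name, the `build_dataset` helper, and the assumption that `cfg.agent`, `cfg.dataloader.params`, and `cfg.trainer.params` can be passed through unchanged are all unverified; only the config keys mirror `train_det.yaml` above.

```python
# Hypothetical entry point, e.g. run_training_det.py (no such script is part of this commit).
import hydra
import pytorch_lightning as pl
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf
from torch.utils.data import DataLoader

from det_map.agent_lightning import AgentLightningModuleMap


@hydra.main(config_path="det_map/config", config_name="train_det", version_base=None)
def main(cfg: DictConfig) -> None:
    agent = instantiate(cfg.agent)
    module = AgentLightningModuleMap(agent=agent)

    # build_dataset is a hypothetical placeholder for the repository's dataset code.
    train_loader = DataLoader(build_dataset(cfg), **OmegaConf.to_container(cfg.dataloader.params))

    # trainer.params in train_det.yaml maps directly onto pl.Trainer keyword arguments.
    trainer = pl.Trainer(**OmegaConf.to_container(cfg.trainer.params))
    trainer.fit(module, train_dataloaders=train_loader)


if __name__ == "__main__":
    main()
```

A run would then be launched with Hydra-style command-line overrides, e.g. `python run_training_det.py split=mini agent=det_agent` (script name hypothetical).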
det_map/data/__init__.py ADDED
File without changes
det_map/data/datasets/__init__.py ADDED
File without changes
det_map/data/datasets/dataclasses.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import os
5
+ from dataclasses import dataclass, asdict
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Union
8
+ from nuplan.database.maps_db.gpkg_mapsdb import MAP_LOCATIONS
9
+ from nuplan.common.maps.nuplan_map.map_factory import get_maps_api
10
+
11
+ import numpy as np
12
+ import numpy.typing as npt
13
+ from PIL import Image
14
+ from nuplan.common.actor_state.state_representation import StateSE2
15
+ from nuplan.common.maps.abstract_map import AbstractMap
16
+ from nuplan.database.utils.pointclouds.lidar import LidarPointCloud
17
+ from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
18
+ from pyquaternion import Quaternion
19
+
20
+ from navsim.planning.simulation.planner.pdm_planner.utils.pdm_geometry_utils import (
21
+ convert_absolute_to_relative_se2_array,
22
+ )
23
+
24
+ NAVSIM_INTERVAL_LENGTH: float = 0.5
25
+ OPENSCENE_DATA_ROOT = os.environ.get("OPENSCENE_DATA_ROOT")
26
+ NUPLAN_MAPS_ROOT = os.environ.get("NUPLAN_MAPS_ROOT")
27
+
28
+
29
+ @dataclass
30
+ class Camera:
31
+ image: Optional[npt.NDArray[np.float32]] = None
32
+ canvas: Optional[npt.NDArray[np.float32]] = None
33
+
34
+ sensor2lidar_rotation: Optional[npt.NDArray[np.float32]] = None
35
+ sensor2lidar_translation: Optional[npt.NDArray[np.float32]] = None
36
+ intrinsics: Optional[npt.NDArray[np.float32]] = None
37
+ distortion: Optional[npt.NDArray[np.float32]] = None
38
+
39
+ post_rot: Optional[npt.NDArray[np.float32]] = None
40
+ post_tran: Optional[npt.NDArray[np.float32]] = None
41
+
42
+ def to_dict(self):
43
+ return {
44
+ 'image': self.image,
45
+ 'canvas': self.canvas,
46
+ 'sensor2lidar_rotation': self.sensor2lidar_rotation,
47
+ 'sensor2lidar_translation': self.sensor2lidar_translation,
48
+ 'intrinsics': self.intrinsics,
49
+ 'distortion': self.distortion,
50
+ 'post_rot': self.post_rot,
51
+ 'post_tran': self.post_tran
52
+ }
53
+
54
+
55
+ @dataclass
56
+ class Cameras:
57
+ cam_f0: Camera
58
+ cam_l0: Camera
59
+ cam_l1: Camera
60
+ cam_l2: Camera
61
+ cam_r0: Camera
62
+ cam_r1: Camera
63
+ cam_r2: Camera
64
+ cam_b0: Camera
65
+
66
+ @classmethod
67
+ def from_camera_dict(
68
+ cls,
69
+ sensor_blobs_path: Path,
70
+ camera_dict: Dict[str, Any],
71
+ sensor_names: List[str],
72
+ ) -> Cameras:
73
+
74
+ data_dict: Dict[str, Camera] = {}
75
+ for camera_name in camera_dict.keys():
76
+ camera_identifier = camera_name.lower()
77
+ if camera_identifier in sensor_names:
78
+ image_path = sensor_blobs_path / camera_dict[camera_name]["data_path"]
79
+ data_dict[camera_identifier] = Camera(
80
+ image=np.array(Image.open(image_path)),
81
+ sensor2lidar_rotation=camera_dict[camera_name]["sensor2lidar_rotation"],
82
+ sensor2lidar_translation=camera_dict[camera_name]["sensor2lidar_translation"],
83
+ intrinsics=camera_dict[camera_name]["cam_intrinsic"],
84
+ distortion=camera_dict[camera_name]["distortion"],
85
+ )
86
+ else:
87
+ data_dict[camera_identifier] = Camera() # empty camera
88
+
89
+ return Cameras(
90
+ cam_f0=data_dict["cam_f0"],
91
+ cam_l0=data_dict["cam_l0"],
92
+ cam_l1=data_dict["cam_l1"],
93
+ cam_l2=data_dict["cam_l2"],
94
+ cam_r0=data_dict["cam_r0"],
95
+ cam_r1=data_dict["cam_r1"],
96
+ cam_r2=data_dict["cam_r2"],
97
+ cam_b0=data_dict["cam_b0"],
98
+ )
99
+
100
+
101
+ @dataclass
102
+ class Lidar:
103
+ # merged lidar point cloud as (6,n) float32 array with n points
104
+ # first axis: (x, y, z, intensity, ring, lidar_id)
105
+ lidar_pc: Optional[npt.NDArray[np.float32]] = None
106
+
107
+ @staticmethod
108
+ def _load_bytes(lidar_path: Path) -> BinaryIO:
109
+ with open(lidar_path, "rb") as fp:
110
+ return io.BytesIO(fp.read())
111
+
112
+ @classmethod
113
+ def from_paths(
114
+ cls,
115
+ sensor_blobs_path: Path,
116
+ lidar_path: Path,
117
+ sensor_names: List[str],
118
+ ) -> Lidar:
119
+ # NOTE: this could be extended to load specific LiDARs in the merged pc
120
+ if "lidar_pc" in sensor_names:
121
+ global_lidar_path = sensor_blobs_path / lidar_path
122
+ lidar_pc = LidarPointCloud.from_buffer(cls._load_bytes(global_lidar_path), "pcd").points
123
+ return Lidar(lidar_pc)
124
+ return Lidar() # empty lidar
125
+
126
+
127
+ @dataclass
128
+ class EgoStatus:
129
+ ego_pose: npt.NDArray[np.float64]
130
+ ego_velocity: npt.NDArray[np.float32]
131
+ ego_acceleration: npt.NDArray[np.float32]
132
+ driving_command: npt.NDArray[np.int64]
133
+ in_global_frame: bool = False # False for AgentInput
134
+
135
+
136
+ @dataclass
137
+ class AgentInput:
138
+ tokens: List[str]
139
+ timestamps: List[int]
140
+
141
+ ego_statuses: List[EgoStatus]
142
+ cameras: List[Cameras]
143
+ lidars: List[Lidar]
144
+ ego2globals: List[np.ndarray]
145
+
146
+ def __post_init__(self):
147
+ pass
148
+
149
+ @classmethod
150
+ def from_scene_dict_list(
151
+ cls,
152
+ scene_dict_list: List[Dict],
153
+ sensor_blobs_path: Path,
154
+ num_history_frames: int,
155
+ sensor_config: SensorConfig,
156
+ ) -> AgentInput:
157
+ assert len(scene_dict_list) > 0, "Scene list is empty!"
158
+
159
+ global_ego_poses = []
160
+ for frame_idx in range(num_history_frames):
161
+ ego_translation = scene_dict_list[frame_idx]["ego2global_translation"]
162
+ ego_quaternion = Quaternion(*scene_dict_list[frame_idx]["ego2global_rotation"])
163
+ global_ego_pose = np.array(
164
+ [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
165
+ dtype=np.float64,
166
+ )
167
+ global_ego_poses.append(global_ego_pose)
168
+
169
+ local_ego_poses = convert_absolute_to_relative_se2_array(
170
+ StateSE2(*global_ego_poses[-1]), np.array(global_ego_poses, dtype=np.float64)
171
+ )
172
+
173
+ ego_statuses: List[EgoStatus] = []
174
+ cameras: List[Cameras] = []
175
+ lidars: List[Lidar] = []
176
+ ego2globals = []
177
+ tokens = []
178
+ timestamps = []
179
+
180
+ for frame_idx in range(num_history_frames):
181
+ tokens.append(scene_dict_list[frame_idx]['token'])
182
+ timestamps.append(scene_dict_list[frame_idx]['timestamp'])
183
+
184
+ ego_dynamic_state = scene_dict_list[frame_idx]["ego_dynamic_state"]
185
+ ego_status = EgoStatus(
186
+ ego_pose=np.array(local_ego_poses[frame_idx], dtype=np.float32),
187
+ ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
188
+ ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
189
+ driving_command=scene_dict_list[frame_idx]["driving_command"],
190
+ )
191
+ ego_statuses.append(ego_status)
192
+
193
+ sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
194
+ cameras.append(
195
+ Cameras.from_camera_dict(
196
+ sensor_blobs_path=sensor_blobs_path,
197
+ camera_dict=scene_dict_list[frame_idx]["cams"],
198
+ sensor_names=sensor_names,
199
+ )
200
+ )
201
+
202
+ lidars.append(
203
+ Lidar.from_paths(
204
+ sensor_blobs_path=sensor_blobs_path,
205
+ lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
206
+ sensor_names=sensor_names,
207
+ )
208
+ )
209
+
210
+ ego2globals.append(scene_dict_list[frame_idx]['ego2global'])
211
+
212
+ return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
213
+
214
+
215
+ @dataclass
216
+ class Annotations:
217
+ boxes: npt.NDArray[np.float32]
218
+ names: List[str]
219
+ velocity_3d: npt.NDArray[np.float32]
220
+ instance_tokens: List[str]
221
+ track_tokens: List[str]
222
+
223
+ def __post_init__(self):
224
+ annotation_lengths: Dict[str, int] = {
225
+ attribute_name: len(attribute) for attribute_name, attribute in vars(self).items()
226
+ }
227
+ assert (
228
+ len(set(annotation_lengths.values())) == 1
229
+ ), f"Annotations expects all attributes to have equal length, but got {annotation_lengths}"
230
+
231
+
232
+ @dataclass
233
+ class Trajectory:
234
+ poses: npt.NDArray[np.float32] # local coordinates
235
+ trajectory_sampling: TrajectorySampling = TrajectorySampling(
236
+ time_horizon=4, interval_length=0.5
237
+ )
238
+
239
+ def __post_init__(self):
240
+ assert (
241
+ self.poses.ndim == 2
242
+ ), "Trajectory poses should have two dimensions for samples and poses."
243
+ assert (
244
+ self.poses.shape[0] == self.trajectory_sampling.num_poses
245
+ ), "Trajectory poses and sampling have unequal number of poses."
246
+ assert self.poses.shape[1] == 3, "Trajectory requires (x, y, heading) at last dim."
247
+
248
+
249
+ @dataclass
250
+ class SceneMetadata:
251
+ log_name: str
252
+ scene_token: str
253
+ map_name: str
254
+ initial_token: str
255
+
256
+ num_history_frames: int
257
+ num_future_frames: int
258
+
259
+
260
+ @dataclass
261
+ class Frame:
262
+ token: str
263
+ timestamp: int
264
+ roadblock_ids: List[str]
265
+ traffic_lights: List[Tuple[str, bool]]
266
+ annotations: Annotations
267
+
268
+ ego_status: EgoStatus
269
+ lidar: Lidar
270
+ cameras: Cameras
271
+ ego2global: np.ndarray
272
+
273
+
274
+ @dataclass
275
+ class Scene:
276
+ # Ground truth information
277
+ scene_metadata: SceneMetadata
278
+ map_api: AbstractMap
279
+ frames: List[Frame]
280
+
281
+ def get_future_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
282
+
283
+ if num_trajectory_frames is None:
284
+ num_trajectory_frames = self.scene_metadata.num_future_frames
285
+
286
+ start_frame_idx = self.scene_metadata.num_history_frames - 1
287
+
288
+ global_ego_poses = []
289
+ for frame_idx in range(start_frame_idx, start_frame_idx + num_trajectory_frames + 1):
290
+ global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
291
+
292
+ local_ego_poses = convert_absolute_to_relative_se2_array(
293
+ StateSE2(*global_ego_poses[0]), np.array(global_ego_poses[1:], dtype=np.float64)
294
+ )
295
+
296
+ return Trajectory(
297
+ local_ego_poses,
298
+ TrajectorySampling(
299
+ num_poses=len(local_ego_poses),
300
+ interval_length=NAVSIM_INTERVAL_LENGTH,
301
+ ),
302
+ )
303
+
304
+ def get_history_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
305
+
306
+ if num_trajectory_frames is None:
307
+ num_trajectory_frames = self.scene_metadata.num_history_frames
308
+
309
+ global_ego_poses = []
310
+ for frame_idx in range(num_trajectory_frames):
311
+ global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
312
+
313
+ origin = StateSE2(*global_ego_poses[-1])
314
+ local_ego_poses = convert_absolute_to_relative_se2_array(
315
+ origin, np.array(global_ego_poses, dtype=np.float64)
316
+ )
317
+
318
+ return Trajectory(
319
+ local_ego_poses,
320
+ TrajectorySampling(
321
+ num_poses=len(local_ego_poses),
322
+ interval_length=NAVSIM_INTERVAL_LENGTH,
323
+ ),
324
+ )
325
+
326
+ def get_agent_input(self) -> AgentInput:
327
+ # NOTE: this function is unused and might be removed.
328
+
329
+ local_ego_poses = self.get_history_trajectory().poses
330
+
331
+ ego_statuses: List[EgoStatus] = []
332
+ cameras: List[Cameras] = []
333
+ lidars: List[Lidar] = []
334
+ ego2globals = []
335
+ tokens, timestamps = [], []
336
+ for frame_idx in range(self.scene_metadata.num_history_frames):
337
+ frame_ego_status = self.frames[frame_idx].ego_status
338
+ tokens.append(self.frames[frame_idx].token)
339
+ timestamps.append(self.frames[frame_idx].timestamp)
340
+ ego_statuses.append(
341
+ EgoStatus(
342
+ ego_pose=local_ego_poses[frame_idx],
343
+ ego_velocity=frame_ego_status.ego_velocity,
344
+ ego_acceleration=frame_ego_status.ego_acceleration,
345
+ driving_command=frame_ego_status.driving_command,
346
+ )
347
+ )
348
+ cameras.append(self.frames[frame_idx].cameras)
349
+ lidars.append(self.frames[frame_idx].lidar)
350
+ ego2globals.append(self.frames[frame_idx].ego2global)
351
+
352
+ return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
353
+
354
+ @classmethod
355
+ def _build_annotations(
356
+ cls,
357
+ scene_frame: Dict,
358
+ ) -> Annotations:
359
+ return Annotations(
360
+ boxes=scene_frame["anns"]["gt_boxes"],
361
+ names=scene_frame["anns"]["gt_names"],
362
+ velocity_3d=scene_frame["anns"]["gt_velocity_3d"],
363
+ instance_tokens=scene_frame["anns"]["instance_tokens"],
364
+ track_tokens=scene_frame["anns"]["track_tokens"],
365
+ )
366
+
367
+ @classmethod
368
+ def _build_ego_status(
369
+ cls,
370
+ scene_frame: Dict,
371
+ ) -> EgoStatus:
372
+ ego_translation = scene_frame["ego2global_translation"]
373
+ ego_quaternion = Quaternion(*scene_frame["ego2global_rotation"])
374
+ global_ego_pose = np.array(
375
+ [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
376
+ dtype=np.float64,
377
+ )
378
+ ego_dynamic_state = scene_frame["ego_dynamic_state"]
379
+ return EgoStatus(
380
+ ego_pose=global_ego_pose,
381
+ ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
382
+ ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
383
+ driving_command=scene_frame["driving_command"],
384
+ in_global_frame=True,
385
+ )
386
+
387
+ @classmethod
388
+ def _build_map_api(cls, map_name: str) -> AbstractMap:
389
+ assert (
390
+ map_name in MAP_LOCATIONS
391
+ ), f"The map name {map_name} is invalid, must be in {MAP_LOCATIONS}"
392
+ return get_maps_api(NUPLAN_MAPS_ROOT, "nuplan-maps-v1.0", map_name)
393
+
394
+ @classmethod
395
+ def from_scene_dict_list(
396
+ cls,
397
+ scene_dict_list: List[Dict],
398
+ sensor_blobs_path: Path,
399
+ num_history_frames: int,
400
+ num_future_frames: int,
401
+ sensor_config: SensorConfig,
402
+ ) -> Scene:
403
+ assert len(scene_dict_list) > 0, "Scene list is empty!"
404
+
405
+ scene_metadata = SceneMetadata(
406
+ log_name=scene_dict_list[num_history_frames - 1]["log_name"],
407
+ scene_token=scene_dict_list[num_history_frames - 1]["scene_token"],
408
+ map_name=scene_dict_list[num_history_frames - 1]["map_location"],
409
+ initial_token=scene_dict_list[num_history_frames - 1]["token"],
410
+ num_history_frames=num_history_frames,
411
+ num_future_frames=num_future_frames,
412
+ )
413
+ map_api = cls._build_map_api(scene_metadata.map_name)
414
+
415
+ frames: List[Frame] = []
416
+ for frame_idx in range(len(scene_dict_list)):
417
+ global_ego_status = cls._build_ego_status(scene_dict_list[frame_idx])
418
+ annotations = cls._build_annotations(scene_dict_list[frame_idx])
419
+
420
+ sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
421
+
422
+ cameras = Cameras.from_camera_dict(
423
+ sensor_blobs_path=sensor_blobs_path,
424
+ camera_dict=scene_dict_list[frame_idx]["cams"],
425
+ sensor_names=sensor_names,
426
+ )
427
+
428
+ lidar = Lidar.from_paths(
429
+ sensor_blobs_path=sensor_blobs_path,
430
+ lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
431
+ sensor_names=sensor_names,
432
+ )
433
+
434
+ frame = Frame(
435
+ token=scene_dict_list[frame_idx]["token"],
436
+ timestamp=scene_dict_list[frame_idx]["timestamp"],
437
+ roadblock_ids=scene_dict_list[frame_idx]["roadblock_ids"],
438
+ traffic_lights=scene_dict_list[frame_idx]["traffic_lights"],
439
+ annotations=annotations,
440
+ ego_status=global_ego_status,
441
+ lidar=lidar,
442
+ cameras=cameras,
443
+ ego2global=scene_dict_list[frame_idx]['ego2global']
444
+ )
445
+ frames.append(frame)
446
+
447
+ return Scene(scene_metadata=scene_metadata, frames=frames, map_api=map_api)
448
+
449
+
450
+ @dataclass
451
+ class SceneFilter:
452
+ num_history_frames: int = 4
453
+ num_future_frames: int = 10
454
+ has_route: bool = True
455
+
456
+ max_scenes: Optional[int] = None
457
+ log_names: Optional[List[str]] = None
458
+ tokens: Optional[List[str]] = None
459
+
460
+ @property
461
+ def num_frames(self) -> int:
462
+ return self.num_history_frames
463
+
464
+
465
+ @dataclass
466
+ class SensorConfig:
467
+ # Config values of sensors are either
468
+ # - bool: Whether to load history or not
469
+ # - List[int]: For loading specific history steps
470
+
471
+ cam_f0: Union[bool, List[int]]
472
+ cam_l0: Union[bool, List[int]]
473
+ cam_l1: Union[bool, List[int]]
474
+ cam_l2: Union[bool, List[int]]
475
+ cam_r0: Union[bool, List[int]]
476
+ cam_r1: Union[bool, List[int]]
477
+ cam_r2: Union[bool, List[int]]
478
+ cam_b0: Union[bool, List[int]]
479
+ lidar_pc: Union[bool, List[int]]
480
+
481
+ def get_sensors_at_iteration(self, iteration: int) -> List[str]:
482
+
483
+ sensors_at_iteration: List[str] = []
484
+ for sensor_name, sensor_include in asdict(self).items():
485
+ if isinstance(sensor_include, bool) and sensor_include:
486
+ sensors_at_iteration.append(sensor_name)
487
+ elif isinstance(sensor_include, list) and iteration in sensor_include:
488
+ sensors_at_iteration.append(sensor_name)
489
+
490
+ return sensors_at_iteration
491
+
492
+ @classmethod
493
+ def build_all_sensors(cls, include: Union[bool, List[int]] = True) -> SensorConfig:
494
+ return SensorConfig(
495
+ cam_f0=include,
496
+ cam_l0=include,
497
+ cam_l1=include,
498
+ cam_l2=include,
499
+ cam_r0=include,
500
+ cam_r1=include,
501
+ cam_r2=include,
502
+ cam_b0=include,
503
+ lidar_pc=include,
504
+ )
505
+
506
+ @classmethod
507
+ def build_no_sensors(cls) -> SensorConfig:
508
+ return cls.build_all_sensors(include=False)
509
+
510
+
511
+ @dataclass
512
+ class PDMResults:
513
+ no_at_fault_collisions: float
514
+ drivable_area_compliance: float
515
+ driving_direction_compliance: float
516
+
517
+ ego_progress: float
518
+ time_to_collision_within_bound: float
519
+ comfort: float
520
+
521
+ score: float
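A small usage sketch for the sensor selection above: `get_sensors_at_iteration` keeps a sensor when its flag is `True`, or when the queried history iteration appears in its list. Only code defined in this file is used.

from det_map.data.datasets.dataclasses import SensorConfig

# Load the front camera and the merged lidar only at history iteration 3 (the key frame).
sensor_config = SensorConfig.build_no_sensors()
sensor_config.cam_f0 = [3]
sensor_config.lidar_pc = [3]

print(sensor_config.get_sensors_at_iteration(3))  # ['cam_f0', 'lidar_pc']
print(sensor_config.get_sensors_at_iteration(0))  # []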
det_map/data/datasets/dataloader.py ADDED
@@ -0,0 +1,172 @@
1
+ from __future__ import annotations
2
+
3
+ import lzma
4
+ import pickle
5
+
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+ from tqdm import tqdm
9
+
10
+ from navsim.common.dataclasses import AgentInput, Scene, SceneFilter, SensorConfig
11
+ from navsim.planning.metric_caching.metric_cache import MetricCache
12
+
13
+
14
+ def filter_scenes(data_path: Path, scene_filter: SceneFilter) -> Dict[str, List[Dict[str, Any]]]:
15
+
16
+ def split_list(input_list: List[Any], num_frames: int, frame_interval: int) -> List[List[Any]]:
17
+ return [input_list[i : i + num_frames] for i in range(0, len(input_list), frame_interval)]
18
+
19
+ filtered_scenes: Dict[str, List[Dict[str, Any]]] = {}
20
+ stop_loading: bool = False
21
+
22
+ # filter logs
23
+ log_files = list(data_path.iterdir())
24
+ if scene_filter.log_names is not None:
25
+ log_files = [
26
+ log_file
27
+ for log_file in log_files
28
+ if log_file.name.replace(".pkl", "") in scene_filter.log_names
29
+ ]
30
+
31
+ if scene_filter.tokens is not None:
32
+ filter_tokens = True
33
+ tokens = set(scene_filter.tokens)
34
+ else:
35
+ filter_tokens = False
36
+
37
+ for log_pickle_path in tqdm(log_files, desc="Loading logs"):
38
+
39
+ scene_dict_list = pickle.load(open(log_pickle_path, "rb"))
40
+ for frame_list in split_list(
41
+ scene_dict_list, scene_filter.num_frames, scene_filter.frame_interval
42
+ ):
43
+ # Filter scenes which are too short
44
+ if len(frame_list) < scene_filter.num_frames:
45
+ continue
46
+
47
+ # Filter scenes with no route
48
+ if (
49
+ scene_filter.has_route
50
+ and len(frame_list[scene_filter.num_history_frames - 1]["roadblock_ids"]) == 0
51
+ ):
52
+ continue
53
+
54
+ # Filter by token
55
+ token = frame_list[scene_filter.num_history_frames - 1]["token"]
56
+ if filter_tokens and token not in tokens:
57
+ continue
58
+
59
+ filtered_scenes[token] = frame_list
60
+
61
+ if (scene_filter.max_scenes is not None) and (
62
+ len(filtered_scenes) >= scene_filter.max_scenes
63
+ ):
64
+ stop_loading = True
65
+ break
66
+
67
+ if stop_loading:
68
+ break
69
+
70
+ return filtered_scenes
71
+
72
+
73
+ class SceneLoader:
74
+
75
+ def __init__(
76
+ self,
77
+ data_path: Path,
78
+ sensor_blobs_path: Path,
79
+ scene_filter: SceneFilter,
80
+ sensor_config: SensorConfig = SensorConfig.build_no_sensors(),
81
+ ):
82
+
83
+ self.scene_frames_dicts = filter_scenes(data_path, scene_filter)
84
+ self._sensor_blobs_path = sensor_blobs_path
85
+ self._scene_filter = scene_filter
86
+ self._sensor_config = sensor_config
87
+
88
+ @property
89
+ def tokens(self) -> List[str]:
90
+ return list(self.scene_frames_dicts.keys())
91
+
92
+ def __len__(self):
93
+ return len(self.tokens)
94
+
95
+ def __getitem__(self, idx) -> str:
96
+ return self.tokens[idx]
97
+
98
+ def get_scene_from_token(self, token: str) -> Scene:
99
+ assert token in self.tokens
100
+ return Scene.from_scene_dict_list(
101
+ self.scene_frames_dicts[token],
102
+ self._sensor_blobs_path,
103
+ num_history_frames=self._scene_filter.num_history_frames,
104
+ num_future_frames=self._scene_filter.num_future_frames,
105
+ sensor_config=self._sensor_config,
106
+ )
107
+
108
+ def get_agent_input_from_token(self, token: str) -> AgentInput:
109
+ assert token in self.tokens
110
+ return AgentInput.from_scene_dict_list(
111
+ self.scene_frames_dicts[token],
112
+ self._sensor_blobs_path,
113
+ num_history_frames=self._scene_filter.num_history_frames,
114
+ sensor_config=self._sensor_config,
115
+ )
116
+
117
+ def get_tokens_list_per_log(self) -> Dict[str, List[str]]:
118
+ # generate a dict that contains a list of tokens for each log-name
119
+ tokens_per_logs: Dict[str, List[str]] = {}
120
+ for token, scene_dict_list in self.scene_frames_dicts.items():
121
+ log_name = scene_dict_list[0]["log_name"]
122
+ if tokens_per_logs.get(log_name):
123
+ tokens_per_logs[log_name].append(token)
124
+ else:
125
+ tokens_per_logs.update({log_name: [token]})
126
+ return tokens_per_logs
127
+
128
+ class MetricCacheLoader:
129
+
130
+ def __init__(
131
+ self,
132
+ cache_path: Path,
133
+ file_name: str = "metric_cache.pkl",
134
+ ):
135
+
136
+ self._file_name = file_name
137
+ self.metric_cache_paths = self._load_metric_cache_paths(cache_path)
138
+
139
+ def _load_metric_cache_paths(self, cache_path: Path) -> Dict[str, Path]:
140
+ metadata_dir = cache_path / "metadata"
141
+ metadata_file = [file for file in metadata_dir.iterdir() if ".csv" in str(file)][0]
142
+ with open(str(metadata_file), "r") as f:
143
+ cache_paths = f.read().splitlines()[1:]
144
+ metric_cache_dict = {
145
+ cache_path.split("/")[-2]: cache_path
146
+ for cache_path in cache_paths
147
+ }
148
+ return metric_cache_dict
149
+
150
+ @property
151
+ def tokens(self) -> List[str]:
152
+ return list(self.metric_cache_paths.keys())
153
+
154
+ def __len__(self):
155
+ return len(self.metric_cache_paths)
156
+
157
+ def __getitem__(self, idx: int) -> MetricCache:
158
+ return self.get_from_token(self.tokens[idx])
159
+
160
+ def get_from_token(self, token: str) -> MetricCache:
161
+
162
+ with lzma.open(self.metric_cache_paths[token], "rb") as f:
163
+ metric_cache: MetricCache = pickle.load(f)
164
+
165
+ return metric_cache
166
+
167
+ def to_pickle(self, path: Path) -> None:
168
+ full_metric_cache = {}
169
+ for token in tqdm(self.tokens):
170
+ full_metric_cache[token] = self.get_from_token(token)
171
+ with open(path, "wb") as f:
172
+ pickle.dump(full_metric_cache, f)
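A usage sketch for `SceneLoader`, assuming the navsim `SceneFilter`/`SensorConfig` mirror the fields shown in dataclasses.py above; the paths are placeholders for the OpenScene meta-data and sensor-blob directories.

from pathlib import Path

from det_map.data.datasets.dataloader import SceneLoader
from navsim.common.dataclasses import SceneFilter, SensorConfig

scene_loader = SceneLoader(
    data_path=Path("/path/to/meta_datas/trainval"),            # placeholder
    sensor_blobs_path=Path("/path/to/sensor_blobs/trainval"),  # placeholder
    scene_filter=SceneFilter(num_history_frames=4, num_future_frames=10, max_scenes=8),
    sensor_config=SensorConfig.build_all_sensors(include=[3]),
)

token = scene_loader.tokens[0]
agent_input = scene_loader.get_agent_input_from_token(token)  # ego history + sensors only
scene = scene_loader.get_scene_from_token(token)              # additionally builds map API and annotations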
det_map/data/datasets/dataset.py ADDED
@@ -0,0 +1,41 @@
1
+ from typing import Dict, List, Tuple
2
+ import torch
3
+
4
+ from det_map.data.datasets.dataloader import SceneLoader
5
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
6
+
7
+ class Dataset(torch.utils.data.Dataset):
8
+ def __init__(
9
+ self,
10
+ pipelines, is_train,
11
+ scene_loader: SceneLoader,
12
+ feature_builders: List[AbstractFeatureBuilder],
13
+ target_builders: List[AbstractTargetBuilder]
14
+ ):
15
+ super().__init__()
16
+ self._scene_loader = scene_loader
17
+ self._feature_builders = feature_builders
18
+ self._target_builders = target_builders
19
+ self.pipelines = pipelines
20
+ self.is_train = is_train
21
+
22
+ def __len__(self):
23
+ return len(self._scene_loader)
24
+
25
+ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
26
+ scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
27
+ features: Dict[str, torch.Tensor] = {}
28
+ for builder in self._feature_builders:
29
+ features.update(builder.compute_features(scene.get_agent_input()))
30
+ targets: Dict[str, torch.Tensor] = {}
31
+ for builder in self._target_builders:
32
+ targets.update(builder.compute_targets(scene))
33
+ # aug for four frames respectively
34
+ features, targets = self.pipelines['lidar_aug'](features, targets)
35
+ # project lidar at frame i to image i
36
+ features, targets = self.pipelines['depth'](features, targets)
37
+ # concat all lidar points, remove points too far/close
38
+ features, targets = self.pipelines['lidar_filter'](features, targets)
39
+ # shuffle all lidar points
40
+ features, targets = self.pipelines['point_shuffle'](features, targets)
41
+ return (features, targets)
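For orientation, a sketch of the `pipelines` dict consumed in `__getitem__` above; the keys must match the four stages called there ('img' is consumed by the feature builder, not the dataset itself). The augmentation and grid values are illustrative stand-ins for the agent YAML configs, and `scene_loader`, `feature_builders`, `target_builders` are assumed to be built as elsewhere in this diff.

import torch

from det_map.data.datasets.dataset import Dataset
from det_map.data.pipelines.lidar_aug import LiDARAug
from det_map.data.pipelines.prepare_depth import LiDAR2Depth
from det_map.data.pipelines.filter_lidar import LiDARFilter
from det_map.data.pipelines.point_shuffle import PointShuffle

bda_aug_conf = {'rot_lim': '(-22.5, 22.5)', 'scale_lim': '(0.95, 1.05)',
                'tran_lim': '[0.0, 0.0, 0.0]', 'flip_dx_ratio': 0.5, 'flip_dy_ratio': 0.5}
grid_config = {'x': '(-50.0, 50.0, 0.5)', 'y': '(-50.0, 50.0, 0.5)',
               'z': '(-5, 3, 8)', 'depth': '(1.0, 60.0, 0.5)'}

pipelines = {
    'lidar_aug': LiDARAug(bda_aug_conf=bda_aug_conf, is_train=True),
    'depth': LiDAR2Depth(grid_config=grid_config),
    'lidar_filter': LiDARFilter(close_radius=1.0),
    'point_shuffle': PointShuffle(is_train=True),
}

dataset = Dataset(pipelines=pipelines, is_train=True, scene_loader=scene_loader,
                  feature_builders=feature_builders, target_builders=target_builders)
loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=4)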
det_map/data/datasets/dataset_det.py ADDED
@@ -0,0 +1,28 @@
1
+ from typing import Dict, List, Tuple
2
+ import torch
3
+
4
+ from det_map.data.datasets.dataloader import SceneLoader
5
+ from det_map.data.datasets.dataset import Dataset
6
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
7
+
8
+ class DetDataset(Dataset):
9
+ def __init__(
10
+ self, **kwargs
11
+ ):
12
+ super().__init__(**kwargs)
13
+
14
+ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
15
+ scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
16
+ features: Dict[str, torch.Tensor] = {}
17
+ for builder in self._feature_builders:
18
+ features.update(builder.compute_features(scene.get_agent_input()))
19
+ targets: Dict[str, torch.Tensor] = {}
20
+ for builder in self._target_builders:
21
+ targets.update(builder.compute_targets(scene))
22
+ # todo sampler
23
+ features, targets = self.pipelines['lidar_aug'](features, targets)
24
+ features, targets = self.pipelines['depth'](features, targets)
25
+ features, targets = self.pipelines['lidar_filter'](features, targets)
26
+ features, targets = self.pipelines['point_shuffle'](features, targets)
27
+
28
+ return (features, targets)
det_map/data/datasets/feature_builders.py ADDED
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+
5
+ import numpy as np
6
+ import torch
7
+
8
+ from det_map.data.datasets.dataclasses import AgentInput, Camera
9
+ from det_map.data.datasets.lidar_utils import transform_points, render_image
10
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder
11
+ from mmcv.parallel import DataContainer as DC
12
+
13
+ class LiDARCameraFeatureBuilder(AbstractFeatureBuilder):
14
+ def __init__(self, pipelines):
15
+ super().__init__()
16
+ self.pipelines = pipelines
17
+
18
+ def compute_features(self, agent_input: AgentInput) -> Dict[str, torch.Tensor]:
19
+ img_pipeline = self.pipelines['img']
20
+ timestamps_ori = agent_input.timestamps
21
+ timestamps = [(timestamps_ori[-1] - tmp) / 1e6 for tmp in timestamps_ori]
22
+
23
+ lidars = [np.copy(tmp.lidar_pc) for tmp in agent_input.lidars]
24
+ ego2globals = [tmp for tmp in agent_input.ego2globals]
25
+
26
+ # last frame is the key frame
27
+ global2ego_key = np.linalg.inv(ego2globals[-1])
28
+ # ego2global, global2ego key frame
29
+ lidars_warped = [transform_points(transform_points(pts, mat), global2ego_key)
30
+ for pts, mat in zip(lidars[:-1], ego2globals[:-1])]
31
+ lidars_warped.append(lidars[-1])
32
+ for i, l in enumerate(lidars_warped):
33
+ # x,y,z,intensity,timestamp
34
+ l[4] = timestamps[i]
35
+ lidars_warped[i] = torch.from_numpy(l[:5]).t()
36
+
37
+
38
+ # debug visualize lidar pc
39
+ # for idx, lidar in enumerate(lidars_warped):
40
+ # render_image(lidar, str('warped'+ str(idx)))
41
+ # for idx, lidar in enumerate([tmp.lidar_pc for tmp in agent_input.lidars]):
42
+ # render_image(lidar, str('ori'+ str(idx)))
43
+
44
+ cams_all_frames = [[
45
+ tmp.cam_f0,
46
+ # tmp.cam_l0,
47
+ # tmp.cam_l1,
48
+ # tmp.cam_l2,
49
+ # tmp.cam_r0,
50
+ # tmp.cam_r1,
51
+ # tmp.cam_r2,
52
+ tmp.cam_b0
53
+ ] for tmp in agent_input.cameras]
54
+
55
+ image, canvas, sensor2lidar_rotation, sensor2lidar_translation, intrinsics, distortion, post_rot, post_tran = [], [], [], [], [], [], [], []
56
+ for cams_frame_t in cams_all_frames:
57
+ image_t, canvas_t, sensor2lidar_rotation_t, sensor2lidar_translation_t, intrinsics_t, distortion_t, post_rot_t, post_tran_t = [], [], [], [], [], [], [], []
58
+ for cam in cams_frame_t:
59
+ cam_processed: Camera = img_pipeline(cam)
60
+ image_t.append(cam_processed.image)
61
+ canvas_t.append(cam_processed.canvas)
62
+ sensor2lidar_rotation_t.append(cam_processed.sensor2lidar_rotation)
63
+ sensor2lidar_translation_t.append(cam_processed.sensor2lidar_translation)
64
+ intrinsics_t.append(cam_processed.intrinsics)
65
+ distortion_t.append(cam_processed.distortion)
66
+ post_rot_t.append(cam_processed.post_rot)
67
+ post_tran_t.append(cam_processed.post_tran)
68
+ image.append(torch.stack(image_t))
69
+ canvas.append(torch.stack(canvas_t))
70
+ sensor2lidar_rotation.append(torch.stack(sensor2lidar_rotation_t))
71
+ sensor2lidar_translation.append(torch.stack(sensor2lidar_translation_t))
72
+ intrinsics.append(torch.stack(intrinsics_t))
73
+ distortion.append(torch.stack(distortion_t))
74
+ post_rot.append(torch.stack(post_rot_t))
75
+ post_tran.append(torch.stack(post_tran_t))
76
+
77
+
78
+ # img: T, N_CAM, C, H, W
79
+ # imgs = DC(torch.stack(image), cpu_only=False, stack=True)
80
+ #combine = torch.matmul(sensor2lidar_rotation, torch.inverse(intrinsics))
81
+ #coords = torch.matmul(combine, coords)
82
+ #coords += sensor2lidar_translation
83
+ imgs = torch.stack(image)
84
+ return {
85
+ "image": imgs,
86
+ 'canvas': torch.stack(canvas).to(imgs),
87
+ 'sensor2lidar_rotation': torch.stack(sensor2lidar_rotation).to(imgs),
88
+ 'sensor2lidar_translation': torch.stack(sensor2lidar_translation).to(imgs),
89
+ 'intrinsics': torch.stack(intrinsics).to(imgs),
90
+ 'distortion': torch.stack(distortion).to(imgs),
91
+ 'post_rot': torch.stack(post_rot).to(imgs),
92
+ 'post_tran': torch.stack(post_tran).to(imgs),
93
+ "lidars_warped": lidars_warped
94
+ }
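The sweep warping above composes two rigid transforms per frame: ego(i) -> global, then global -> ego(key frame). A compact numpy-only equivalent, operating on the (6, n) point layout documented in dataclasses.py:

import numpy as np

def sweep_to_key_frame(points_6xn: np.ndarray,
                       ego2global_i: np.ndarray,
                       ego2global_key: np.ndarray) -> np.ndarray:
    # Single 4x4 transform taking sweep i from its own ego frame into the key ego frame.
    T = np.linalg.inv(ego2global_key) @ ego2global_i
    out = points_6xn.copy()
    out[:3] = T[:3, :3] @ points_6xn[:3] + T[:3, 3:4]
    return out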
det_map/data/datasets/lidar_utils.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple
4
+
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+ from PIL import Image
8
+ from matplotlib import cm
9
+ from nuplan.database.utils.geometry import view_points
10
+
11
+
12
+ def transform_points(points, transf_matrix: npt.NDArray[np.float64]):
13
+ """
14
+ Applies a homogeneous transform.
15
+ :param transf_matrix: <np.float: 4, 4>. Homogeneous transformation matrix.
16
+ """
17
+ transf_matrix = transf_matrix.astype(np.float32)
18
+ points[:3, :] = transf_matrix[:3, :3] @ points[:3] + transf_matrix[:3, 3].reshape((-1, 1))
19
+ return points
20
+
21
+
22
+ def render_image(
23
+ points, name,
24
+ canvas_size: Tuple[int, int] = (1001, 1001),
25
+ view: npt.NDArray[np.float64] = np.array([[10, 0, 0, 500], [0, 10, 0, 500], [0, 0, 10, 0]]),
26
+ color_dim: int = 2,
27
+ ):
28
+ """
29
+ Renders pointcloud to an array with 3 channels appropriate for viewing as an image. The image is color coded
30
+ according the color_dim dimension of points (typically the height).
31
+ :param canvas_size: (width, height). Size of the canvas on which to render the image.
32
+ :param view: <np.float: n, n>. Defines an arbitrary projection (n <= 4).
33
+ :param color_dim: The dimension of the points to be visualized as color. Default is 2 for height.
34
+ :return: A Image instance.
35
+ """
36
+ # Apply desired transformation to the point cloud. (height is here considered independent of the view).
37
+ heights = points[2, :]
38
+ points = view_points(points[:3, :], view, normalize=False)
39
+ points[2, :] = heights
40
+
41
+ # Remove points that fall outside the canvas.
42
+ mask = np.ones(points.shape[1], dtype=bool) # type: ignore
43
+ mask = np.logical_and(mask, points[0, :] < canvas_size[0] - 1)
44
+ mask = np.logical_and(mask, points[0, :] > 0)
45
+ mask = np.logical_and(mask, points[1, :] < canvas_size[1] - 1)
46
+ mask = np.logical_and(mask, points[1, :] > 0)
47
+ points = points[:, mask]
48
+
49
+ # Scale color_values to be between 0 and 255.
50
+ color_values = points[color_dim, :]
51
+ color_values = 255.0 * (color_values - np.amin(color_values)) / (np.amax(color_values) - np.amin(color_values))
52
+
53
+ # Rounds to ints and generate colors that will be used in the image.
54
+ points = np.int16(np.round(points[:2, :]))
55
+ color_values = np.int16(np.round(color_values))
56
+ cmap = [cm.jet(i / 255, bytes=True)[:3] for i in range(256)]
57
+
58
+ # Populate canvas, use maximum color_value for each bin
59
+ render = np.tile(np.expand_dims(np.zeros(canvas_size, dtype=np.uint8), axis=2), [1, 1, 3]) # type: ignore
60
+ color_value_array: npt.NDArray[np.float64] = -1 * np.ones(canvas_size, dtype=float) # type: ignore
61
+ for (col, row), color_value in zip(points.T, color_values.T):
62
+ if color_value > color_value_array[row, col]:
63
+ color_value_array[row, col] = color_value
64
+ render[row, col] = cmap[color_value]
65
+
66
+ Image.fromarray(render).save(f'/mnt/f/e2e/navsim_ours/debug/{name}.png')
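For reference, the default `view` above is a plain scale-and-shift from metres to pixels (10 px per metre, origin at the canvas centre), so the 1001x1001 canvas covers roughly +/-50 m around the ego vehicle:

import numpy as np

view = np.array([[10, 0, 0, 500], [0, 10, 0, 500], [0, 0, 10, 0]])
point = np.array([[12.3], [-4.0], [1.5], [1.0]])  # homogeneous (x, y, z, 1) in metres
print(view @ point)                               # [[623.], [460.], [15.]] -> pixel column, pixel row, scaled height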
det_map/data/pipelines/__init__.py ADDED
File without changes
det_map/data/pipelines/color_utils.py ADDED
@@ -0,0 +1,357 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Callable, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+
8
+ def imnormalize_(img, mean, std, to_rgb=True):
9
+ """Inplace normalize an image with mean and std.
10
+
11
+ Args:
12
+ img (ndarray): Image to be normalized.
13
+ mean (ndarray): The mean to be used for normalize.
14
+ std (ndarray): The std to be used for normalize.
15
+ to_rgb (bool): Whether to convert to rgb.
16
+
17
+ Returns:
18
+ ndarray: The normalized image.
19
+ """
20
+ # cv2 inplace normalization does not accept uint8
21
+ assert img.dtype != np.uint8
22
+ mean = np.float64(mean.reshape(1, -1))
23
+ stdinv = 1 / np.float64(std.reshape(1, -1))
24
+ if to_rgb:
25
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace
26
+ cv2.subtract(img, mean, img) # inplace
27
+ cv2.multiply(img, stdinv, img) # inplace
28
+ return img
29
+
30
+
31
+ def imnormalize(img, mean, std, to_rgb=True):
32
+ """Normalize an image with mean and std.
33
+
34
+ Args:
35
+ img (ndarray): Image to be normalized.
36
+ mean (ndarray): The mean to be used for normalize.
37
+ std (ndarray): The std to be used for normalize.
38
+ to_rgb (bool): Whether to convert to rgb.
39
+
40
+ Returns:
41
+ ndarray: The normalized image.
42
+ """
43
+ img = img.copy().astype(np.float32)
44
+ return imnormalize_(img, mean, std, to_rgb)
45
+
46
+
47
+ def mmlabNormalize(img):
48
+ mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
49
+ std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
50
+ to_rgb = True
51
+ img = imnormalize(np.array(img), mean, std, to_rgb)
52
+ img = torch.tensor(img).float().permute(2, 0, 1).contiguous()
53
+ return img
54
+
55
+
56
+ def imconvert(img: np.ndarray, src: str, dst: str) -> np.ndarray:
57
+ """Convert an image from the src colorspace to dst colorspace.
58
+
59
+ Args:
60
+ img (ndarray): The input image.
61
+ src (str): The source colorspace, e.g., 'rgb', 'hsv'.
62
+ dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
63
+
64
+ Returns:
65
+ ndarray: The converted image.
66
+ """
67
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
68
+ out_img = cv2.cvtColor(img, code)
69
+ return out_img
70
+
71
+
72
+ def bgr2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
73
+ """Convert a BGR image to grayscale image.
74
+
75
+ Args:
76
+ img (ndarray): The input image.
77
+ keepdim (bool): If False (by default), then return the grayscale image
78
+ with 2 dims, otherwise 3 dims.
79
+
80
+ Returns:
81
+ ndarray: The converted grayscale image.
82
+ """
83
+ out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ if keepdim:
85
+ out_img = out_img[..., None]
86
+ return out_img
87
+
88
+
89
+ def rgb2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
90
+ """Convert a RGB image to grayscale image.
91
+
92
+ Args:
93
+ img (ndarray): The input image.
94
+ keepdim (bool): If False (by default), then return the grayscale image
95
+ with 2 dims, otherwise 3 dims.
96
+
97
+ Returns:
98
+ ndarray: The converted grayscale image.
99
+ """
100
+ out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
101
+ if keepdim:
102
+ out_img = out_img[..., None]
103
+ return out_img
104
+
105
+
106
+ def gray2bgr(img: np.ndarray) -> np.ndarray:
107
+ """Convert a grayscale image to BGR image.
108
+
109
+ Args:
110
+ img (ndarray): The input image.
111
+
112
+ Returns:
113
+ ndarray: The converted BGR image.
114
+ """
115
+ img = img[..., None] if img.ndim == 2 else img
116
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
117
+ return out_img
118
+
119
+
120
+ def gray2rgb(img: np.ndarray) -> np.ndarray:
121
+ """Convert a grayscale image to RGB image.
122
+
123
+ Args:
124
+ img (ndarray): The input image.
125
+
126
+ Returns:
127
+ ndarray: The converted RGB image.
128
+ """
129
+ img = img[..., None] if img.ndim == 2 else img
130
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
131
+ return out_img
132
+
133
+
134
+ def _convert_input_type_range(img: np.ndarray) -> np.ndarray:
135
+ """Convert the type and range of the input image.
136
+
137
+ It converts the input image to np.float32 type and range of [0, 1].
138
+ It is mainly used for pre-processing the input image in colorspace
139
+ conversion functions such as rgb2ycbcr and ycbcr2rgb.
140
+
141
+ Args:
142
+ img (ndarray): The input image. It accepts:
143
+ 1. np.uint8 type with range [0, 255];
144
+ 2. np.float32 type with range [0, 1].
145
+
146
+ Returns:
147
+ (ndarray): The converted image with type of np.float32 and range of
148
+ [0, 1].
149
+ """
150
+ img_type = img.dtype
151
+ img = img.astype(np.float32)
152
+ if img_type == np.float32:
153
+ pass
154
+ elif img_type == np.uint8:
155
+ img /= 255.
156
+ else:
157
+ raise TypeError('The img type should be np.float32 or np.uint8, '
158
+ f'but got {img_type}')
159
+ return img
160
+
161
+
162
+ def _convert_output_type_range(
163
+ img: np.ndarray, dst_type: Union[np.uint8, np.float32]) -> np.ndarray:
164
+ """Convert the type and range of the image according to dst_type.
165
+
166
+ It converts the image to desired type and range. If `dst_type` is np.uint8,
167
+ images will be converted to np.uint8 type with range [0, 255]. If
168
+ `dst_type` is np.float32, it converts the image to np.float32 type with
169
+ range [0, 1].
170
+ It is mainly used for post-processing images in colorspace conversion
171
+ functions such as rgb2ycbcr and ycbcr2rgb.
172
+
173
+ Args:
174
+ img (ndarray): The image to be converted with np.float32 type and
175
+ range [0, 255].
176
+ dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
177
+ converts the image to np.uint8 type with range [0, 255]. If
178
+ dst_type is np.float32, it converts the image to np.float32 type
179
+ with range [0, 1].
180
+
181
+ Returns:
182
+ (ndarray): The converted image with desired type and range.
183
+ """
184
+ if dst_type not in (np.uint8, np.float32):
185
+ raise TypeError('The dst_type should be np.float32 or np.uint8, '
186
+ f'but got {dst_type}')
187
+ if dst_type == np.uint8:
188
+ img = img.round()
189
+ else:
190
+ img /= 255.
191
+ return img.astype(dst_type)
192
+
193
+
194
+ def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
195
+ """Convert a RGB image to YCbCr image.
196
+
197
+ This function produces the same results as Matlab's `rgb2ycbcr` function.
198
+ It implements the ITU-R BT.601 conversion for standard-definition
199
+ television. See more details in
200
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
201
+
202
+ It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
203
+ In OpenCV, it implements a JPEG conversion. See more details in
204
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
205
+
206
+ Args:
207
+ img (ndarray): The input image. It accepts:
208
+ 1. np.uint8 type with range [0, 255];
209
+ 2. np.float32 type with range [0, 1].
210
+ y_only (bool): Whether to only return Y channel. Default: False.
211
+
212
+ Returns:
213
+ ndarray: The converted YCbCr image. The output image has the same type
214
+ and range as input image.
215
+ """
216
+ img_type = img.dtype
217
+ img = _convert_input_type_range(img)
218
+ if y_only:
219
+ out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
220
+ else:
221
+ out_img = np.matmul(
222
+ img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
223
+ [24.966, 112.0, -18.214]]) + [16, 128, 128]
224
+ out_img = _convert_output_type_range(out_img, img_type)
225
+ return out_img
226
+
227
+
228
+ def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
229
+ """Convert a BGR image to YCbCr image.
230
+
231
+ The bgr version of rgb2ycbcr.
232
+ It implements the ITU-R BT.601 conversion for standard-definition
233
+ television. See more details in
234
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
235
+
236
+ It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
237
+ In OpenCV, it implements a JPEG conversion. See more details in
238
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
239
+
240
+ Args:
241
+ img (ndarray): The input image. It accepts:
242
+ 1. np.uint8 type with range [0, 255];
243
+ 2. np.float32 type with range [0, 1].
244
+ y_only (bool): Whether to only return Y channel. Default: False.
245
+
246
+ Returns:
247
+ ndarray: The converted YCbCr image. The output image has the same type
248
+ and range as input image.
249
+ """
250
+ img_type = img.dtype
251
+ img = _convert_input_type_range(img)
252
+ if y_only:
253
+ out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
254
+ else:
255
+ out_img = np.matmul(
256
+ img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
257
+ [65.481, -37.797, 112.0]]) + [16, 128, 128]
258
+ out_img = _convert_output_type_range(out_img, img_type)
259
+ return out_img
260
+
261
+
262
+ def ycbcr2rgb(img: np.ndarray) -> np.ndarray:
263
+ """Convert a YCbCr image to RGB image.
264
+
265
+ This function produces the same results as Matlab's ycbcr2rgb function.
266
+ It implements the ITU-R BT.601 conversion for standard-definition
267
+ television. See more details in
268
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
269
+
270
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
271
+ In OpenCV, it implements a JPEG conversion. See more details in
272
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
273
+
274
+ Args:
275
+ img (ndarray): The input image. It accepts:
276
+ 1. np.uint8 type with range [0, 255];
277
+ 2. np.float32 type with range [0, 1].
278
+
279
+ Returns:
280
+ ndarray: The converted RGB image. The output image has the same type
281
+ and range as input image.
282
+ """
283
+ img_type = img.dtype
284
+ img = _convert_input_type_range(img) * 255
285
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
286
+ [0, -0.00153632, 0.00791071],
287
+ [0.00625893, -0.00318811, 0]]) * 255.0 + [
288
+ -222.921, 135.576, -276.836
289
+ ]
290
+ out_img = _convert_output_type_range(out_img, img_type)
291
+ return out_img
292
+
293
+
294
+ def ycbcr2bgr(img: np.ndarray) -> np.ndarray:
295
+ """Convert a YCbCr image to BGR image.
296
+
297
+ The bgr version of ycbcr2rgb.
298
+ It implements the ITU-R BT.601 conversion for standard-definition
299
+ television. See more details in
300
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
301
+
302
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
303
+ In OpenCV, it implements a JPEG conversion. See more details in
304
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
305
+
306
+ Args:
307
+ img (ndarray): The input image. It accepts:
308
+ 1. np.uint8 type with range [0, 255];
309
+ 2. np.float32 type with range [0, 1].
310
+
311
+ Returns:
312
+ ndarray: The converted BGR image. The output image has the same type
313
+ and range as input image.
314
+ """
315
+ img_type = img.dtype
316
+ img = _convert_input_type_range(img) * 255
317
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
318
+ [0.00791071, -0.00153632, 0],
319
+ [0, -0.00318811, 0.00625893]]) * 255.0 + [
320
+ -276.836, 135.576, -222.921
321
+ ]
322
+ out_img = _convert_output_type_range(out_img, img_type)
323
+ return out_img
324
+
325
+
326
+ def convert_color_factory(src: str, dst: str) -> Callable:
327
+
328
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
329
+
330
+ def convert_color(img: np.ndarray) -> np.ndarray:
331
+ out_img = cv2.cvtColor(img, code)
332
+ return out_img
333
+
334
+ convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
335
+ image.
336
+
337
+ Args:
338
+ img (ndarray or str): The input image.
339
+
340
+ Returns:
341
+ ndarray: The converted {dst.upper()} image.
342
+ """
343
+
344
+ return convert_color
345
+
346
+
347
+ bgr2rgb = convert_color_factory('bgr', 'rgb')
348
+
349
+ rgb2bgr = convert_color_factory('rgb', 'bgr')
350
+
351
+ bgr2hsv = convert_color_factory('bgr', 'hsv')
352
+
353
+ hsv2bgr = convert_color_factory('hsv', 'bgr')
354
+
355
+ bgr2hls = convert_color_factory('bgr', 'hls')
356
+
357
+ hls2bgr = convert_color_factory('hls', 'bgr')
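A minimal usage sketch for the helpers above: `mmlabNormalize` normalises with the ImageNet mean/std, swaps the first and last channels (the mmcv `to_rgb` convention), and returns a C, H, W float tensor, while `rgb2ycbcr(..., y_only=True)` keeps only the luma channel in the input's dtype and range.

import numpy as np

from det_map.data.pipelines.color_utils import mmlabNormalize, rgb2ycbcr

img = (np.random.rand(4, 6, 3) * 255).astype(np.uint8)  # H, W, C test image
tensor = mmlabNormalize(img)                             # float tensor of shape (3, 4, 6)
luma = rgb2ycbcr(img, y_only=True)                       # uint8 array of shape (4, 6)
print(tuple(tensor.shape), luma.shape)                   # (3, 4, 6) (4, 6)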
det_map/data/pipelines/filter_lidar.py ADDED
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ from typing import Tuple
3
+
4
+ import torch
5
+
6
+
7
+ class LiDARFilter(object):
8
+ def __init__(self,
9
+ close_radius=1.0,
10
+ x_range='(-50.0, 50.0)',
11
+ y_range='(-50.0, 50.0)',
12
+ z_range='(-5, 20)',
13
+ ):
14
+ self.radius = close_radius
15
+ self.x_range = eval(x_range)
16
+ self.y_range = eval(y_range)
17
+ self.z_range = eval(z_range)
18
+
19
+ def _remove_close(self, points, radius=1.0):
20
+ """Removes point too close within a certain radius from origin.
21
+
22
+ Args:
23
+ points (np.ndarray | :obj:`BasePoints`): Sweep points.
24
+ radius (float, optional): Radius below which points are removed.
25
+ Defaults to 1.0.
26
+
27
+ Returns:
28
+ np.ndarray: Points after removing.
29
+ """
30
+ x_filt = torch.abs(points[:, 0]) < radius
31
+ y_filt = torch.abs(points[:, 1]) < radius
32
+ not_close = torch.logical_not(torch.logical_and(x_filt, y_filt))
33
+ return points[not_close]
34
+
35
+ def range_filter(
36
+ self,
37
+ points,
38
+ xrange: Tuple[float, float] = (-np.inf, np.inf),
39
+ yrange: Tuple[float, float] = (-np.inf, np.inf),
40
+ zrange: Tuple[float, float] = (-np.inf, np.inf),
41
+ ):
42
+ """
43
+ Restricts points to specified ranges.
44
+ :param xrange: (xmin, xmax).
45
+ :param yrange: (ymin, ymax).
46
+ :param zrange: (zmin, zmax).
47
+ """
48
+ # Figure out which points to keep.
49
+ keep_x = torch.logical_and(xrange[0] <= points[:, 0], points[:, 0] <= xrange[1])
50
+ keep_y = torch.logical_and(yrange[0] <= points[:, 1], points[:, 1] <= yrange[1])
51
+ keep_z = torch.logical_and(zrange[0] <= points[:, 2], points[:, 2] <= zrange[1])
52
+ keep = torch.logical_and(keep_x, torch.logical_and(keep_y, keep_z))
53
+ return points[keep]
54
+
55
+
56
+ def __call__(self, features, targets):
57
+ """Call function to load multi-sweep point clouds from files.
58
+
59
+ Args:
60
+ results (dict): Result dict containing multi-sweep point cloud
61
+ filenames.
62
+
63
+ Returns:
64
+ dict: The result dict containing the multi-sweep points data.
65
+ Added key and value are described below.
66
+
67
+ - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
68
+ cloud arrays.
69
+ """
70
+ points = torch.cat(features['lidars_warped'], 0)
71
+ points = self._remove_close(points, self.radius)
72
+ points = self.range_filter(points, self.x_range, self.y_range, self.z_range)
73
+ features['lidar'] = points
74
+ return features, targets
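A small sanity check of the filter above: points within `close_radius` of the ego origin are dropped first, then anything outside the configured x/y/z ranges.

import torch

from det_map.data.pipelines.filter_lidar import LiDARFilter

lidar_filter = LiDARFilter(close_radius=1.0)
points = torch.tensor([[0.2, 0.3, 0.0, 1.0, 0.0],    # too close to the ego origin -> dropped
                       [10.0, -5.0, 1.0, 1.0, 0.0],  # kept
                       [80.0, 0.0, 1.0, 1.0, 0.0]])  # outside x_range -> dropped
features = {'lidars_warped': [points]}
features, _ = lidar_filter(features, targets={})
print(features['lidar'].shape)                        # torch.Size([1, 5])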
det_map/data/pipelines/lidar_aug.py ADDED
@@ -0,0 +1,151 @@
1
+ import numpy as np
2
+ import torch
3
+ from nuplan.common.actor_state.tracked_objects_types import (
4
+ TrackedObjectType,
5
+ )
6
+
7
+ OBJECT_TYPE_DICT = {
8
+ "vehicle": TrackedObjectType.VEHICLE,
9
+ "pedestrian": TrackedObjectType.PEDESTRIAN,
10
+ "bicycle": TrackedObjectType.BICYCLE,
11
+ "traffic_cone": TrackedObjectType.TRAFFIC_CONE,
12
+ "barrier": TrackedObjectType.BARRIER,
13
+ "czone_sign": TrackedObjectType.CZONE_SIGN,
14
+ "generic_object": TrackedObjectType.GENERIC_OBJECT,
15
+ }
16
+
17
+
18
+ def limit_period(val, offset=0.5, period=2 * np.pi):
19
+ """Limit the value into a period for periodic function.
20
+
21
+ Args:
22
+ val (torch.Tensor | np.ndarray): The value to be converted.
23
+ offset (float, optional): Offset to set the value range.
24
+ Defaults to 0.5.
25
+ period (float, optional): Period of the value. Defaults to 2 * np.pi.
26
+
27
+ Returns:
28
+ (torch.Tensor | np.ndarray): Value in the range of
29
+ [-offset * period, (1-offset) * period]
30
+ """
31
+ limited_val = val - torch.floor(val / period + offset) * period
32
+ return limited_val
33
+
34
+
35
+ class LiDARAug(object):
36
+ def __init__(self,
37
+ bda_aug_conf, is_train,
38
+ x_range='(-50.0, 50.0)',
39
+ y_range='(-50.0, 50.0)',
40
+ z_range='(-5, 20)',
41
+ ):
42
+ for k in ['rot_lim', 'scale_lim', 'tran_lim']:
43
+ bda_aug_conf[k] = eval(bda_aug_conf[k])
44
+ self.bda_aug_conf = bda_aug_conf
45
+ self.is_train = is_train
46
+ self.x_range = eval(x_range)
47
+ self.y_range = eval(y_range)
48
+ self.z_range = eval(z_range)
49
+
50
+ def sample_bda_augmentation(self):
51
+ """Generate bda augmentation values based on bda_config."""
52
+ if self.is_train:
53
+ rotate_bda = np.random.uniform(*self.bda_aug_conf['rot_lim'])
54
+ scale_bda = np.random.uniform(*self.bda_aug_conf['scale_lim'])
55
+ flip_dx = np.random.uniform() < self.bda_aug_conf['flip_dx_ratio']
56
+ flip_dy = np.random.uniform() < self.bda_aug_conf['flip_dy_ratio']
57
+ translation_std = self.bda_aug_conf.get('tran_lim', [0.0, 0.0, 0.0])
58
+ tran_bda = np.random.normal(scale=translation_std, size=3).T
59
+ else:
60
+ rotate_bda = 0
61
+ scale_bda = 1.0
62
+ flip_dx = False
63
+ flip_dy = False
64
+ tran_bda = np.zeros((1, 3), dtype=np.float32)
65
+ return rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda
66
+
67
+ def bev_transform(self, gt_boxes, rotate_angle, scale_ratio, flip_dx,
68
+ flip_dy, tran_bda, rot_mat):
69
+ if gt_boxes.shape[0] > 0:
70
+ gt_boxes[:, :3] = (
71
+ rot_mat @ gt_boxes[:, :3].unsqueeze(-1)).squeeze(-1)
72
+ gt_boxes[:, 3:6] *= scale_ratio
73
+ gt_boxes[:, 6] += rotate_angle
74
+ if flip_dx:
75
+ gt_boxes[:,
76
+ 6] = 2 * torch.asin(torch.tensor(1.0)) - gt_boxes[:,
77
+ 6]
78
+ if flip_dy:
79
+ gt_boxes[:, 6] = -gt_boxes[:, 6]
80
+ gt_boxes[:, 7:] = (
81
+ rot_mat[:2, :2] @ gt_boxes[:, 7:].unsqueeze(-1)).squeeze(-1)
82
+ gt_boxes[:, :3] = gt_boxes[:, :3] + tran_bda
83
+ return gt_boxes
84
+
85
+ def __call__(self, features, targets):
86
+ # 1. filter box based on ranges
87
+ # 2. filter label based on classes
88
+ if 'dets' in targets and 'labels' in targets:
89
+ boxes = targets['dets']
90
+ labels = targets['labels']
91
+
92
+ for t, (box, label) in enumerate(zip(boxes, labels)):
93
+ label_mask = np.array([n in OBJECT_TYPE_DICT for n in label], dtype=np.bool_)
94
+ label_mask = torch.from_numpy(label_mask)
95
+ range_mask = ((box[:, 0] > self.x_range[0]) &
96
+ (box[:, 0] < self.x_range[1]) &
97
+ (box[:, 1] > self.y_range[0]) &
98
+ (box[:, 1] < self.y_range[1]))
99
+ mask = range_mask & label_mask
100
+ box_of_interest = box[mask]
101
+ box_of_interest[:, 6] = limit_period(box_of_interest[:, 6])
102
+ boxes[t] = box_of_interest.float()
103
+
104
+ labels[t] = torch.from_numpy(np.array([OBJECT_TYPE_DICT[x].value for
105
+ x in label], dtype=np.int64))[mask]
106
+ targets['dets'] = boxes
107
+ targets['labels'] = labels
108
+
109
+ rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda = \
110
+ self.sample_bda_augmentation()
111
+ bda_mat = torch.zeros(4, 4)
112
+ bda_mat[3, 3] = 1
113
+ rotate_angle = torch.tensor(rotate_bda / 180 * np.pi)
114
+ rot_sin = torch.sin(rotate_angle)
115
+ rot_cos = torch.cos(rotate_angle)
116
+ rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0],
117
+ [0, 0, 1]])
118
+ scale_mat = torch.Tensor([[scale_bda, 0, 0], [0, scale_bda, 0],
119
+ [0, 0, scale_bda]])
120
+ flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
121
+ if flip_dx:
122
+ flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0],
123
+ [0, 0, 1]])
124
+ if flip_dy:
125
+ flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0],
126
+ [0, 0, 1]])
127
+ bda_rot = flip_mat @ (scale_mat @ rot_mat)
128
+
129
+ if 'dets' in targets:
130
+ for idx, boxes in enumerate(targets['dets']):
131
+ targets['dets'][idx] = self.bev_transform(boxes, rotate_bda, scale_bda,
132
+ flip_dx, flip_dy, tran_bda, bda_rot)
133
+ # print('before bda')
134
+ # print(features['lidars_warped'][-1][:, 0].max())
135
+ # print(features['lidars_warped'][-1][:, 0].min())
136
+ # print(features['lidars_warped'][-1][:, 1].max())
137
+ # print(features['lidars_warped'][-1][:, 1].min())
138
+ for idx, points in enumerate(features['lidars_warped']):
139
+ points_aug = (bda_rot @ points[:, :3].unsqueeze(-1)).squeeze(-1)
140
+ points[:, :3] = points_aug + tran_bda
141
+ features['lidars_warped'][idx] = points
142
+
143
+ # print('after bda')
144
+ # print(features['lidars_warped'][-1][:, 0].max())
145
+ # print(features['lidars_warped'][-1][:, 0].min())
146
+ # print(features['lidars_warped'][-1][:, 1].max())
147
+ # print(features['lidars_warped'][-1][:, 1].min())
148
+ bda_mat[:3, :3] = bda_rot
149
+ bda_mat[:3, 3] = torch.from_numpy(tran_bda)
150
+ features['bda'] = bda_mat
151
+ return features, targets
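A minimal sketch of how the BDA (BEV data augmentation) matrix above is composed and applied; the rotation, scale and flip values are arbitrary examples, and the same `bda_rot @ point` pattern is what `__call__` applies to both boxes and lidar points.

import math

import torch

rotate_bda, scale_bda, flip_dx, flip_dy = 10.0, 1.05, True, False

ang = rotate_bda / 180 * math.pi
rot_mat = torch.tensor([[math.cos(ang), -math.sin(ang), 0.0],
                        [math.sin(ang),  math.cos(ang), 0.0],
                        [0.0, 0.0, 1.0]])
scale_mat = scale_bda * torch.eye(3)
flip_mat = torch.diag(torch.tensor([-1.0 if flip_dx else 1.0,
                                    -1.0 if flip_dy else 1.0, 1.0]))
bda_rot = flip_mat @ (scale_mat @ rot_mat)              # flip o scale o rotate, as above

points_xyz = torch.randn(100, 3)
points_aug = (bda_rot @ points_xyz.unsqueeze(-1)).squeeze(-1)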
det_map/data/pipelines/point_shuffle.py ADDED
@@ -0,0 +1,17 @@
1
+ import numpy as np
2
+ from typing import Tuple
3
+
4
+ import torch
5
+
6
+
7
+ class PointShuffle(object):
8
+ def __init__(self, is_train):
9
+ self.is_train = is_train
10
+
11
+ def __call__(self, features, targets):
12
+ if self.is_train:
13
+ points = features['lidar']
14
+ cnt = points.shape[0]
15
+ idx = torch.randperm(cnt, device=points.device)
16
+ features['lidar'] = points[idx]
17
+ return features, targets
det_map/data/pipelines/prepare_depth.py ADDED
@@ -0,0 +1,76 @@
1
+ import torch
2
+ import numpy as np
3
+ import PIL.Image as Image
4
+
5
+ class LiDAR2Depth(object):
6
+
7
+ def __init__(self,
8
+ grid_config,
9
+ ):
10
+ self.x = eval(grid_config['x'])
11
+ self.y = eval(grid_config['y'])
12
+ self.z = eval(grid_config['z'])
13
+ self.depth = eval(grid_config['depth'])
14
+
15
+ def points2depthmap(self, points, height, width):
16
+ height, width = height, width
17
+ depth_map = torch.zeros((height, width), dtype=torch.float32)
18
+ coor = torch.round(points[:, :2])
19
+ depth = points[:, 2]
20
+ kept1 = (coor[:, 0] >= 0) & (coor[:, 0] < width) & (
21
+ coor[:, 1] >= 0) & (coor[:, 1] < height) & (
22
+ depth < self.depth[1]) & (
23
+ depth >= self.depth[0])
24
+ coor, depth = coor[kept1], depth[kept1]
25
+ ranks = coor[:, 0] + coor[:, 1] * width
26
+ sort = (ranks + depth / 100.).argsort()
27
+ coor, depth, ranks = coor[sort], depth[sort], ranks[sort]
28
+
29
+ kept2 = torch.ones(coor.shape[0], device=coor.device, dtype=torch.bool)
30
+ kept2[1:] = (ranks[1:] != ranks[:-1])
31
+ coor, depth = coor[kept2], depth[kept2]
32
+ coor = coor.to(torch.long)
33
+ depth_map[coor[:, 1], coor[:, 0]] = depth
34
+ return depth_map
35
+
36
+ def __call__(self, features, targets):
37
+ # points, img, sensor2lidar_rotation, sensor2lidar_translation, intrinsics,
38
+ # post_rot, post_tran
39
+ # List: length=frames
40
+ lidar_all_frames = features['lidars_warped']
41
+ # image: T, N_CAMS, C, H, W
42
+ T, N, _, H, W = features['image'].shape
43
+ rots, trans, intrinsics = (features['sensor2lidar_rotation'],
44
+ features['sensor2lidar_translation'],
45
+ features['intrinsics'])
46
+ post_rot, post_tran, bda = (features['post_rot'],
47
+ features['post_tran'], features['bda'])
48
+
49
+ t = -1
50
+ depth_t = []
51
+ lidar_t = lidar_all_frames[t][:, :3]
52
+ lidar_t = lidar_t - bda[:3, 3].view(1, 3)
53
+ lidar_t = lidar_t.matmul(torch.inverse(bda[:3, :3]).T)
54
+
55
+ # print('cancel bda')
56
+ # print(lidar_t[:, 0].max())
57
+ # print(lidar_t[:, 0].min())
58
+ # print(lidar_t[:, 1].max())
59
+ # print(lidar_t[:, 1].min())
60
+
61
+ for n in range(N):
62
+ points_img = lidar_t - trans[t, n:n + 1, :]
63
+ lidar2cam_rot = torch.inverse(rots[t, n])
64
+ # lidar2cam, cam2img
65
+ points_img = points_img.matmul(lidar2cam_rot.T).matmul(intrinsics[t, n].T)
66
+ points_img = torch.cat(
67
+ [points_img[:, :2] / points_img[:, 2:3], points_img[:, 2:3]],
68
+ 1)
69
+ points_img = points_img.matmul(
70
+ post_rot[t, n].T) + post_tran[t, n:n + 1, :]
71
+ depth_curr = self.points2depthmap(points_img, features['canvas'][-1, n].shape[0], features['canvas'][-1, n].shape[1])
72
+ depth_t.append(depth_curr)
73
+ # Image.fromarray((1- depth_curr.clamp(0,1)).cpu().numpy() * 255).convert('L').save(f'/mnt/f/e2e/navsim_ours/debug/depth{n}.png')
74
+ # Image.fromarray(features['canvas'][-1, n].cpu().numpy().astype(np.uint8)).convert('RGB').save(f'/mnt/f/e2e/navsim_ours/debug/canvas{n}.png')
75
+ features['gt_depth'] = torch.stack(depth_t)
76
+ return features, targets
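The subtle step in `points2depthmap` is how several LiDAR points falling on the same pixel are resolved: sorting by `ranks + depth / 100.` groups points by pixel with the nearest point first, so keeping the first element of each group keeps the smallest depth per pixel. A self-contained toy illustration (made-up coordinates):

```python
# Toy illustration of the de-duplication trick in points2depthmap.
import torch

coor = torch.tensor([[3, 2], [3, 2], [7, 5]])          # (u, v) pixel coordinates
depth = torch.tensor([12.0, 4.0, 30.0])                # metres
width = 10

ranks = coor[:, 0] + coor[:, 1] * width                # one rank per pixel
order = (ranks + depth / 100.).argsort()               # group by pixel, nearest first
coor, depth, ranks = coor[order], depth[order], ranks[order]

keep = torch.ones(coor.shape[0], dtype=torch.bool)
keep[1:] = ranks[1:] != ranks[:-1]                     # first (= nearest) point per pixel
print(coor[keep], depth[keep])                         # pixel (3, 2) keeps depth 4.0
```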
det_map/data/pipelines/prepare_img.py ADDED
@@ -0,0 +1,218 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+
6
+ from det_map.data.datasets.dataclasses import Camera
7
+ from det_map.data.pipelines.color_utils import bgr2hsv, hsv2bgr, mmlabNormalize
8
+
9
+
10
+ class PrepareImageInputs(object):
11
+ """Load multi channel images from a list of separate channel files.
12
+
13
+ Expects results['img_filename'] to be a list of filenames.
14
+
15
+ Args:
16
+ data_config (dict): Input-size and view-augmentation configuration.
17
+ is_train (bool): Whether to sample training-time augmentation. Defaults to False.
18
+ opencv_pp (bool): Whether to apply the transform via cv2.warpAffine. Defaults to False.
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ data_config,
24
+ is_train=False,
25
+ opencv_pp=False,
26
+ ):
27
+ self.is_train = is_train
28
+ self.data_config = data_config
29
+ self.normalize_img = mmlabNormalize
30
+ self.opencv_pp = opencv_pp
31
+
32
+ def get_rot(self, h):
33
+ return torch.Tensor([
34
+ [np.cos(h), np.sin(h)],
35
+ [-np.sin(h), np.cos(h)],
36
+ ])
37
+
38
+ def img_transform(self, img, post_rot, post_tran, resize, resize_dims,
39
+ crop, flip, rotate):
40
+ # adjust image
41
+ if not self.opencv_pp:
42
+ img = self.img_transform_core(img, resize_dims, crop, flip, rotate)
43
+
44
+ # post-homography transformation
45
+ post_rot *= resize
46
+ post_tran -= torch.Tensor(crop[:2])
47
+ if flip:
48
+ A = torch.Tensor([[-1, 0], [0, 1]])
49
+ b = torch.Tensor([crop[2] - crop[0], 0])
50
+ post_rot = A.matmul(post_rot)
51
+ post_tran = A.matmul(post_tran) + b
52
+ A = self.get_rot(rotate / 180 * np.pi)
53
+ b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
54
+ b = A.matmul(-b) + b
55
+ post_rot = A.matmul(post_rot)
56
+ post_tran = A.matmul(post_tran) + b
57
+ if self.opencv_pp:
58
+ img = self.img_transform_core_opencv(img, post_rot, post_tran, crop)
59
+ return img, post_rot, post_tran
60
+
61
+ def img_transform_core_opencv(self, img, post_rot, post_tran,
62
+ crop):
63
+ img = np.array(img).astype(np.float32)
64
+ img = cv2.warpAffine(img,
65
+ np.concatenate([post_rot,
66
+ post_tran.reshape(2, 1)],
67
+ axis=1),
68
+ (crop[2] - crop[0], crop[3] - crop[1]),
69
+ flags=cv2.INTER_LINEAR)
70
+ return img
71
+
72
+ def img_transform_core(self, img, resize_dims, crop, flip, rotate):
73
+ # adjust image
74
+ img = img.resize(resize_dims)
75
+ img = img.crop(crop)
76
+ if flip:
77
+ img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
78
+ img = img.rotate(rotate)
79
+ return img
80
+
81
+ def sample_augmentation(self, H, W, flip=None, scale=None):
82
+ fH, fW = eval(self.data_config['input_size'])
83
+ if self.is_train:
84
+ resize = float(fW) / float(W)
85
+ resize += np.random.uniform(*eval(self.data_config['resize']))
86
+ resize_dims = (int(W * resize), int(H * resize))
87
+ newW, newH = resize_dims
88
+ random_crop_height = \
89
+ self.data_config.get('random_crop_height', False)
90
+ if random_crop_height:
91
+ crop_h = int(np.random.uniform(max(0.3 * newH, newH - fH),
92
+ newH - fH))
93
+ else:
94
+ crop_h = \
95
+ int((1 - np.random.uniform(*eval(self.data_config['crop_h']))) *
96
+ newH) - fH
97
+ crop_w = int(np.random.uniform(0, max(0, newW - fW)))
98
+ crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
99
+ flip = self.data_config['flip'] and np.random.choice([0, 1])
100
+ rotate = np.random.uniform(*eval(self.data_config['rot']))
101
+ if self.data_config.get('vflip', False) and np.random.choice([0, 1]):
102
+ rotate += 180
103
+ else:
104
+ resize = float(fW) / float(W)
105
+ if scale is not None:
106
+ resize += scale
107
+ else:
108
+ resize += self.data_config.get('resize_test', 0.0)
109
+ resize_dims = (int(W * resize), int(H * resize))
110
+ newW, newH = resize_dims
111
+ crop_h = int((1 - np.mean(eval(self.data_config['crop_h']))) * newH) - fH
112
+ crop_w = int(max(0, newW - fW) / 2)
113
+ crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
114
+ flip = False if flip is None else flip
115
+ rotate = 0
116
+ return resize, resize_dims, crop, flip, rotate
117
+
118
+ def photo_metric_distortion(self, img, pmd):
119
+ """Call function to perform photometric distortion on images.
120
+ Args:
121
+ img (PIL.Image): Input image. pmd (dict): Photometric distortion config.
122
+ Returns:
123
+ PIL.Image: Distorted image.
124
+ """
125
+ if np.random.rand() > pmd.get('rate', 1.0):
126
+ return img
127
+
128
+ img = np.array(img).astype(np.float32)
129
+ assert img.dtype == np.float32, \
130
+ 'PhotoMetricDistortion needs the input image of dtype np.float32,' \
131
+ ' please set "to_float32=True" in "LoadImageFromFile" pipeline'
132
+ # random brightness
133
+ if np.random.randint(2):
134
+ delta = np.random.uniform(-pmd['brightness_delta'],
135
+ pmd['brightness_delta'])
136
+ img += delta
137
+
138
+ # mode == 0 --> do random contrast first
139
+ # mode == 1 --> do random contrast last
140
+ mode = np.random.randint(2)
141
+ if mode == 1:
142
+ if np.random.randint(2):
143
+ alpha = np.random.uniform(pmd['contrast_lower'],
144
+ pmd['contrast_upper'])
145
+ img *= alpha
146
+
147
+ # convert color from BGR to HSV
148
+ img = bgr2hsv(img)
149
+
150
+ # random saturation
151
+ if np.random.randint(2):
152
+ img[..., 1] *= np.random.uniform(pmd['saturation_lower'],
153
+ pmd['saturation_upper'])
154
+
155
+ # random hue
156
+ if np.random.randint(2):
157
+ img[..., 0] += np.random.uniform(-pmd['hue_delta'], pmd['hue_delta'])
158
+ img[..., 0][img[..., 0] > 360] -= 360
159
+ img[..., 0][img[..., 0] < 0] += 360
160
+
161
+ # convert color from HSV to BGR
162
+ img = hsv2bgr(img)
163
+
164
+ # random contrast
165
+ if mode == 0:
166
+ if np.random.randint(2):
167
+ alpha = np.random.uniform(pmd['contrast_lower'],
168
+ pmd['contrast_upper'])
169
+ img *= alpha
170
+
171
+ # randomly swap channels
172
+ if np.random.randint(2):
173
+ img = img[..., np.random.permutation(3)]
174
+ return Image.fromarray(img.astype(np.uint8))
175
+
176
+ def get_inputs(self, cam: Camera, flip=None, scale=None):
177
+
178
+ img = Image.fromarray(cam.image)
179
+ # original copy of image
180
+ cam.canvas = torch.tensor(np.array(img))
181
+
182
+ post_rot = torch.eye(2)
183
+ post_tran = torch.zeros(2)
184
+
185
+ # image view augmentation (resize, crop, horizontal flip, rotate)
186
+ img_augs = self.sample_augmentation(
187
+ H=img.height, W=img.width, flip=flip, scale=scale)
188
+ resize, resize_dims, crop, flip, rotate = img_augs
189
+ img, post_rot2, post_tran2 = \
190
+ self.img_transform(img, post_rot,
191
+ post_tran,
192
+ resize=resize,
193
+ resize_dims=resize_dims,
194
+ crop=crop,
195
+ flip=flip,
196
+ rotate=rotate)
197
+
198
+ # for convenience, make augmentation matrices 3x3
199
+ post_tran = torch.zeros(3)
200
+ post_rot = torch.eye(3)
201
+ post_tran[:2] = post_tran2
202
+ post_rot[:2, :2] = post_rot2
203
+
204
+ if self.is_train and self.data_config.get('pmd', None) is not None:
205
+ img = self.photo_metric_distortion(img, self.data_config['pmd'])
206
+
207
+ # original image
208
+ cam.image = self.normalize_img(img)
209
+ cam.post_rot = post_rot
210
+ cam.post_tran = post_tran
211
+ cam.sensor2lidar_rotation = torch.tensor(cam.sensor2lidar_rotation)
212
+ cam.sensor2lidar_translation = torch.tensor(cam.sensor2lidar_translation)
213
+ cam.intrinsics = torch.tensor(cam.intrinsics)
214
+ cam.distortion = torch.tensor(cam.distortion)
215
+ return cam
216
+
217
+ def __call__(self, results):
218
+ return self.get_inputs(results)
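For intuition, a minimal sketch (hypothetical resize/crop values; flip and rotation omitted) of what the returned `(post_rot, post_tran)` pair encodes: the 2D affine that maps a pixel of the raw image into the augmented image, which is how `prepare_depth.py` reuses it via `points_img.matmul(post_rot.T) + post_tran`:

```python
# Minimal sketch (hypothetical resize/crop, flip and rotation omitted):
# post_rot/post_tran form the affine mapping raw-image pixels into the
# augmented image, matching `post_rot *= resize` and `post_tran -= crop[:2]`.
import torch

resize = 0.5
crop = (10, 20, 10 + 704, 20 + 256)            # (x1, y1, x2, y2)

post_rot = torch.eye(2) * resize
post_tran = -torch.tensor(crop[:2], dtype=torch.float32)

uv_raw = torch.tensor([100.0, 80.0])           # pixel in the original image
uv_aug = post_rot @ uv_raw + post_tran
print(uv_aug)                                  # tensor([40., 20.])
```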
det_map/det/__init__.py ADDED
File without changes
det_map/det/dal/__init__.py ADDED
File without changes
det_map/det/dal/dal.py ADDED
@@ -0,0 +1,159 @@
1
+ # Copyright (c) Phigent Robotics. All rights reserved.
2
+ import torch
3
+
4
+ from det_map.det.dal.mmdet3d.models.detectors.bevdet import BEVDet
5
+ from det_map.det.dal.mmdet3d.models.utils import FFN
6
+ from det_map.det.dal.mmdet3d.models.utils.spconv_voxelize import SPConvVoxelization
7
+ try:
8
+ from det_map.det.dal.mmdet3d.models import *
9
+ from det_map.det.dal.mmdet3d.core import *
10
+ except Exception:
11
+ raise  # re-raise the original import error instead of masking it
12
+
13
+ class DAL(BEVDet):
14
+ def __init__(self, **kwargs):
15
+ super(DAL, self).__init__(**kwargs)
16
+
17
+ # image view auxiliary task heads
18
+ self.num_cls = self.pts_bbox_head.num_classes
19
+ heads = dict(heatmap=(self.num_cls, 2))
20
+ input_feat_dim = kwargs['pts_bbox_head']['hidden_channel']
21
+ self.auxiliary_heads = FFN(
22
+ input_feat_dim,
23
+ heads,
24
+ conv_cfg=dict(type="Conv1d"),
25
+ norm_cfg=dict(type="BN1d"),
26
+ bias=True)
27
+ self.auxiliary_heads.init_weights()
28
+
29
+ pts_voxel_cfg = kwargs.get('pts_voxel_layer', None)
30
+ if pts_voxel_cfg:
31
+ pts_voxel_cfg['num_point_features'] = 5
32
+ self.pts_voxel_layer = SPConvVoxelization(**pts_voxel_cfg)
33
+
34
+ def extract_img_feat(self, img, img_metas):
35
+ """Extract features of images."""
36
+ img = self.prepare_inputs(img)
37
+ x, _ = self.image_encoder(img[0])
38
+ return [x] + img[1:]
39
+
40
+ def extract_feat(self, points, img, img_metas):
41
+ """Extract features from images and points."""
42
+ img_feats = self.extract_img_feat(img, img_metas)
43
+ pts_feats = self.extract_pts_feat(points, img_feats, img_metas)
44
+ return (img_feats, pts_feats)
45
+
46
+ def forward_img_auxiliary_train(self,
47
+ x,
48
+ img_metas,
49
+ gt_bboxes,
50
+ gt_labels,
51
+ gt_bboxes_ignore=None,
52
+ proposals=None,
53
+ **kwargs):
54
+ max_instance = 150
55
+ num_pos = 0
56
+ centers_augego = x[0].new_zeros((len(gt_bboxes), max_instance, 3))
57
+ box_targets_all = x[0].new_zeros((len(gt_bboxes), max_instance, 10))
58
+ valid_mask = x[0].new_zeros((len(gt_bboxes), max_instance, 1))
59
+ label = x[0].new_zeros((len(gt_bboxes), max_instance, 1)).to(torch.long)
60
+ for sid in range(len(gt_bboxes)):
61
+ centers_augego_tmp = gt_bboxes[sid].gravity_center.to(x[0])
62
+ box_targets_tmp = self.pts_bbox_head.bbox_coder.encode(gt_bboxes[sid].tensor)
63
+ if gt_bboxes_ignore is not None:
64
+ centers_augego_tmp = centers_augego_tmp[gt_bboxes_ignore[sid], :]
65
+ box_targets_tmp = box_targets_tmp[gt_bboxes_ignore[sid], :]
66
+ num_valid_samples = centers_augego_tmp.shape[0]
67
+ num_pos += num_valid_samples
68
+ valid_mask[sid, :num_valid_samples, :] = 1.0
69
+ centers_augego[sid, :num_valid_samples, :] = centers_augego_tmp
70
+ box_targets_all[sid, :num_valid_samples, :] = box_targets_tmp
71
+ label_tmp = gt_labels[sid].unsqueeze(-1)
72
+ if gt_bboxes_ignore is not None:
73
+ label_tmp = label_tmp[gt_bboxes_ignore[sid], :]
74
+ label[sid, :num_valid_samples, :] = label_tmp
75
+ img_feats = self.pts_bbox_head.extract_img_feat_from_3dpoints(
76
+ centers_augego, x, fuse=False)
77
+ heatmap = self.auxiliary_heads.heatmap(img_feats)
78
+ loss_cls_img = self.pts_bbox_head.loss_cls(
79
+ heatmap.permute(0, 2, 1).reshape(-1, self.num_cls),
80
+ label.flatten(),
81
+ valid_mask.flatten(),
82
+ avg_factor=max(num_pos, 1))
83
+ return dict(loss_cls_img=loss_cls_img)
84
+
85
+ def forward_train(self,
86
+ points=None,
87
+ img_metas=None,
88
+ gt_bboxes_3d=None,
89
+ gt_labels_3d=None,
90
+ gt_labels=None,
91
+ gt_bboxes=None,
92
+ img_inputs=None,
93
+ proposals=None,
94
+ gt_bboxes_ignore=None,
95
+ **kwargs):
96
+ """Forward training function.
97
+
98
+ Args:
99
+ points (list[torch.Tensor], optional): Points of each sample.
100
+ Defaults to None.
101
+ img_metas (list[dict], optional): Meta information of each sample.
102
+ Defaults to None.
103
+ gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):
104
+ Ground truth 3D boxes. Defaults to None.
105
+ gt_labels_3d (list[torch.Tensor], optional): Ground truth labels
106
+ of 3D boxes. Defaults to None.
107
+ gt_labels (list[torch.Tensor], optional): Ground truth labels
108
+ of 2D boxes in images. Defaults to None.
109
+ gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in
110
+ images. Defaults to None.
111
+ img_inputs (torch.Tensor, optional): Images of each sample with shape
112
+ (N, C, H, W). Defaults to None.
113
+ proposals (list[torch.Tensor], optional): Predicted proposals
114
+ used for training Fast RCNN. Defaults to None.
115
+ gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
116
+ 2D boxes in images to be ignored. Defaults to None.
117
+
118
+ Returns:
119
+ dict: Losses of different branches.
120
+ """
121
+ img_feats, pts_feats = self.extract_feat(
122
+ points, img=img_inputs, img_metas=img_metas)
123
+ img_feats_bev = \
124
+ self.img_view_transformer(img_feats + img_inputs[1:7],
125
+ depth_from_lidar=kwargs['gt_depth'])
126
+
127
+ losses = dict()
128
+ losses_pts = \
129
+ self.forward_pts_train([img_feats, pts_feats, img_feats_bev],
130
+ gt_bboxes_3d, gt_labels_3d, img_metas,
131
+ gt_bboxes_ignore)
132
+ losses.update(losses_pts)
133
+ losses_img_auxiliary = \
134
+ self.forward_img_auxiliary_train(img_feats, img_metas,
135
+ gt_bboxes_3d, gt_labels_3d,
136
+ gt_bboxes_ignore,
137
+ **kwargs)
138
+ losses.update(losses_img_auxiliary)
139
+ return losses
140
+
141
+ def simple_test(self,
142
+ points,
143
+ img_metas,
144
+ img_inputs=None,
145
+ rescale=False,
146
+ **kwargs):
147
+ """Test function without augmentaiton."""
148
+ img_feats, pts_feats = self.extract_feat(
149
+ points, img=img_inputs, img_metas=img_metas)
150
+ img_feats_bev = \
151
+ self.img_view_transformer(img_feats + img_inputs[1:7],
152
+ depth_from_lidar=kwargs['gt_depth'][0])
153
+
154
+ bbox_list = [dict() for _ in range(len(img_metas))]
155
+ bbox_pts = self.simple_test_pts([img_feats, pts_feats, img_feats_bev],
156
+ img_metas, rescale=rescale)
157
+ for result_dict, pts_bbox in zip(bbox_list, bbox_pts):
158
+ result_dict['pts_bbox'] = pts_bbox
159
+ return bbox_list
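The auxiliary image-view branch above packs a variable number of ground-truth boxes per sample into fixed-size tensors plus a validity mask (`max_instance = 150`). A stripped-down sketch of that packing, with made-up shapes:

```python
# Stripped-down sketch of the padding in forward_img_auxiliary_train:
# variable-length GT lists -> fixed-size tensors plus a validity mask.
import torch

max_instance = 150
gt_centers = [torch.randn(3, 3), torch.randn(7, 3)]      # hypothetical batch of 2

centers = torch.zeros(len(gt_centers), max_instance, 3)
valid = torch.zeros(len(gt_centers), max_instance, 1)
for sid, c in enumerate(gt_centers):
    centers[sid, :c.shape[0]] = c
    valid[sid, :c.shape[0]] = 1.0

num_pos = int(valid.sum())      # used as avg_factor for the auxiliary cls loss
print(num_pos)                  # 10
```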
det_map/det/dal/mmdet3d/__init__.py ADDED
File without changes
det_map/det/dal/mmdet3d/core/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .bbox import * # noqa: F401, F403
3
+ from .points import * # noqa: F401, F403
4
+ from .post_processing import * # noqa: F401, F403
5
+ from .utils import * # noqa: F401, F403
6
+ from .samplers import *
det_map/det/dal/mmdet3d/core/bbox/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
3
+ # from .bbox_target import bbox_target
4
+ from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
5
+ axis_aligned_bbox_overlaps_3d,
6
+ bbox_overlaps_3d,
7
+ )
8
+
9
+ from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
10
+ Coord3DMode, DepthInstance3DBoxes,
11
+ LiDARInstance3DBoxes, get_box_type, limit_period,
12
+ mono_cam_box2vis, points_cam2img, points_img2cam,
13
+ xywhr2xyxyr)
14
+ from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
15
+ from .coders import *
16
+ __all__ = [
17
+ 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 'TransFusionBBoxCoder',
18
+ 'bbox_overlaps_3d',
19
+ 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode',
20
+ 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
21
+ 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
22
+ 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
23
+ 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'
24
+ ]
det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
3
+ from .hungarian_assigner_3d import HungarianAssigner3D
4
+
5
+ __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
6
+ 'HungarianAssigner3D']
det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py ADDED
@@ -0,0 +1,148 @@
1
+ from mmdet.core.bbox.builder import BBOX_ASSIGNERS
2
+ from mmdet.core.bbox.assigners import AssignResult, BaseAssigner
3
+ from mmdet.core.bbox.match_costs import build_match_cost
4
+ from mmdet.core.bbox.match_costs.builder import MATCH_COST
5
+ from mmdet.core.bbox.iou_calculators import build_iou_calculator
6
+ import torch
7
+
8
+ try:
9
+ from scipy.optimize import linear_sum_assignment
10
+ except ImportError:
11
+ linear_sum_assignment = None
12
+
13
+ @MATCH_COST.register_module()
14
+ class BBoxBEVL1Cost(object):
15
+ def __init__(self, weight):
16
+ self.weight = weight
17
+
18
+ def __call__(self, bboxes, gt_bboxes, train_cfg):
19
+ pc_start = bboxes.new(train_cfg['point_cloud_range'][0:2])
20
+ pc_range = bboxes.new(train_cfg['point_cloud_range'][3:5]) - bboxes.new(train_cfg['point_cloud_range'][0:2])
21
+ # normalize the box center to [0, 1]
22
+ normalized_bboxes_xy = (bboxes[:, :2] - pc_start) / pc_range
23
+ normalized_gt_bboxes_xy = (gt_bboxes[:, :2] - pc_start) / pc_range
24
+ reg_cost = torch.cdist(normalized_bboxes_xy, normalized_gt_bboxes_xy, p=1)
25
+ return reg_cost * self.weight
26
+
27
+
28
+ @MATCH_COST.register_module()
29
+ class IoU3DCost(object):
30
+ def __init__(self, weight):
31
+ self.weight = weight
32
+
33
+ def __call__(self, iou):
34
+ iou_cost = - iou
35
+ return iou_cost * self.weight
36
+
37
+
38
+ @BBOX_ASSIGNERS.register_module()
39
+ class HeuristicAssigner3D(BaseAssigner):
40
+ def __init__(self,
41
+ dist_thre=100,
42
+ iou_calculator=dict(type='BboxOverlaps3D')
43
+ ):
44
+ self.dist_thre = dist_thre # distance in meter
45
+ self.iou_calculator = build_iou_calculator(iou_calculator)
46
+
47
+ def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None, query_labels=None):
48
+ dist_thre = self.dist_thre
49
+ num_gts, num_bboxes = len(gt_bboxes), len(bboxes)
50
+
51
+ bev_dist = torch.norm(bboxes[:, 0:2][None, :, :] - gt_bboxes[:, 0:2][:, None, :], dim=-1) # [num_gts, num_bboxes]
52
+ if query_labels is not None:
53
+ # only match the gt box and query with same category
54
+ not_same_class = (query_labels[None] != gt_labels[:, None])
55
+ bev_dist += not_same_class * dist_thre
56
+
57
+ # for each gt box, assign it to the nearest pred box
58
+ nearest_values, nearest_indices = bev_dist.min(1) # [num_gts]
59
+ assigned_gt_inds = torch.ones([num_bboxes, ]).to(bboxes) * 0
60
+ assigned_gt_vals = torch.ones([num_bboxes, ]).to(bboxes) * 10000
61
+ assigned_gt_labels = torch.ones([num_bboxes, ]).to(bboxes) * -1
62
+ for idx_gts in range(num_gts):
63
+ # for idx_pred in torch.where(bev_dist[idx_gts] < dist_thre)[0]: # each gt match to all the pred box within some radius
64
+ idx_pred = nearest_indices[idx_gts] # each gt only match to the nearest pred box
65
+ if bev_dist[idx_gts, idx_pred] <= dist_thre:
66
+ if bev_dist[idx_gts, idx_pred] < assigned_gt_vals[idx_pred]: # if this pred box is assigned, then compare
67
+ assigned_gt_vals[idx_pred] = bev_dist[idx_gts, idx_pred]
68
+ assigned_gt_inds[idx_pred] = idx_gts + 1 # for AssignResult, 0 is negative, -1 is ignore, 1-based indices are positive
69
+ assigned_gt_labels[idx_pred] = gt_labels[idx_gts]
70
+
71
+ max_overlaps = torch.zeros([num_bboxes, ]).to(bboxes)
72
+ matched_indices = torch.where(assigned_gt_inds > 0)
73
+ matched_iou = self.iou_calculator(gt_bboxes[assigned_gt_inds[matched_indices].long() - 1], bboxes[matched_indices]).diag()
74
+ max_overlaps[matched_indices] = matched_iou
75
+
76
+ return AssignResult(
77
+ num_gts, assigned_gt_inds.long(), max_overlaps, labels=assigned_gt_labels
78
+ )
79
+
80
+
81
+ @BBOX_ASSIGNERS.register_module()
82
+ class HungarianAssigner3D(BaseAssigner):
83
+ def __init__(self,
84
+ cls_cost=dict(type='ClassificationCost', weight=1.),
85
+ reg_cost=dict(type='BBoxBEVL1Cost', weight=1.0),
86
+ iou_cost=dict(type='IoU3DCost', weight=1.0),
87
+ iou_calculator=dict(type='BboxOverlaps3D'),
88
+ ):
89
+ self.cls_cost = build_match_cost(cls_cost)
90
+ self.reg_cost = build_match_cost(reg_cost)
91
+ self.iou_cost = build_match_cost(iou_cost)
92
+ self.iou_calculator = build_iou_calculator(iou_calculator)
93
+
94
+ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
95
+ num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
96
+
97
+ # 1. assign -1 by default
98
+ assigned_gt_inds = bboxes.new_full((num_bboxes,),
99
+ -1,
100
+ dtype=torch.long)
101
+ assigned_labels = bboxes.new_full((num_bboxes,),
102
+ -1,
103
+ dtype=torch.long)
104
+ if num_gts == 0 or num_bboxes == 0:
105
+ # No ground truth or boxes, return empty assignment
106
+ if num_gts == 0:
107
+ # No ground truth, assign all to background
108
+ assigned_gt_inds[:] = 0
109
+ return AssignResult(
110
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
111
+
112
+ # 2. compute the weighted costs
113
+ # see mmdetection/mmdet/core/bbox/match_costs/match_cost.py
114
+ cls_cost = self.cls_cost(cls_pred[0].T, gt_labels)
115
+ reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)
116
+
117
+ iou = self.iou_calculator(bboxes, gt_bboxes)
118
+ iou_cost = self.iou_cost(iou)
119
+
120
+ # weighted sum of above three costs
121
+ cost = cls_cost + reg_cost + iou_cost
122
+
123
+ # 3. do Hungarian matching on CPU using linear_sum_assignment
124
+ cost = cost.detach().cpu()
125
+ if linear_sum_assignment is None:
126
+ raise ImportError('Please run "pip install scipy" '
127
+ 'to install scipy first.')
128
+ try:
129
+ matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
130
+ except ValueError:  # infeasible cost matrix (e.g. NaN/inf entries)
131
+ assigned_gt_inds[:] = 0
132
+ return AssignResult(
133
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
134
+ matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)
135
+ matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)
136
+
137
+ # 4. assign backgrounds and foregrounds
138
+ # assign all indices to backgrounds first
139
+ assigned_gt_inds[:] = 0
140
+ # assign foregrounds based on matching results
141
+ assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
142
+ assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
143
+
144
+ max_overlaps = torch.zeros_like(iou.max(1).values)
145
+ max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]
146
+ # max_overlaps = iou.max(1).values
147
+ return AssignResult(
148
+ num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
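A toy, self-contained illustration of the matching convention used above: `linear_sum_assignment` minimises the summed cost, and matched ground-truth indices are stored 1-based with 0 meaning background (cost values are made up):

```python
# Toy illustration of the Hungarian matching and 1-based assignment convention.
import torch
from scipy.optimize import linear_sum_assignment

cost = torch.tensor([[0.2, 0.9],
                     [0.8, 0.1],
                     [0.5, 0.6]])            # 3 predictions x 2 ground truths

row, col = linear_sum_assignment(cost.numpy())
assigned_gt_inds = torch.zeros(cost.shape[0], dtype=torch.long)    # 0 = background
assigned_gt_inds[torch.from_numpy(row)] = torch.from_numpy(col) + 1
print(assigned_gt_inds)                      # tensor([1, 2, 0])
```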
det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py ADDED
@@ -0,0 +1,827 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ # TODO: clean the functions in this file and move the APIs into box structures
3
+ # in the future
4
+ # NOTICE: All functions in this file are valid for LiDAR or depth boxes only
5
+ # if we use default parameters.
6
+
7
+ import numba
8
+ import numpy as np
9
+
10
+ from .structures.utils import limit_period, points_cam2img, rotation_3d_in_axis
11
+
12
+
13
+ def camera_to_lidar(points, r_rect, velo2cam):
14
+ """Convert points in camera coordinate to lidar coordinate.
15
+
16
+ Note:
17
+ This function is for KITTI only.
18
+
19
+ Args:
20
+ points (np.ndarray, shape=[N, 3]): Points in camera coordinate.
21
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
22
+ specific camera coordinate (e.g. CAM2) to CAM0.
23
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
24
+ camera coordinate to lidar coordinate.
25
+
26
+ Returns:
27
+ np.ndarray, shape=[N, 3]: Points in lidar coordinate.
28
+ """
29
+ points_shape = list(points.shape[0:-1])
30
+ if points.shape[-1] == 3:
31
+ points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)
32
+ lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)
33
+ return lidar_points[..., :3]
34
+
35
+
36
+ def box_camera_to_lidar(data, r_rect, velo2cam):
37
+ """Convert boxes in camera coordinate to lidar coordinate.
38
+
39
+ Note:
40
+ This function is for KITTI only.
41
+
42
+ Args:
43
+ data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
44
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
45
+ specific camera coordinate (e.g. CAM2) to CAM0.
46
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
47
+ camera coordinate to lidar coordinate.
48
+
49
+ Returns:
50
+ np.ndarray, shape=[N, 3]: Boxes in lidar coordinate.
51
+ """
52
+ xyz = data[:, 0:3]
53
+ x_size, y_size, z_size = data[:, 3:4], data[:, 4:5], data[:, 5:6]
54
+ r = data[:, 6:7]
55
+ xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)
56
+ # yaw and dims also need to be converted
57
+ r_new = -r - np.pi / 2
58
+ r_new = limit_period(r_new, period=np.pi * 2)
59
+ return np.concatenate([xyz_lidar, x_size, z_size, y_size, r_new], axis=1)
60
+
61
+
62
+ def corners_nd(dims, origin=0.5):
63
+ """Generate relative box corners based on length per dim and origin point.
64
+
65
+ Args:
66
+ dims (np.ndarray, shape=[N, ndim]): Array of length per dim
67
+ origin (list or array or float, optional): origin point relate to
68
+ smallest point. Defaults to 0.5
69
+
70
+ Returns:
71
+ np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.
72
+ point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
73
+ (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
74
+ where x0 < x1, y0 < y1, z0 < z1.
75
+ """
76
+ ndim = int(dims.shape[1])
77
+ corners_norm = np.stack(
78
+ np.unravel_index(np.arange(2**ndim), [2] * ndim),
79
+ axis=1).astype(dims.dtype)
80
+ # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
81
+ # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
82
+ # so need to convert to a format which is convenient to do other computing.
83
+ # for 2d boxes, format is clockwise start with minimum point
84
+ # for 3d boxes, please draw lines by your hand.
85
+ if ndim == 2:
86
+ # generate clockwise box corners
87
+ corners_norm = corners_norm[[0, 1, 3, 2]]
88
+ elif ndim == 3:
89
+ corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
90
+ corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
91
+ corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
92
+ [1, 2**ndim, ndim])
93
+ return corners
94
+
95
+
96
+ def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
97
+ """Convert kitti locations, dimensions and angles to corners.
98
+ format: center(xy), dims(xy), angles(counterclockwise when positive)
99
+
100
+ Args:
101
+ centers (np.ndarray): Locations in kitti label file with shape (N, 2).
102
+ dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).
103
+ angles (np.ndarray, optional): Rotation_y in kitti label file with
104
+ shape (N). Defaults to None.
105
+ origin (list or array or float, optional): origin point relate to
106
+ smallest point. Defaults to 0.5.
107
+
108
+ Returns:
109
+ np.ndarray: Corners with the shape of (N, 4, 2).
110
+ """
111
+ # 'length' in kitti format is in x axis.
112
+ # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
113
+ # center in kitti format is [0.5, 1.0, 0.5] in xyz.
114
+ corners = corners_nd(dims, origin=origin)
115
+ # corners: [N, 4, 2]
116
+ if angles is not None:
117
+ corners = rotation_3d_in_axis(corners, angles)
118
+ corners += centers.reshape([-1, 1, 2])
119
+ return corners
120
+
121
+
122
+ @numba.jit(nopython=True)
123
+ def depth_to_points(depth, trunc_pixel):
124
+ """Convert depth map to points.
125
+
126
+ Args:
127
+ depth (np.array, shape=[H, W]): Depth map which
128
+ the row of [0~`trunc_pixel`] are truncated.
129
+ trunc_pixel (int): The number of truncated row.
130
+
131
+ Returns:
132
+ np.ndarray: Points in camera coordinates.
133
+ """
134
+ num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)
135
+ points = np.zeros((num_pts, 3), dtype=depth.dtype)
136
+ x = np.array([0, 0, 1], dtype=depth.dtype)
137
+ k = 0
138
+ for i in range(trunc_pixel, depth.shape[0]):
139
+ for j in range(depth.shape[1]):
140
+ if depth[i, j] > 0.1:
141
+ x = np.array([j, i, 1], dtype=depth.dtype)
142
+ points[k] = x * depth[i, j]
143
+ k += 1
144
+ return points
145
+
146
+
147
+ def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):
148
+ """Convert depth map to points in lidar coordinate.
149
+
150
+ Args:
151
+ depth (np.array, shape=[H, W]): Depth map which
152
+ the row of [0~`trunc_pixel`] are truncated.
153
+ trunc_pixel (int): The number of truncated row.
154
+ P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.
155
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
156
+ specific camera coordinate (e.g. CAM2) to CAM0.
157
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
158
+ camera coordinate to lidar coordinate.
159
+
160
+ Returns:
161
+ np.ndarray: Points in lidar coordinates.
162
+ """
163
+ pts = depth_to_points(depth, trunc_pixel)
164
+ points_shape = list(pts.shape[0:-1])
165
+ points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)
166
+ points = points @ np.linalg.inv(P2.T)
167
+ lidar_points = camera_to_lidar(points, r_rect, velo2cam)
168
+ return lidar_points
169
+
170
+
171
+ def center_to_corner_box3d(centers,
172
+ dims,
173
+ angles=None,
174
+ origin=(0.5, 1.0, 0.5),
175
+ axis=1):
176
+ """Convert kitti locations, dimensions and angles to corners.
177
+
178
+ Args:
179
+ centers (np.ndarray): Locations in kitti label file with shape (N, 3).
180
+ dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).
181
+ angles (np.ndarray, optional): Rotation_y in kitti label file with
182
+ shape (N). Defaults to None.
183
+ origin (list or array or float, optional): Origin point relate to
184
+ smallest point. Use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0)
185
+ in lidar. Defaults to (0.5, 1.0, 0.5).
186
+ axis (int, optional): Rotation axis. 1 for camera and 2 for lidar.
187
+ Defaults to 1.
188
+
189
+ Returns:
190
+ np.ndarray: Corners with the shape of (N, 8, 3).
191
+ """
192
+ # 'length' in kitti format is in x axis.
193
+ # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(lwh)(lidar)
194
+ # center in kitti format is [0.5, 1.0, 0.5] in xyz.
195
+ corners = corners_nd(dims, origin=origin)
196
+ # corners: [N, 8, 3]
197
+ if angles is not None:
198
+ corners = rotation_3d_in_axis(corners, angles, axis=axis)
199
+ corners += centers.reshape([-1, 1, 3])
200
+ return corners
201
+
202
+
203
+ @numba.jit(nopython=True)
204
+ def box2d_to_corner_jit(boxes):
205
+ """Convert box2d to corner.
206
+
207
+ Args:
208
+ boxes (np.ndarray, shape=[N, 5]): Boxes2d with rotation.
209
+
210
+ Returns:
211
+ box_corners (np.ndarray, shape=[N, 4, 2]): Box corners.
212
+ """
213
+ num_box = boxes.shape[0]
214
+ corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
215
+ corners_norm[1, 1] = 1.0
216
+ corners_norm[2] = 1.0
217
+ corners_norm[3, 0] = 1.0
218
+ corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
219
+ corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(
220
+ 1, 4, 2)
221
+ rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
222
+ box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)
223
+ for i in range(num_box):
224
+ rot_sin = np.sin(boxes[i, -1])
225
+ rot_cos = np.cos(boxes[i, -1])
226
+ rot_mat_T[0, 0] = rot_cos
227
+ rot_mat_T[0, 1] = rot_sin
228
+ rot_mat_T[1, 0] = -rot_sin
229
+ rot_mat_T[1, 1] = rot_cos
230
+ box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]
231
+ return box_corners
232
+
233
+
234
+ @numba.njit
235
+ def corner_to_standup_nd_jit(boxes_corner):
236
+ """Convert boxes_corner to aligned (min-max) boxes.
237
+
238
+ Args:
239
+ boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.
240
+
241
+ Returns:
242
+ np.ndarray, shape=[N, dim*2]: Aligned (min-max) boxes.
243
+ """
244
+ num_boxes = boxes_corner.shape[0]
245
+ ndim = boxes_corner.shape[-1]
246
+ result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)
247
+ for i in range(num_boxes):
248
+ for j in range(ndim):
249
+ result[i, j] = np.min(boxes_corner[i, :, j])
250
+ for j in range(ndim):
251
+ result[i, j + ndim] = np.max(boxes_corner[i, :, j])
252
+ return result
253
+
254
+
255
+ @numba.jit(nopython=True)
256
+ def corner_to_surfaces_3d_jit(corners):
257
+ """Convert 3d box corners from corner function above to surfaces that
258
+ normal vectors all direct to internal.
259
+
260
+ Args:
261
+ corners (np.ndarray): 3d box corners with the shape of (N, 8, 3).
262
+
263
+ Returns:
264
+ np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
265
+ """
266
+ # box_corners: [N, 8, 3], must from corner functions in this module
267
+ num_boxes = corners.shape[0]
268
+ surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)
269
+ corner_idxes = np.array([
270
+ 0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7
271
+ ]).reshape(6, 4)
272
+ for i in range(num_boxes):
273
+ for j in range(6):
274
+ for k in range(4):
275
+ surfaces[i, j, k] = corners[i, corner_idxes[j, k]]
276
+ return surfaces
277
+
278
+
279
+ def rotation_points_single_angle(points, angle, axis=0):
280
+ """Rotate points with a single angle.
281
+
282
+ Args:
283
+ points (np.ndarray, shape=[N, 3]): Points to rotate.
284
+ angle (np.ndarray, shape=[1]): Rotation angle.
285
+ axis (int, optional): Axis to rotate at. Defaults to 0.
286
+
287
+ Returns:
288
+ np.ndarray: Rotated points.
289
+ """
290
+ # points: [N, 3]
291
+ rot_sin = np.sin(angle)
292
+ rot_cos = np.cos(angle)
293
+ if axis == 1:
294
+ rot_mat_T = np.array(
295
+ [[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]],
296
+ dtype=points.dtype)
297
+ elif axis == 2 or axis == -1:
298
+ rot_mat_T = np.array(
299
+ [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]],
300
+ dtype=points.dtype)
301
+ elif axis == 0:
302
+ rot_mat_T = np.array(
303
+ [[1, 0, 0], [0, rot_cos, rot_sin], [0, -rot_sin, rot_cos]],
304
+ dtype=points.dtype)
305
+ else:
306
+ raise ValueError('axis should be 0, 1, 2 or -1')
307
+
308
+ return points @ rot_mat_T, rot_mat_T
309
+
310
+
311
+ def box3d_to_bbox(box3d, P2):
312
+ """Convert box3d in camera coordinates to bbox in image coordinates.
313
+
314
+ Args:
315
+ box3d (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
316
+ P2 (np.array, shape=[4, 4]): Intrinsics of Camera2.
317
+
318
+ Returns:
319
+ np.ndarray, shape=[N, 4]: Boxes 2d in image coordinates.
320
+ """
321
+ box_corners = center_to_corner_box3d(
322
+ box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
323
+ box_corners_in_image = points_cam2img(box_corners, P2)
324
+ # box_corners_in_image: [N, 8, 2]
325
+ minxy = np.min(box_corners_in_image, axis=1)
326
+ maxxy = np.max(box_corners_in_image, axis=1)
327
+ bbox = np.concatenate([minxy, maxxy], axis=1)
328
+ return bbox
329
+
330
+
331
+ def corner_to_surfaces_3d(corners):
332
+ """convert 3d box corners from corner function above to surfaces that
333
+ normal vectors all direct to internal.
334
+
335
+ Args:
336
+ corners (np.ndarray): 3D box corners with shape of (N, 8, 3).
337
+
338
+ Returns:
339
+ np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
340
+ """
341
+ # box_corners: [N, 8, 3], must from corner functions in this module
342
+ surfaces = np.array([
343
+ [corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],
344
+ [corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],
345
+ [corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],
346
+ [corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],
347
+ [corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],
348
+ [corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],
349
+ ]).transpose([2, 0, 1, 3])
350
+ return surfaces
351
+
352
+
353
+ def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
354
+ """Check points in rotated bbox and return indices.
355
+
356
+ Note:
357
+ This function is for counterclockwise boxes.
358
+
359
+ Args:
360
+ points (np.ndarray, shape=[N, 3+dim]): Points to query.
361
+ rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.
362
+ z_axis (int, optional): Indicate which axis is height.
363
+ Defaults to 2.
364
+ origin (tuple[int], optional): Indicate the position of
365
+ box center. Defaults to (0.5, 0.5, 0).
366
+
367
+ Returns:
368
+ np.ndarray, shape=[N, M]: Indices of points in each box.
369
+ """
370
+ # TODO: this function is different from PointCloud3D, be careful
371
+ # when start to use nuscene, check the input
372
+ rbbox_corners = center_to_corner_box3d(
373
+ rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)
374
+ surfaces = corner_to_surfaces_3d(rbbox_corners)
375
+ indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)
376
+ return indices
377
+
378
+
379
+ def minmax_to_corner_2d(minmax_box):
380
+ """Convert minmax box to corners2d.
381
+
382
+ Args:
383
+ minmax_box (np.ndarray, shape=[N, dims]): minmax boxes.
384
+
385
+ Returns:
386
+ np.ndarray: 2d corners of boxes
387
+ """
388
+ ndim = minmax_box.shape[-1] // 2
389
+ center = minmax_box[..., :ndim]
390
+ dims = minmax_box[..., ndim:] - center
391
+ return center_to_corner_box2d(center, dims, origin=0.0)
392
+
393
+
394
+ def create_anchors_3d_range(feature_size,
395
+ anchor_range,
396
+ sizes=((3.9, 1.6, 1.56), ),
397
+ rotations=(0, np.pi / 2),
398
+ dtype=np.float32):
399
+ """Create anchors 3d by range.
400
+
401
+ Args:
402
+ feature_size (list[float] | tuple[float]): Feature map size. It is
403
+ either a list of a tuple of [D, H, W](in order of z, y, and x).
404
+ anchor_range (torch.Tensor | list[float]): Range of anchors with
405
+ shape [6]. The order is consistent with that of anchors, i.e.,
406
+ (x_min, y_min, z_min, x_max, y_max, z_max).
407
+ sizes (list[list] | np.ndarray | torch.Tensor, optional):
408
+ Anchor size with shape [N, 3], in order of x, y, z.
409
+ Defaults to ((3.9, 1.6, 1.56), ).
410
+ rotations (list[float] | np.ndarray | torch.Tensor, optional):
411
+ Rotations of anchors in a single feature grid.
412
+ Defaults to (0, np.pi / 2).
413
+ dtype (type, optional): Data type. Defaults to np.float32.
414
+
415
+ Returns:
416
+ np.ndarray: Range based anchors with shape of
417
+ (*feature_size, num_sizes, num_rots, 7).
418
+ """
419
+ anchor_range = np.array(anchor_range, dtype)
420
+ z_centers = np.linspace(
421
+ anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
422
+ y_centers = np.linspace(
423
+ anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
424
+ x_centers = np.linspace(
425
+ anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
426
+ sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
427
+ rotations = np.array(rotations, dtype=dtype)
428
+ rets = np.meshgrid(
429
+ x_centers, y_centers, z_centers, rotations, indexing='ij')
430
+ tile_shape = [1] * 5
431
+ tile_shape[-2] = int(sizes.shape[0])
432
+ for i in range(len(rets)):
433
+ rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
434
+ rets[i] = rets[i][..., np.newaxis] # for concat
435
+ sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
436
+ tile_size_shape = list(rets[0].shape)
437
+ tile_size_shape[3] = 1
438
+ sizes = np.tile(sizes, tile_size_shape)
439
+ rets.insert(3, sizes)
440
+ ret = np.concatenate(rets, axis=-1)
441
+ return np.transpose(ret, [2, 1, 0, 3, 4, 5])
442
+
443
+
444
+ def center_to_minmax_2d(centers, dims, origin=0.5):
445
+ """Center to minmax.
446
+
447
+ Args:
448
+ centers (np.ndarray): Center points.
449
+ dims (np.ndarray): Dimensions.
450
+ origin (list or array or float, optional): Origin point relate
451
+ to smallest point. Defaults to 0.5.
452
+
453
+ Returns:
454
+ np.ndarray: Minmax points.
455
+ """
456
+ if origin == 0.5:
457
+ return np.concatenate([centers - dims / 2, centers + dims / 2],
458
+ axis=-1)
459
+ corners = center_to_corner_box2d(centers, dims, origin=origin)
460
+ return corners[:, [0, 2]].reshape([-1, 4])
461
+
462
+
463
+ def rbbox2d_to_near_bbox(rbboxes):
464
+ """convert rotated bbox to nearest 'standing' or 'lying' bbox.
465
+
466
+ Args:
467
+ rbboxes (np.ndarray): Rotated bboxes with shape of
468
+ (N, 5(x, y, xdim, ydim, rad)).
469
+
470
+ Returns:
471
+ np.ndarray: Bounding boxes with the shape of
472
+ (N, 4(xmin, ymin, xmax, ymax)).
473
+ """
474
+ rots = rbboxes[..., -1]
475
+ rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))
476
+ cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]
477
+ bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
478
+ bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
479
+ return bboxes
480
+
481
+
482
+ @numba.jit(nopython=True)
483
+ def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
484
+ """Calculate box iou. Note that jit version runs ~10x faster than the
485
+ box_overlaps function in mmdet3d.core.evaluation.
486
+
487
+ Note:
488
+ This function is for counterclockwise boxes.
489
+
490
+ Args:
491
+ boxes (np.ndarray): Input bounding boxes with shape of (N, 4).
492
+ query_boxes (np.ndarray): Query boxes with shape of (K, 4).
493
+ mode (str, optional): IoU mode. Defaults to 'iou'.
494
+ eps (float, optional): Value added to denominator. Defaults to 0.
495
+
496
+ Returns:
497
+ np.ndarray: Overlap between boxes and query_boxes
498
+ with the shape of [N, K].
499
+ """
500
+ N = boxes.shape[0]
501
+ K = query_boxes.shape[0]
502
+ overlaps = np.zeros((N, K), dtype=boxes.dtype)
503
+ for k in range(K):
504
+ box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *
505
+ (query_boxes[k, 3] - query_boxes[k, 1] + eps))
506
+ for n in range(N):
507
+ iw = (
508
+ min(boxes[n, 2], query_boxes[k, 2]) -
509
+ max(boxes[n, 0], query_boxes[k, 0]) + eps)
510
+ if iw > 0:
511
+ ih = (
512
+ min(boxes[n, 3], query_boxes[k, 3]) -
513
+ max(boxes[n, 1], query_boxes[k, 1]) + eps)
514
+ if ih > 0:
515
+ if mode == 'iou':
516
+ ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
517
+ (boxes[n, 3] - boxes[n, 1] + eps) + box_area -
518
+ iw * ih)
519
+ else:
520
+ ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
521
+ (boxes[n, 3] - boxes[n, 1] + eps))
522
+ overlaps[n, k] = iw * ih / ua
523
+ return overlaps
524
+
525
+
526
+ def projection_matrix_to_CRT_kitti(proj):
527
+ """Split projection matrix of KITTI.
528
+
529
+ Note:
530
+ This function is for KITTI only.
531
+
532
+ P = C @ [R|T]
533
+ C is an upper triangular matrix, so we invert CR and use a QR
534
+ decomposition, which is stable for all KITTI camera projection matrices.
535
+
536
+ Args:
537
+ proj (p.array, shape=[4, 4]): Intrinsics of camera.
538
+
539
+ Returns:
540
+ tuple[np.ndarray]: Split matrices C, R and T.
541
+ """
542
+
543
+ CR = proj[0:3, 0:3]
544
+ CT = proj[0:3, 3]
545
+ RinvCinv = np.linalg.inv(CR)
546
+ Rinv, Cinv = np.linalg.qr(RinvCinv)
547
+ C = np.linalg.inv(Cinv)
548
+ R = np.linalg.inv(Rinv)
549
+ T = Cinv @ CT
550
+ return C, R, T
551
+
552
+
553
+ def remove_outside_points(points, rect, Trv2c, P2, image_shape):
554
+ """Remove points which are outside of image.
555
+
556
+ Note:
557
+ This function is for KITTI only.
558
+
559
+ Args:
560
+ points (np.ndarray, shape=[N, 3+dims]): Total points.
561
+ rect (np.ndarray, shape=[4, 4]): Matrix to project points in
562
+ specific camera coordinate (e.g. CAM2) to CAM0.
563
+ Trv2c (np.ndarray, shape=[4, 4]): Matrix to project points in
564
+ camera coordinate to lidar coordinate.
565
+ P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.
566
+ image_shape (list[int]): Shape of image.
567
+
568
+ Returns:
569
+ np.ndarray, shape=[N, 3+dims]: Filtered points.
570
+ """
571
+ # 5x faster than remove_outside_points_v1(2ms vs 10ms)
572
+ C, R, T = projection_matrix_to_CRT_kitti(P2)
573
+ image_bbox = [0, 0, image_shape[1], image_shape[0]]
574
+ frustum = get_frustum(image_bbox, C)
575
+ frustum -= T
576
+ frustum = np.linalg.inv(R) @ frustum.T
577
+ frustum = camera_to_lidar(frustum.T, rect, Trv2c)
578
+ frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])
579
+ indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)
580
+ points = points[indices.reshape([-1])]
581
+ return points
582
+
583
+
584
+ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
585
+ """Get frustum corners in camera coordinates.
586
+
587
+ Args:
588
+ bbox_image (list[int]): box in image coordinates.
589
+ C (np.ndarray): Intrinsics.
590
+ near_clip (float, optional): Nearest distance of frustum.
591
+ Defaults to 0.001.
592
+ far_clip (float, optional): Farthest distance of frustum.
593
+ Defaults to 100.
594
+
595
+ Returns:
596
+ np.ndarray, shape=[8, 3]: coordinates of frustum corners.
597
+ """
598
+ fku = C[0, 0]
599
+ fkv = -C[1, 1]
600
+ u0v0 = C[0:2, 2]
601
+ z_points = np.array(
602
+ [near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]
603
+ b = bbox_image
604
+ box_corners = np.array(
605
+ [[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],
606
+ dtype=C.dtype)
607
+ near_box_corners = (box_corners - u0v0) / np.array(
608
+ [fku / near_clip, -fkv / near_clip], dtype=C.dtype)
609
+ far_box_corners = (box_corners - u0v0) / np.array(
610
+ [fku / far_clip, -fkv / far_clip], dtype=C.dtype)
611
+ ret_xy = np.concatenate([near_box_corners, far_box_corners],
612
+ axis=0) # [8, 2]
613
+ ret_xyz = np.concatenate([ret_xy, z_points], axis=1)
614
+ return ret_xyz
615
+
616
+
617
+ def surface_equ_3d(polygon_surfaces):
618
+ """
619
+
620
+ Args:
621
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
622
+ [num_polygon, max_num_surfaces, max_num_points_of_surface, 3].
623
+ All surface normal vectors must point inward.
624
+ Max_num_points_of_surface must be at least 3.
625
+
626
+ Returns:
627
+ tuple: normal vector and its direction.
628
+ """
629
+ # return [a, b, c], d in ax+by+cz+d=0
630
+ # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
631
+ surface_vec = polygon_surfaces[:, :, :2, :] - \
632
+ polygon_surfaces[:, :, 1:3, :]
633
+ # normal_vec: [..., 3]
634
+ normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
635
+ # print(normal_vec.shape, points[..., 0, :].shape)
636
+ # d = -np.inner(normal_vec, points[..., 0, :])
637
+ d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
638
+ return normal_vec, -d
639
+
640
+
641
+ @numba.njit
642
+ def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
643
+ num_surfaces):
644
+ """
645
+ Args:
646
+ points (np.ndarray): Input points with shape of (num_points, 3).
647
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
648
+ (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
649
+ All surface normal vectors must point inward.
650
+ Max_num_points_of_surface must be at least 3.
651
+ normal_vec (np.ndarray): Normal vector of polygon_surfaces.
652
+ d (int): Directions of normal vector.
653
+ num_surfaces (np.ndarray): Number of surfaces a polygon contains
654
+ shape of (num_polygon).
655
+
656
+ Returns:
657
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
658
+ """
659
+ max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
660
+ num_points = points.shape[0]
661
+ num_polygons = polygon_surfaces.shape[0]
662
+ ret = np.ones((num_points, num_polygons), dtype=np.bool_)
663
+ sign = 0.0
664
+ for i in range(num_points):
665
+ for j in range(num_polygons):
666
+ for k in range(max_num_surfaces):
667
+ if k > num_surfaces[j]:
668
+ break
669
+ sign = (
670
+ points[i, 0] * normal_vec[j, k, 0] +
671
+ points[i, 1] * normal_vec[j, k, 1] +
672
+ points[i, 2] * normal_vec[j, k, 2] + d[j, k])
673
+ if sign >= 0:
674
+ ret[i, j] = False
675
+ break
676
+ return ret
677
+
678
+
679
+ def points_in_convex_polygon_3d_jit(points,
680
+ polygon_surfaces,
681
+ num_surfaces=None):
682
+ """Check points is in 3d convex polygons.
683
+
684
+ Args:
685
+ points (np.ndarray): Input points with shape of (num_points, 3).
686
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
687
+ (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
688
+ All surface normal vectors must point inward.
689
+ Max_num_points_of_surface must be at least 3.
690
+ num_surfaces (np.ndarray, optional): Number of surfaces a polygon
691
+ contains shape of (num_polygon). Defaults to None.
692
+
693
+ Returns:
694
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
695
+ """
696
+ max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
697
+ # num_points = points.shape[0]
698
+ num_polygons = polygon_surfaces.shape[0]
699
+ if num_surfaces is None:
700
+ num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)
701
+ normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])
702
+ # normal_vec: [num_polygon, max_num_surfaces, 3]
703
+ # d: [num_polygon, max_num_surfaces]
704
+ return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,
705
+ normal_vec, d, num_surfaces)
706
+
707
+
708
+ @numba.njit
709
+ def points_in_convex_polygon_jit(points, polygon, clockwise=False):
710
+ """Check points is in 2d convex polygons. True when point in polygon.
711
+
712
+ Args:
713
+ points (np.ndarray): Input points with the shape of [num_points, 2].
714
+ polygon (np.ndarray): Input polygon with the shape of
715
+ [num_polygon, num_points_of_polygon, 2].
716
+ clockwise (bool, optional): Whether the polygon vertices are ordered
717
+ clockwise. Defaults to False.
718
+
719
+ Returns:
720
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
721
+ """
722
+ # first convert polygon to directed lines
723
+ num_points_of_polygon = polygon.shape[1]
724
+ num_points = points.shape[0]
725
+ num_polygons = polygon.shape[0]
726
+ # vec for all the polygons
727
+ if clockwise:
728
+ vec1 = polygon - polygon[:,
729
+ np.array([num_points_of_polygon - 1] + list(
730
+ range(num_points_of_polygon - 1))), :]
731
+ else:
732
+ vec1 = polygon[:,
733
+ np.array([num_points_of_polygon - 1] +
734
+ list(range(num_points_of_polygon -
735
+ 1))), :] - polygon
736
+ ret = np.zeros((num_points, num_polygons), dtype=np.bool_)
737
+ success = True
738
+ cross = 0.0
739
+ for i in range(num_points):
740
+ for j in range(num_polygons):
741
+ success = True
742
+ for k in range(num_points_of_polygon):
743
+ vec = vec1[j, k]
744
+ cross = vec[1] * (polygon[j, k, 0] - points[i, 0])
745
+ cross -= vec[0] * (polygon[j, k, 1] - points[i, 1])
746
+ if cross >= 0:
747
+ success = False
748
+ break
749
+ ret[i, j] = success
750
+ return ret
751
+
752
+
753
+ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
754
+ """Convert kitti center boxes to corners.
755
+
756
+ 7 -------- 4
757
+ /| /|
758
+ 6 -------- 5 .
759
+ | | | |
760
+ . 3 -------- 0
761
+ |/ |/
762
+ 2 -------- 1
763
+
764
+ Note:
765
+ This function is for LiDAR boxes only.
766
+
767
+ Args:
768
+ boxes3d (np.ndarray): Boxes with shape of (N, 7)
769
+ [x, y, z, x_size, y_size, z_size, ry] in LiDAR coords,
770
+ see the definition of ry in KITTI dataset.
771
+ bottom_center (bool, optional): Whether z is on the bottom center
772
+ of object. Defaults to True.
773
+
774
+ Returns:
775
+ np.ndarray: Box corners with the shape of [N, 8, 3].
776
+ """
777
+ boxes_num = boxes3d.shape[0]
778
+ x_size, y_size, z_size = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
779
+ x_corners = np.array([
780
+ x_size / 2., -x_size / 2., -x_size / 2., x_size / 2., x_size / 2.,
781
+ -x_size / 2., -x_size / 2., x_size / 2.
782
+ ],
783
+ dtype=np.float32).T
784
+ y_corners = np.array([
785
+ -y_size / 2., -y_size / 2., y_size / 2., y_size / 2., -y_size / 2.,
786
+ -y_size / 2., y_size / 2., y_size / 2.
787
+ ],
788
+ dtype=np.float32).T
789
+ if bottom_center:
790
+ z_corners = np.zeros((boxes_num, 8), dtype=np.float32)
791
+ z_corners[:, 4:8] = z_size.reshape(boxes_num, 1).repeat(
792
+ 4, axis=1) # (N, 8)
793
+ else:
794
+ z_corners = np.array([
795
+ -z_size / 2., -z_size / 2., -z_size / 2., -z_size / 2.,
796
+ z_size / 2., z_size / 2., z_size / 2., z_size / 2.
797
+ ],
798
+ dtype=np.float32).T
799
+
800
+ ry = boxes3d[:, 6]
801
+ zeros, ones = np.zeros(
802
+ ry.size, dtype=np.float32), np.ones(
803
+ ry.size, dtype=np.float32)
804
+ rot_list = np.array([[np.cos(ry), np.sin(ry), zeros],
805
+ [-np.sin(ry), np.cos(ry), zeros],
806
+ [zeros, zeros, ones]]) # (3, 3, N)
807
+ R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3)
808
+
809
+ temp_corners = np.concatenate((x_corners.reshape(
810
+ -1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),
811
+ axis=2) # (N, 8, 3)
812
+ rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3)
813
+ x_corners = rotated_corners[:, :, 0]
814
+ y_corners = rotated_corners[:, :, 1]
815
+ z_corners = rotated_corners[:, :, 2]
816
+
817
+ x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
818
+
819
+ x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
820
+ y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
821
+ z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
822
+
823
+ corners = np.concatenate(
824
+ (x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),
825
+ axis=2)
826
+
827
+ return corners.astype(np.float32)
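For reference, a self-contained toy example of the `corners_nd` construction for a single axis-aligned 2D box (dimensions made up), showing the corner ordering it produces:

```python
# Toy example of the corners_nd recipe for one 2D box of size 4 x 2,
# centred at the origin (origin=0.5).
import numpy as np

dims = np.array([[4.0, 2.0]])
corners_norm = np.stack(np.unravel_index(np.arange(4), [2, 2]), axis=1).astype(dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2]] - 0.5        # clockwise, from the minimum corner
corners = dims.reshape(-1, 1, 2) * corners_norm.reshape(1, 4, 2)
print(corners[0])
# [[-2. -1.]
#  [-2.  1.]
#  [ 2.  1.]
#  [ 2. -1.]]
```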
det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmdet.core.bbox import build_bbox_coder
3
+ from .transfusion_bbox_coder import TransFusionBBoxCoder
4
+
5
+ __all__ = [
6
+ 'build_bbox_coder','TransFusionBBoxCoder'
7
+ ]
det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py ADDED
@@ -0,0 +1,124 @@
1
+ import torch
2
+
3
+ from mmdet.core.bbox import BaseBBoxCoder
4
+ from mmdet.core.bbox.builder import BBOX_CODERS
5
+
6
+
7
+ @BBOX_CODERS.register_module()
8
+ class TransFusionBBoxCoder(BaseBBoxCoder):
9
+ def __init__(self,
10
+ pc_range,
11
+ out_size_factor,
12
+ voxel_size,
13
+ post_center_range=None,
14
+ score_threshold=None,
15
+ code_size=8,
16
+ ):
17
+ self.pc_range = pc_range
18
+ self.out_size_factor = out_size_factor
19
+ self.voxel_size = voxel_size
20
+ self.post_center_range = post_center_range
21
+ self.score_threshold = score_threshold
22
+ self.code_size = code_size
23
+
24
+ def encode(self, dst_boxes):
25
+ targets = torch.zeros([dst_boxes.shape[0], self.code_size]).to(dst_boxes.device)
26
+ targets[:, 0] = (dst_boxes[:, 0] - self.pc_range[0]) / (self.out_size_factor * self.voxel_size[0])
27
+ targets[:, 1] = (dst_boxes[:, 1] - self.pc_range[1]) / (self.out_size_factor * self.voxel_size[1])
28
+ # targets[:, 2] = (dst_boxes[:, 2] - self.post_center_range[2]) / (self.post_center_range[5] - self.post_center_range[2])
29
+ targets[:, 3] = dst_boxes[:, 3].log()
30
+ targets[:, 4] = dst_boxes[:, 4].log()
31
+ targets[:, 5] = dst_boxes[:, 5].log()
32
+ targets[:, 2] = dst_boxes[:, 2] + dst_boxes[:, 5] * 0.5 # bottom center to gravity center
33
+ targets[:, 6] = torch.sin(dst_boxes[:, 6])
34
+ targets[:, 7] = torch.cos(dst_boxes[:, 6])
35
+ if self.code_size == 10:
36
+ targets[:, 8:10] = dst_boxes[:, 7:]
37
+ return targets
38
+
39
+ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
40
+ """Decode bboxes.
41
+ Args:
42
+ heat (torch.Tensor): Heatmap with the shape of [B, num_cls, num_proposals].
43
+ rot (torch.Tensor): Rotation with the shape of
44
+ [B, 1, num_proposals].
45
+ dim (torch.Tensor): Dim of the boxes with the shape of
46
+ [B, 3, num_proposals].
47
+ center (torch.Tensor): bev center of the boxes with the shape of
48
+ [B, 2, num_proposals]. (in feature map metric)
49
+ hieght (torch.Tensor): height of the boxes with the shape of
50
+ [B, 2, num_proposals]. (in real world metric)
51
+ vel (torch.Tensor): Velocity with the shape of [B, 2, num_proposals].
52
+ filter: if False, return all box without checking score and center_range
53
+ Returns:
54
+ list[dict]: Decoded boxes.
55
+ """
56
+ # class label
57
+ final_preds = heatmap.max(1, keepdims=False).indices
58
+ final_scores = heatmap.max(1, keepdims=False).values
59
+
60
+ # change size to real world metric
61
+ center[:, 0, :] = center[:, 0, :] * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]
62
+ center[:, 1, :] = center[:, 1, :] * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]
63
+ # center[:, 2, :] = center[:, 2, :] * (self.post_center_range[5] - self.post_center_range[2]) + self.post_center_range[2]
64
+ dim[:, 0, :] = dim[:, 0, :].exp()
65
+ dim[:, 1, :] = dim[:, 1, :].exp()
66
+ dim[:, 2, :] = dim[:, 2, :].exp()
67
+ height = height - dim[:, 2:3, :] * 0.5 # gravity center to bottom center
68
+ rots, rotc = rot[:, 0:1, :], rot[:, 1:2, :]
69
+ rot = torch.atan2(rots, rotc)
70
+
71
+ if vel is None:
72
+ final_box_preds = torch.cat([center, height, dim, rot], dim=1).permute(0, 2, 1)
73
+ else:
74
+ final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1)
75
+
76
+ predictions_dicts = []
77
+ for i in range(heatmap.shape[0]):
78
+ boxes3d = final_box_preds[i]
79
+ scores = final_scores[i]
80
+ labels = final_preds[i]
81
+ predictions_dict = {
82
+ 'bboxes': boxes3d,
83
+ 'scores': scores,
84
+ 'labels': labels
85
+ }
86
+ predictions_dicts.append(predictions_dict)
87
+
88
+ if filter is False:
89
+ return predictions_dicts
90
+
91
+ # use score threshold
92
+ if self.score_threshold is not None:
93
+ thresh_mask = final_scores > self.score_threshold
94
+
95
+ if self.post_center_range is not None:
96
+ self.post_center_range = torch.tensor(
97
+ self.post_center_range, device=heatmap.device)
98
+ mask = (final_box_preds[..., :3] >=
99
+ self.post_center_range[:3]).all(2)
100
+ mask &= (final_box_preds[..., :3] <=
101
+ self.post_center_range[3:]).all(2)
102
+
103
+ predictions_dicts = []
104
+ for i in range(heatmap.shape[0]):
105
+ cmask = mask[i, :]
106
+ if self.score_threshold:
107
+ cmask &= thresh_mask[i]
108
+
109
+ boxes3d = final_box_preds[i, cmask]
110
+ scores = final_scores[i, cmask]
111
+ labels = final_preds[i, cmask]
112
+ predictions_dict = {
113
+ 'bboxes': boxes3d,
114
+ 'scores': scores,
115
+ 'labels': labels
116
+ }
117
+
118
+ predictions_dicts.append(predictions_dict)
119
+ else:
120
+ raise NotImplementedError(
121
+ 'Need to reorganize output as a batch, only '
122
+ 'support post_center_range is not None for now!')
123
+
124
+ return predictions_dicts
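
A hedged usage sketch for the coder above. The pc_range, voxel_size and post_center_range values are illustrative assumptions, not this repo's training configuration, and mmdet must be installed for the registry import:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.coders import TransFusionBBoxCoder

    coder = TransFusionBBoxCoder(
        pc_range=[-54.0, -54.0, -5.0, 54.0, 54.0, 3.0],   # illustrative values only
        out_size_factor=8,
        voxel_size=[0.075, 0.075, 0.2],
        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
        code_size=10,
    )
    # (x, y, z, dx, dy, dz, yaw, vx, vy) ground-truth box -> regression targets
    gt = torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3, 1.0, 0.0]])
    targets = coder.encode(gt)
    print(targets.shape)  # (1, 10): grid x/y, gravity z, log sizes, sin/cos yaw, velocity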
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py ADDED
@@ -0,0 +1,10 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
+ axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d)
+
+ __all__ = [
+ 'BboxOverlaps3D', 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',
+ 'axis_aligned_bbox_overlaps_3d'
+ ]
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py ADDED
@@ -0,0 +1,232 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ from mmdet.core.bbox import bbox_overlaps
4
+ from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
5
+
6
+ from ..structures import get_box_type
7
+
8
+
9
+ @IOU_CALCULATORS.register_module()
10
+ class BboxOverlaps3D(object):
11
+ """3D IoU Calculator.
12
+
13
+ Args:
14
+ coordinate (str): The coordinate system, valid options are
15
+ 'camera', 'lidar', and 'depth'.
16
+ """
17
+
18
+ def __init__(self, coordinate):
19
+ assert coordinate in ['camera', 'lidar', 'depth']
20
+ self.coordinate = coordinate
21
+
22
+ def __call__(self, bboxes1, bboxes2, mode='iou'):
23
+ """Calculate 3D IoU using cuda implementation.
24
+
25
+ Note:
26
+ This function calculates the IoU of 3D boxes based on their volumes.
27
+ IoU calculator :class:`BboxOverlaps3D` uses this function to
28
+ calculate the actual 3D IoUs of boxes.
29
+
30
+ Args:
31
+ bboxes1 (torch.Tensor): with shape (N, 7+C),
32
+ (x, y, z, x_size, y_size, z_size, ry, v*).
33
+ bboxes2 (torch.Tensor): with shape (M, 7+C),
34
+ (x, y, z, x_size, y_size, z_size, ry, v*).
35
+ mode (str): "iou" (intersection over union) or
36
+ iof (intersection over foreground).
37
+
38
+ Return:
39
+ torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
40
+ with shape (M, N) (aligned mode is not supported currently).
41
+ """
42
+ return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)
43
+
44
+ def __repr__(self):
45
+ """str: return a string that describes the module"""
46
+ repr_str = self.__class__.__name__
47
+ repr_str += f'(coordinate={self.coordinate})'
48
+ return repr_str
49
+
50
+
51
+ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
52
+ """Calculate 3D IoU using cuda implementation.
53
+
54
+ Note:
55
+ This function calculates the IoU of 3D boxes based on their volumes.
56
+ IoU calculator :class:`BboxOverlaps3D` uses this function to
57
+ calculate the actual IoUs of boxes.
58
+
59
+ Args:
60
+ bboxes1 (torch.Tensor): with shape (N, 7+C),
61
+ (x, y, z, x_size, y_size, z_size, ry, v*).
62
+ bboxes2 (torch.Tensor): with shape (M, 7+C),
63
+ (x, y, z, x_size, y_size, z_size, ry, v*).
64
+ mode (str): "iou" (intersection over union) or
65
+ iof (intersection over foreground).
66
+ coordinate (str): 'camera' or 'lidar' coordinate system.
67
+
68
+ Return:
69
+ torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
70
+ with shape (M, N) (aligned mode is not supported currently).
71
+ """
72
+ assert bboxes1.size(-1) == bboxes2.size(-1) >= 7
73
+
74
+ box_type, _ = get_box_type(coordinate)
75
+
76
+ bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
77
+ bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])
78
+
79
+ return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)
80
+
81
+
82
+ @IOU_CALCULATORS.register_module()
83
+ class AxisAlignedBboxOverlaps3D(object):
84
+ """Axis-aligned 3D Overlaps (IoU) Calculator."""
85
+
86
+ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
87
+ """Calculate IoU between 2D bboxes.
88
+
89
+ Args:
90
+ bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
91
+ format or empty.
92
+ bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
93
+ format or empty.
94
+ B indicates the batch dim, in shape (B1, B2, ..., Bn).
95
+ If ``is_aligned`` is ``True``, then m and n must be equal.
96
+ mode (str): "iou" (intersection over union) or "giou" (generalized
97
+ intersection over union).
98
+ is_aligned (bool, optional): If True, then m and n must be equal.
99
+ Defaults to False.
100
+ Returns:
101
+ Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
102
+ """
103
+ assert bboxes1.size(-1) == bboxes2.size(-1) == 6
104
+ return axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, mode,
105
+ is_aligned)
106
+
107
+ def __repr__(self):
108
+ """str: a string describing the module"""
109
+ repr_str = self.__class__.__name__ + '()'
110
+ return repr_str
111
+
112
+
113
+ def axis_aligned_bbox_overlaps_3d(bboxes1,
114
+ bboxes2,
115
+ mode='iou',
116
+ is_aligned=False,
117
+ eps=1e-6):
118
+ """Calculate overlap between two set of axis aligned 3D bboxes. If
119
+ ``is_aligned`` is ``False``, then calculate the overlaps between each bbox
120
+ of bboxes1 and bboxes2, otherwise the overlaps between each aligned pair of
121
+ bboxes1 and bboxes2.
122
+
123
+ Args:
124
+ bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
125
+ format or empty.
126
+ bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
127
+ format or empty.
128
+ B indicates the batch dim, in shape (B1, B2, ..., Bn).
129
+ If ``is_aligned`` is ``True``, then m and n must be equal.
130
+ mode (str): "iou" (intersection over union) or "giou" (generalized
131
+ intersection over union).
132
+ is_aligned (bool, optional): If True, then m and n must be equal.
133
+ Defaults to False.
134
+ eps (float, optional): A value added to the denominator for numerical
135
+ stability. Defaults to 1e-6.
136
+
137
+ Returns:
138
+ Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
139
+
140
+ Example:
141
+ >>> bboxes1 = torch.FloatTensor([
142
+ >>> [0, 0, 0, 10, 10, 10],
143
+ >>> [10, 10, 10, 20, 20, 20],
144
+ >>> [32, 32, 32, 38, 40, 42],
145
+ >>> ])
146
+ >>> bboxes2 = torch.FloatTensor([
147
+ >>> [0, 0, 0, 10, 20, 20],
148
+ >>> [0, 10, 10, 10, 19, 20],
149
+ >>> [10, 10, 10, 20, 20, 20],
150
+ >>> ])
151
+ >>> overlaps = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)
152
+ >>> assert overlaps.shape == (3, 3)
153
+ >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)
154
+ >>> assert overlaps.shape == (3, )
155
+ Example:
156
+ >>> empty = torch.empty(0, 6)
157
+ >>> nonempty = torch.FloatTensor([[0, 0, 0, 10, 9, 10]])
158
+ >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
159
+ >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
160
+ >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
161
+ """
162
+
163
+ assert mode in ['iou', 'giou'], f'Unsupported mode {mode}'
164
+ # Either the boxes are empty or the length of the boxes' last dimension is 6
165
+ assert (bboxes1.size(-1) == 6 or bboxes1.size(0) == 0)
166
+ assert (bboxes2.size(-1) == 6 or bboxes2.size(0) == 0)
167
+
168
+ # Batch dim must be the same
169
+ # Batch dim: (B1, B2, ... Bn)
170
+ assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
171
+ batch_shape = bboxes1.shape[:-2]
172
+
173
+ rows = bboxes1.size(-2)
174
+ cols = bboxes2.size(-2)
175
+ if is_aligned:
176
+ assert rows == cols
177
+
178
+ if rows * cols == 0:
179
+ if is_aligned:
180
+ return bboxes1.new(batch_shape + (rows,))
181
+ else:
182
+ return bboxes1.new(batch_shape + (rows, cols))
183
+
184
+ area1 = (bboxes1[..., 3] -
185
+ bboxes1[..., 0]) * (bboxes1[..., 4] - bboxes1[..., 1]) * (
186
+ bboxes1[..., 5] - bboxes1[..., 2])
187
+ area2 = (bboxes2[..., 3] -
188
+ bboxes2[..., 0]) * (bboxes2[..., 4] - bboxes2[..., 1]) * (
189
+ bboxes2[..., 5] - bboxes2[..., 2])
190
+
191
+ if is_aligned:
192
+ lt = torch.max(bboxes1[..., :3], bboxes2[..., :3]) # [B, rows, 3]
193
+ rb = torch.min(bboxes1[..., 3:], bboxes2[..., 3:]) # [B, rows, 3]
194
+
195
+ wh = (rb - lt).clamp(min=0) # [B, rows, 2]
196
+ overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]
197
+
198
+ if mode in ['iou', 'giou']:
199
+ union = area1 + area2 - overlap
200
+ else:
201
+ union = area1
202
+ if mode == 'giou':
203
+ enclosed_lt = torch.min(bboxes1[..., :3], bboxes2[..., :3])
204
+ enclosed_rb = torch.max(bboxes1[..., 3:], bboxes2[..., 3:])
205
+ else:
206
+ lt = torch.max(bboxes1[..., :, None, :3],
207
+ bboxes2[..., None, :, :3]) # [B, rows, cols, 3]
208
+ rb = torch.min(bboxes1[..., :, None, 3:],
209
+ bboxes2[..., None, :, 3:]) # [B, rows, cols, 3]
210
+
211
+ wh = (rb - lt).clamp(min=0) # [B, rows, cols, 3]
212
+ overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]
213
+
214
+ if mode in ['iou', 'giou']:
215
+ union = area1[..., None] + area2[..., None, :] - overlap
216
+ if mode == 'giou':
217
+ enclosed_lt = torch.min(bboxes1[..., :, None, :3],
218
+ bboxes2[..., None, :, :3])
219
+ enclosed_rb = torch.max(bboxes1[..., :, None, 3:],
220
+ bboxes2[..., None, :, 3:])
221
+
222
+ eps = union.new_tensor([eps])
223
+ union = torch.max(union, eps)
224
+ ious = overlap / union
225
+ if mode in ['iou']:
226
+ return ious
227
+ # calculate gious
228
+ enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)
229
+ enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] * enclose_wh[..., 2]
230
+ enclose_area = torch.max(enclose_area, eps)
231
+ gious = ious - (enclose_area - union) / enclose_area
232
+ return gious
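
A small sanity-check sketch for the axis-aligned overlap helper above, with hand-chosen values: two 10x10x10 cubes shifted by 5 on each axis intersect in a 5x5x5 cube, so the IoU is 125 / 1875:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.iou_calculators import axis_aligned_bbox_overlaps_3d

    a = torch.tensor([[0.0, 0.0, 0.0, 10.0, 10.0, 10.0]])
    b = torch.tensor([[5.0, 5.0, 5.0, 15.0, 15.0, 15.0]])
    iou = axis_aligned_bbox_overlaps_3d(a, b)
    print(iou)  # tensor([[0.0667]]) == 125 / (1000 + 1000 - 125)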
det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py ADDED
@@ -0,0 +1,18 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .base_box3d import BaseInstance3DBoxes
+ from .box_3d_mode import Box3DMode
+ from .cam_box3d import CameraInstance3DBoxes
+ from .coord_3d_mode import Coord3DMode
+ from .depth_box3d import DepthInstance3DBoxes
+ from .lidar_box3d import LiDARInstance3DBoxes
+ from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
+ mono_cam_box2vis, points_cam2img, points_img2cam,
+ rotation_3d_in_axis, xywhr2xyxyr)
+
+ __all__ = [
+ 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
+ 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
+ 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
+ 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',
+ 'get_proj_mat_by_coord_type'
+ ]
det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py ADDED
@@ -0,0 +1,578 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import warnings
3
+ from abc import abstractmethod
4
+
5
+ import numpy as np
6
+ import torch
7
+ from mmcv.ops import box_iou_rotated, points_in_boxes_all, points_in_boxes_part
8
+
9
+ from .utils import limit_period
10
+
11
+
12
+ class BaseInstance3DBoxes(object):
13
+ """Base class for 3D Boxes.
14
+
15
+ Note:
16
+ The box is bottom centered, i.e. the relative position of origin in
17
+ the box is (0.5, 0.5, 0).
18
+
19
+ Args:
20
+ tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.
21
+ box_dim (int): Number of the dimension of a box.
22
+ Each row is (x, y, z, x_size, y_size, z_size, yaw).
23
+ Defaults to 7.
24
+ with_yaw (bool): Whether the box is with yaw rotation.
25
+ If False, the value of yaw will be set to 0 as minmax boxes.
26
+ Defaults to True.
27
+ origin (tuple[float], optional): Relative position of the box origin.
28
+ Defaults to (0.5, 0.5, 0). This will guide the box be converted to
29
+ (0.5, 0.5, 0) mode.
30
+
31
+ Attributes:
32
+ tensor (torch.Tensor): Float matrix of N x box_dim.
33
+ box_dim (int): Integer indicating the dimension of a box.
34
+ Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
35
+ with_yaw (bool): If False, the value of yaw will be set to 0 as
+ axis-aligned (minmax) boxes.
37
+ """
38
+
39
+ def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
40
+ if isinstance(tensor, torch.Tensor):
41
+ device = tensor.device
42
+ else:
43
+ device = torch.device('cpu')
44
+ tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
45
+ if tensor.numel() == 0:
46
+ # Use reshape, so we don't end up creating a new tensor that
47
+ # does not depend on the inputs (and consequently confuses jit)
48
+ tensor = tensor.reshape((0, box_dim)).to(
49
+ dtype=torch.float32, device=device)
50
+ assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
51
+
52
+ if tensor.shape[-1] == 6:
53
+ # If the dimension of boxes is 6, we expand box_dim by padding
54
+ # 0 as a fake yaw and set with_yaw to False.
55
+ assert box_dim == 6
56
+ fake_rot = tensor.new_zeros(tensor.shape[0], 1)
57
+ tensor = torch.cat((tensor, fake_rot), dim=-1)
58
+ self.box_dim = box_dim + 1
59
+ self.with_yaw = False
60
+ else:
61
+ self.box_dim = box_dim
62
+ self.with_yaw = with_yaw
63
+ self.tensor = tensor.clone()
64
+
65
+ if origin != (0.5, 0.5, 0):
66
+ dst = self.tensor.new_tensor((0.5, 0.5, 0))
67
+ src = self.tensor.new_tensor(origin)
68
+ self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
69
+
70
+ @property
71
+ def volume(self):
72
+ """torch.Tensor: A vector with volume of each box."""
73
+ return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
74
+
75
+ @property
76
+ def dims(self):
77
+ """torch.Tensor: Size dimensions of each box in shape (N, 3)."""
78
+ return self.tensor[:, 3:6]
79
+
80
+ @property
81
+ def yaw(self):
82
+ """torch.Tensor: A vector with yaw of each box in shape (N, )."""
83
+ return self.tensor[:, 6]
84
+
85
+ @property
86
+ def height(self):
87
+ """torch.Tensor: A vector with height of each box in shape (N, )."""
88
+ return self.tensor[:, 5]
89
+
90
+ @property
91
+ def top_height(self):
92
+ """torch.Tensor:
93
+ A vector with the top height of each box in shape (N, )."""
94
+ return self.bottom_height + self.height
95
+
96
+ @property
97
+ def bottom_height(self):
98
+ """torch.Tensor:
99
+ A vector with bottom's height of each box in shape (N, )."""
100
+ return self.tensor[:, 2]
101
+
102
+ @property
103
+ def center(self):
104
+ """Calculate the center of all the boxes.
105
+
106
+ Note:
107
+ In MMDetection3D's convention, the bottom center is
108
+ usually taken as the default center.
109
+
110
+ The relative position of the centers in different kinds of
111
+ boxes are different, e.g., the relative center of a boxes is
112
+ (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
113
+ It is recommended to use ``bottom_center`` or ``gravity_center``
114
+ for clearer usage.
115
+
116
+ Returns:
117
+ torch.Tensor: A tensor with center of each box in shape (N, 3).
118
+ """
119
+ return self.bottom_center
120
+
121
+ @property
122
+ def bottom_center(self):
123
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
124
+ return self.tensor[:, :3]
125
+
126
+ @property
127
+ def gravity_center(self):
128
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
129
+ pass
130
+
131
+ @property
132
+ def corners(self):
133
+ """torch.Tensor:
134
+ a tensor with 8 corners of each box in shape (N, 8, 3)."""
135
+ pass
136
+
137
+ @property
138
+ def bev(self):
139
+ """torch.Tensor: 2D BEV box of each box with rotation
140
+ in XYWHR format, in shape (N, 5)."""
141
+ return self.tensor[:, [0, 1, 3, 4, 6]]
142
+
143
+ @property
144
+ def nearest_bev(self):
145
+ """torch.Tensor: A tensor of 2D BEV box of each box
146
+ without rotation."""
147
+ # Obtain BEV boxes with rotation in XYWHR format
148
+ bev_rotated_boxes = self.bev
149
+ # convert the rotation to a valid range
150
+ rotations = bev_rotated_boxes[:, -1]
151
+ normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
152
+
153
+ # find the center of boxes
154
+ conditions = (normed_rotations > np.pi / 4)[..., None]
155
+ bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
156
+ [0, 1, 3, 2]],
157
+ bev_rotated_boxes[:, :4])
158
+
159
+ centers = bboxes_xywh[:, :2]
160
+ dims = bboxes_xywh[:, 2:]
161
+ bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
162
+ return bev_boxes
163
+
164
+ def in_range_bev(self, box_range):
165
+ """Check whether the boxes are in the given range.
166
+
167
+ Args:
168
+ box_range (list | torch.Tensor): the range of box
169
+ (x_min, y_min, x_max, y_max)
170
+
171
+ Note:
172
+ The original implementation of SECOND checks whether boxes in
173
+ a range by checking whether the points are in a convex
174
+ polygon, we reduce the burden for simpler cases.
175
+
176
+ Returns:
177
+ torch.Tensor: Whether each box is inside the reference range.
178
+ """
179
+ in_range_flags = ((self.bev[:, 0] > box_range[0])
180
+ & (self.bev[:, 1] > box_range[1])
181
+ & (self.bev[:, 0] < box_range[2])
182
+ & (self.bev[:, 1] < box_range[3]))
183
+ return in_range_flags
184
+
185
+ @abstractmethod
186
+ def rotate(self, angle, points=None):
187
+ """Rotate boxes with points (optional) with the given angle or rotation
188
+ matrix.
189
+
190
+ Args:
191
+ angle (float | torch.Tensor | np.ndarray):
192
+ Rotation angle or rotation matrix.
193
+ points (torch.Tensor | numpy.ndarray |
194
+ :obj:`BasePoints`, optional):
195
+ Points to rotate. Defaults to None.
196
+ """
197
+ pass
198
+
199
+ @abstractmethod
200
+ def flip(self, bev_direction='horizontal'):
201
+ """Flip the boxes in BEV along given BEV direction.
202
+
203
+ Args:
204
+ bev_direction (str, optional): Direction by which to flip.
205
+ Can be chosen from 'horizontal' and 'vertical'.
206
+ Defaults to 'horizontal'.
207
+ """
208
+ pass
209
+
210
+ def translate(self, trans_vector):
211
+ """Translate boxes with the given translation vector.
212
+
213
+ Args:
214
+ trans_vector (torch.Tensor): Translation vector of size (1, 3).
215
+ """
216
+ if not isinstance(trans_vector, torch.Tensor):
217
+ trans_vector = self.tensor.new_tensor(trans_vector)
218
+ self.tensor[:, :3] += trans_vector
219
+
220
+ def in_range_3d(self, box_range):
221
+ """Check whether the boxes are in the given range.
222
+
223
+ Args:
224
+ box_range (list | torch.Tensor): The range of box
225
+ (x_min, y_min, z_min, x_max, y_max, z_max)
226
+
227
+ Note:
228
+ In the original implementation of SECOND, checking whether
229
+ a box in the range checks whether the points are in a convex
230
+ polygon, we try to reduce the burden for simpler cases.
231
+
232
+ Returns:
233
+ torch.Tensor: A binary vector indicating whether each box is
234
+ inside the reference range.
235
+ """
236
+ in_range_flags = ((self.tensor[:, 0] > box_range[0])
237
+ & (self.tensor[:, 1] > box_range[1])
238
+ & (self.tensor[:, 2] > box_range[2])
239
+ & (self.tensor[:, 0] < box_range[3])
240
+ & (self.tensor[:, 1] < box_range[4])
241
+ & (self.tensor[:, 2] < box_range[5]))
242
+ return in_range_flags
243
+
244
+ @abstractmethod
245
+ def convert_to(self, dst, rt_mat=None):
246
+ """Convert self to ``dst`` mode.
247
+
248
+ Args:
249
+ dst (:obj:`Box3DMode`): The target Box mode.
250
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
251
+ translation matrix between different coordinates.
252
+ Defaults to None.
253
+ The conversion from `src` coordinates to `dst` coordinates
254
+ usually comes along the change of sensors, e.g., from camera
255
+ to LiDAR. This requires a transformation matrix.
256
+
257
+ Returns:
258
+ :obj:`BaseInstance3DBoxes`: The converted box of the same type
259
+ in the `dst` mode.
260
+ """
261
+ pass
262
+
263
+ def scale(self, scale_factor):
264
+ """Scale the box with horizontal and vertical scaling factors.
265
+
266
+ Args:
267
+ scale_factor (float): Scale factor to scale the boxes.
268
+ """
269
+ self.tensor[:, :6] *= scale_factor
270
+ self.tensor[:, 7:] *= scale_factor # velocity
271
+
272
+ def limit_yaw(self, offset=0.5, period=np.pi):
273
+ """Limit the yaw to a given period and offset.
274
+
275
+ Args:
276
+ offset (float, optional): The offset of the yaw. Defaults to 0.5.
277
+ period (float, optional): The expected period. Defaults to np.pi.
278
+ """
279
+ self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
280
+
281
+ def nonempty(self, threshold=0.0):
282
+ """Find boxes that are non-empty.
283
+
284
+ A box is considered empty if any of its
+ sides is no larger than the threshold.
286
+
287
+ Args:
288
+ threshold (float, optional): The threshold of minimal sizes.
289
+ Defaults to 0.0.
290
+
291
+ Returns:
292
+ torch.Tensor: A binary vector which represents whether each
293
+ box is empty (False) or non-empty (True).
294
+ """
295
+ box = self.tensor
296
+ size_x = box[..., 3]
297
+ size_y = box[..., 4]
298
+ size_z = box[..., 5]
299
+ keep = ((size_x > threshold)
300
+ & (size_y > threshold) & (size_z > threshold))
301
+ return keep
302
+
303
+ def __getitem__(self, item):
304
+ """
305
+ Note:
306
+ The following usages are allowed:
307
+ 1. `new_boxes = boxes[3]`:
308
+ return a `Boxes` that contains only one box.
309
+ 2. `new_boxes = boxes[2:10]`:
310
+ return a slice of boxes.
311
+ 3. `new_boxes = boxes[vector]`:
312
+ where vector is a torch.BoolTensor with `length = len(boxes)`.
313
+ Nonzero elements in the vector will be selected.
314
+ Note that the returned Boxes might share storage with this Boxes,
315
+ subject to Pytorch's indexing semantics.
316
+
317
+ Returns:
318
+ :obj:`BaseInstance3DBoxes`: A new object of
319
+ :class:`BaseInstance3DBoxes` after indexing.
320
+ """
321
+ original_type = type(self)
322
+ if isinstance(item, int):
323
+ return original_type(
324
+ self.tensor[item].view(1, -1),
325
+ box_dim=self.box_dim,
326
+ with_yaw=self.with_yaw)
327
+ b = self.tensor[item]
328
+ assert b.dim() == 2, \
329
+ f'Indexing on Boxes with {item} failed to return a matrix!'
330
+ return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
331
+
332
+ def __len__(self):
333
+ """int: Number of boxes in the current object."""
334
+ return self.tensor.shape[0]
335
+
336
+ def __repr__(self):
337
+ """str: Return a strings that describes the object."""
338
+ return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
339
+
340
+ @classmethod
341
+ def cat(cls, boxes_list):
342
+ """Concatenate a list of Boxes into a single Boxes.
343
+
344
+ Args:
345
+ boxes_list (list[:obj:`BaseInstance3DBoxes`]): List of boxes.
346
+
347
+ Returns:
348
+ :obj:`BaseInstance3DBoxes`: The concatenated Boxes.
349
+ """
350
+ assert isinstance(boxes_list, (list, tuple))
351
+ if len(boxes_list) == 0:
352
+ return cls(torch.empty(0))
353
+ assert all(isinstance(box, cls) for box in boxes_list)
354
+
355
+ # use torch.cat (v.s. layers.cat)
356
+ # so the returned boxes never share storage with input
357
+ cat_boxes = cls(
358
+ torch.cat([b.tensor for b in boxes_list], dim=0),
359
+ box_dim=boxes_list[0].tensor.shape[1],
360
+ with_yaw=boxes_list[0].with_yaw)
361
+ return cat_boxes
362
+
363
+ def to(self, device):
364
+ """Convert current boxes to a specific device.
365
+
366
+ Args:
367
+ device (str | :obj:`torch.device`): The name of the device.
368
+
369
+ Returns:
370
+ :obj:`BaseInstance3DBoxes`: A new boxes object on the
371
+ specific device.
372
+ """
373
+ original_type = type(self)
374
+ return original_type(
375
+ self.tensor.to(device),
376
+ box_dim=self.box_dim,
377
+ with_yaw=self.with_yaw)
378
+
379
+ def clone(self):
380
+ """Clone the Boxes.
381
+
382
+ Returns:
383
+ :obj:`BaseInstance3DBoxes`: Box object with the same properties
384
+ as self.
385
+ """
386
+ original_type = type(self)
387
+ return original_type(
388
+ self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)
389
+
390
+ @property
391
+ def device(self):
392
+ """str: The device of the boxes are on."""
393
+ return self.tensor.device
394
+
395
+ def __iter__(self):
396
+ """Yield a box as a Tensor of shape (4,) at a time.
397
+
398
+ Returns:
399
+ torch.Tensor: A box of shape (4,).
400
+ """
401
+ yield from self.tensor
402
+
403
+ @classmethod
404
+ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
405
+ """Calculate height overlaps of two boxes.
406
+
407
+ Note:
408
+ This function calculates the height overlaps between boxes1 and
409
+ boxes2, boxes1 and boxes2 should be in the same type.
410
+
411
+ Args:
412
+ boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
413
+ boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
414
+ mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
415
+
416
+ Returns:
417
+ torch.Tensor: Calculated iou of boxes.
418
+ """
419
+ assert isinstance(boxes1, BaseInstance3DBoxes)
420
+ assert isinstance(boxes2, BaseInstance3DBoxes)
421
+ assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
422
+ f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
423
+
424
+ boxes1_top_height = boxes1.top_height.view(-1, 1)
425
+ boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
426
+ boxes2_top_height = boxes2.top_height.view(1, -1)
427
+ boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
428
+
429
+ heighest_of_bottom = torch.max(boxes1_bottom_height,
430
+ boxes2_bottom_height)
431
+ lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)
432
+ overlaps_h = torch.clamp(lowest_of_top - heighest_of_bottom, min=0)
433
+ return overlaps_h
434
+
435
+ @classmethod
436
+ def overlaps(cls, boxes1, boxes2, mode='iou'):
437
+ """Calculate 3D overlaps of two boxes.
438
+
439
+ Note:
440
+ This function calculates the overlaps between ``boxes1`` and
441
+ ``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
442
+
443
+ Args:
444
+ boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
445
+ boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
446
+ mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
447
+
448
+ Returns:
449
+ torch.Tensor: Calculated 3D overlaps of the boxes.
450
+ """
451
+ assert isinstance(boxes1, BaseInstance3DBoxes)
452
+ assert isinstance(boxes2, BaseInstance3DBoxes)
453
+ assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
454
+ f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
455
+
456
+ assert mode in ['iou', 'iof']
457
+
458
+ rows = len(boxes1)
459
+ cols = len(boxes2)
460
+ if rows * cols == 0:
461
+ return boxes1.tensor.new(rows, cols)
462
+
463
+ # height overlap
464
+ overlaps_h = cls.height_overlaps(boxes1, boxes2)
465
+
466
+ # bev overlap
467
+ iou2d = box_iou_rotated(boxes1.bev, boxes2.bev)
468
+ areas1 = (boxes1.bev[:, 2] * boxes1.bev[:, 3]).unsqueeze(1).expand(
469
+ rows, cols)
470
+ areas2 = (boxes2.bev[:, 2] * boxes2.bev[:, 3]).unsqueeze(0).expand(
471
+ rows, cols)
472
+ overlaps_bev = iou2d * (areas1 + areas2) / (1 + iou2d)
473
+
474
+ # 3d overlaps
475
+ overlaps_3d = overlaps_bev.to(boxes1.device) * overlaps_h
476
+
477
+ volume1 = boxes1.volume.view(-1, 1)
478
+ volume2 = boxes2.volume.view(1, -1)
479
+
480
+ if mode == 'iou':
481
+ # the clamp func is used to avoid division of 0
482
+ iou3d = overlaps_3d / torch.clamp(
483
+ volume1 + volume2 - overlaps_3d, min=1e-8)
484
+ else:
485
+ iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)
486
+
487
+ return iou3d
488
+
489
+ def new_box(self, data):
490
+ """Create a new box object with data.
491
+
492
+ The new box and its tensor has the similar properties
493
+ as self and self.tensor, respectively.
494
+
495
+ Args:
496
+ data (torch.Tensor | numpy.array | list): Data to be copied.
497
+
498
+ Returns:
499
+ :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``,
500
+ the object's other properties are similar to ``self``.
501
+ """
502
+ new_tensor = self.tensor.new_tensor(data) \
503
+ if not isinstance(data, torch.Tensor) else data.to(self.device)
504
+ original_type = type(self)
505
+ return original_type(
506
+ new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
507
+
508
+ def points_in_boxes_part(self, points, boxes_override=None):
509
+ """Find the box in which each point is.
510
+
511
+ Args:
512
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
513
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
514
+ boxes_override (torch.Tensor, optional): Boxes to override
515
+ `self.tensor`. Defaults to None.
516
+
517
+ Returns:
518
+ torch.Tensor: The index of the first box that each point
519
+ is in, in shape (M, ). Default value is -1
520
+ (if the point is not enclosed by any box).
521
+
522
+ Note:
523
+ If a point is enclosed by multiple boxes, the index of the
524
+ first box will be returned.
525
+ """
526
+ if boxes_override is not None:
527
+ boxes = boxes_override
528
+ else:
529
+ boxes = self.tensor
530
+ if points.dim() == 2:
531
+ points = points.unsqueeze(0)
532
+ box_idx = points_in_boxes_part(points,
533
+ boxes.unsqueeze(0).to(
534
+ points.device)).squeeze(0)
535
+ return box_idx
536
+
537
+ def points_in_boxes_all(self, points, boxes_override=None):
538
+ """Find all boxes in which each point is.
539
+
540
+ Args:
541
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
542
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
543
+ boxes_override (torch.Tensor, optional): Boxes to override
544
+ `self.tensor`. Defaults to None.
545
+
546
+ Returns:
547
+ torch.Tensor: A tensor indicating whether a point is in a box,
548
+ in shape (M, T). T is the number of boxes. Denote this
549
+ tensor as A, if the m^th point is in the t^th box, then
550
+ `A[m, t] == 1`, otherwise `A[m, t] == 0`.
551
+ """
552
+ if boxes_override is not None:
553
+ boxes = boxes_override
554
+ else:
555
+ boxes = self.tensor
556
+
557
+ points_clone = points.clone()[..., :3]
558
+ if points_clone.dim() == 2:
559
+ points_clone = points_clone.unsqueeze(0)
560
+ else:
561
+ assert points_clone.dim() == 3 and points_clone.shape[0] == 1
562
+
563
+ boxes = boxes.to(points_clone.device).unsqueeze(0)
564
+ box_idxs_of_pts = points_in_boxes_all(points_clone, boxes)
565
+
566
+ return box_idxs_of_pts.squeeze(0)
567
+
568
+ def points_in_boxes(self, points, boxes_override=None):
569
+ warnings.warn('DeprecationWarning: points_in_boxes is a '
570
+ 'deprecated method, please consider using '
571
+ 'points_in_boxes_part.')
572
+ return self.points_in_boxes_part(points, boxes_override)
573
+
574
+ def points_in_boxes_batch(self, points, boxes_override=None):
575
+ warnings.warn('DeprecationWarning: points_in_boxes_batch is a '
576
+ 'deprecated method, please consider using '
577
+ 'points_in_boxes_all.')
578
+ return self.points_in_boxes_all(points, boxes_override)
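
An illustrative sketch of the box API defined above, exercised through the LiDAR subclass re-exported by this package. It assumes mmcv with its box ops is installed; the box values are made up:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

    boxes = LiDARInstance3DBoxes(
        torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3]]))
    print(boxes.gravity_center)                       # bottom center lifted by half the height
    print(boxes.bev)                                  # (x, y, x_size, y_size, yaw)
    print(boxes.in_range_bev([-50, -50, 50, 50]))     # tensor([True])
    print(LiDARInstance3DBoxes.overlaps(boxes, boxes))  # tensor([[1.]]) via rotated BEV IoU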
det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py ADDED
@@ -0,0 +1,197 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from enum import IntEnum, unique
3
+
4
+ import numpy as np
5
+ import torch
6
+
7
+ from .base_box3d import BaseInstance3DBoxes
8
+ from .cam_box3d import CameraInstance3DBoxes
9
+ from .depth_box3d import DepthInstance3DBoxes
10
+ from .lidar_box3d import LiDARInstance3DBoxes
11
+ from .utils import limit_period
12
+
13
+
14
+ @unique
15
+ class Box3DMode(IntEnum):
16
+ r"""Enum of different ways to represent a box.
17
+
18
+ Coordinates in LiDAR:
19
+
20
+ .. code-block:: none
21
+
22
+ up z
23
+ ^ x front
24
+ | /
25
+ | /
26
+ left y <------ 0
27
+
28
+ The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
29
+ and the yaw is around the z axis, thus the rotation axis=2.
30
+
31
+ Coordinates in camera:
32
+
33
+ .. code-block:: none
34
+
35
+ z front
36
+ /
37
+ /
38
+ 0 ------> x right
39
+ |
40
+ |
41
+ v
42
+ down y
43
+
44
+ The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
45
+ and the yaw is around the y axis, thus the rotation axis=1.
46
+
47
+ Coordinates in Depth mode:
48
+
49
+ .. code-block:: none
50
+
51
+ up z
52
+ ^ y front
53
+ | /
54
+ | /
55
+ 0 ------> x right
56
+
57
+ The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
58
+ and the yaw is around the z axis, thus the rotation axis=2.
59
+ """
60
+
61
+ LIDAR = 0
62
+ CAM = 1
63
+ DEPTH = 2
64
+
65
+ @staticmethod
66
+ def convert(box, src, dst, rt_mat=None, with_yaw=True):
67
+ """Convert boxes from `src` mode to `dst` mode.
68
+
69
+ Args:
70
+ box (tuple | list | np.ndarray |
71
+ torch.Tensor | :obj:`BaseInstance3DBoxes`):
72
+ Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
73
+ src (:obj:`Box3DMode`): The src Box mode.
74
+ dst (:obj:`Box3DMode`): The target Box mode.
75
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
76
+ translation matrix between different coordinates.
77
+ Defaults to None.
78
+ The conversion from `src` coordinates to `dst` coordinates
79
+ usually comes along the change of sensors, e.g., from camera
80
+ to LiDAR. This requires a transformation matrix.
81
+ with_yaw (bool, optional): If `box` is an instance of
82
+ :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
83
+ Defaults to True.
84
+
85
+ Returns:
86
+ (tuple | list | np.ndarray | torch.Tensor |
87
+ :obj:`BaseInstance3DBoxes`):
88
+ The converted box of the same type.
89
+ """
90
+ if src == dst:
91
+ return box
92
+
93
+ is_numpy = isinstance(box, np.ndarray)
94
+ is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
95
+ single_box = isinstance(box, (list, tuple))
96
+ if single_box:
97
+ assert len(box) >= 7, (
98
+ 'Box3DMode.convert takes either a k-tuple/list or '
99
+ 'an Nxk array/tensor, where k >= 7')
100
+ arr = torch.tensor(box)[None, :]
101
+ else:
102
+ # avoid modifying the input box
103
+ if is_numpy:
104
+ arr = torch.from_numpy(np.asarray(box)).clone()
105
+ elif is_Instance3DBoxes:
106
+ arr = box.tensor.clone()
107
+ else:
108
+ arr = box.clone()
109
+
110
+ if is_Instance3DBoxes:
111
+ with_yaw = box.with_yaw
112
+
113
+ # convert box from `src` mode to `dst` mode.
114
+ x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
115
+ if with_yaw:
116
+ yaw = arr[..., 6:7]
117
+ if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
118
+ if rt_mat is None:
119
+ rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
120
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
121
+ if with_yaw:
122
+ yaw = -yaw - np.pi / 2
123
+ yaw = limit_period(yaw, period=np.pi * 2)
124
+ elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
125
+ if rt_mat is None:
126
+ rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
127
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
128
+ if with_yaw:
129
+ yaw = -yaw - np.pi / 2
130
+ yaw = limit_period(yaw, period=np.pi * 2)
131
+ elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
132
+ if rt_mat is None:
133
+ rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
134
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
135
+ if with_yaw:
136
+ yaw = -yaw
137
+ elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
138
+ if rt_mat is None:
139
+ rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
140
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
141
+ if with_yaw:
142
+ yaw = -yaw
143
+ elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
144
+ if rt_mat is None:
145
+ rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
146
+ xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
147
+ if with_yaw:
148
+ yaw = yaw + np.pi / 2
149
+ yaw = limit_period(yaw, period=np.pi * 2)
150
+ elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
151
+ if rt_mat is None:
152
+ rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
153
+ xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
154
+ if with_yaw:
155
+ yaw = yaw - np.pi / 2
156
+ yaw = limit_period(yaw, period=np.pi * 2)
157
+ else:
158
+ raise NotImplementedError(
159
+ f'Conversion from Box3DMode {src} to {dst} '
160
+ 'is not supported yet')
161
+
162
+ if not isinstance(rt_mat, torch.Tensor):
163
+ rt_mat = arr.new_tensor(rt_mat)
164
+ if rt_mat.size(1) == 4:
165
+ extended_xyz = torch.cat(
166
+ [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
167
+ xyz = extended_xyz @ rt_mat.t()
168
+ else:
169
+ xyz = arr[..., :3] @ rt_mat.t()
170
+
171
+ if with_yaw:
172
+ remains = arr[..., 7:]
173
+ arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
174
+ else:
175
+ remains = arr[..., 6:]
176
+ arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1)
177
+
178
+ # convert arr to the original type
179
+ original_type = type(box)
180
+ if single_box:
181
+ return original_type(arr.flatten().tolist())
182
+ if is_numpy:
183
+ return arr.numpy()
184
+ elif is_Instance3DBoxes:
185
+ if dst == Box3DMode.CAM:
186
+ target_type = CameraInstance3DBoxes
187
+ elif dst == Box3DMode.LIDAR:
188
+ target_type = LiDARInstance3DBoxes
189
+ elif dst == Box3DMode.DEPTH:
190
+ target_type = DepthInstance3DBoxes
191
+ else:
192
+ raise NotImplementedError(
193
+ f'Conversion to {dst} through {original_type}'
194
+ ' is not supported yet')
195
+ return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)
196
+ else:
197
+ return arr
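
A short sketch of the mode conversion above. Leaving rt_mat as None falls back to the hard-coded LiDAR-to-camera axis permutation, which is only a reasonable assumption when no real sensor extrinsics are involved:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import Box3DMode

    lidar_boxes = torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3]])
    cam_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
    # x/y/z are permuted, sizes reordered to (x_size, z_size, y_size),
    # and yaw is remapped to the camera convention (rotation about y).
    print(cam_boxes.shape)  # torch.Size([1, 7])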
det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py ADDED
@@ -0,0 +1,354 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import numpy as np
3
+ import torch
4
+
5
+ from ...points import BasePoints
6
+ from .base_box3d import BaseInstance3DBoxes
7
+ from .utils import rotation_3d_in_axis, yaw2local
8
+
9
+
10
+ class CameraInstance3DBoxes(BaseInstance3DBoxes):
11
+ """3D boxes of instances in CAM coordinates.
12
+
13
+ Coordinates in camera:
14
+
15
+ .. code-block:: none
16
+
17
+ z front (yaw=-0.5*pi)
18
+ /
19
+ /
20
+ 0 ------> x right (yaw=0)
21
+ |
22
+ |
23
+ v
24
+ down y
25
+
26
+ The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
27
+ and the yaw is around the y axis, thus the rotation axis=1.
28
+ The yaw is 0 at the positive direction of x axis, and decreases from
29
+ the positive direction of x to the positive direction of z.
30
+
31
+ Attributes:
32
+ tensor (torch.Tensor): Float matrix in shape (N, box_dim).
33
+ box_dim (int): Integer indicating the dimension of a box
34
+ Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
35
+ with_yaw (bool): If True, the value of yaw will be set to 0 as
36
+ axis-aligned boxes tightly enclosing the original boxes.
37
+ """
38
+ YAW_AXIS = 1
39
+
40
+ def __init__(self,
41
+ tensor,
42
+ box_dim=7,
43
+ with_yaw=True,
44
+ origin=(0.5, 1.0, 0.5)):
45
+ if isinstance(tensor, torch.Tensor):
46
+ device = tensor.device
47
+ else:
48
+ device = torch.device('cpu')
49
+ tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
50
+ if tensor.numel() == 0:
51
+ # Use reshape, so we don't end up creating a new tensor that
52
+ # does not depend on the inputs (and consequently confuses jit)
53
+ tensor = tensor.reshape((0, box_dim)).to(
54
+ dtype=torch.float32, device=device)
55
+ assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
56
+
57
+ if tensor.shape[-1] == 6:
58
+ # If the dimension of boxes is 6, we expand box_dim by padding
59
+ # 0 as a fake yaw and set with_yaw to False.
60
+ assert box_dim == 6
61
+ fake_rot = tensor.new_zeros(tensor.shape[0], 1)
62
+ tensor = torch.cat((tensor, fake_rot), dim=-1)
63
+ self.box_dim = box_dim + 1
64
+ self.with_yaw = False
65
+ else:
66
+ self.box_dim = box_dim
67
+ self.with_yaw = with_yaw
68
+ self.tensor = tensor.clone()
69
+
70
+ if origin != (0.5, 1.0, 0.5):
71
+ dst = self.tensor.new_tensor((0.5, 1.0, 0.5))
72
+ src = self.tensor.new_tensor(origin)
73
+ self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
74
+
75
+ @property
76
+ def height(self):
77
+ """torch.Tensor: A vector with height of each box in shape (N, )."""
78
+ return self.tensor[:, 4]
79
+
80
+ @property
81
+ def top_height(self):
82
+ """torch.Tensor:
83
+ A vector with the top height of each box in shape (N, )."""
84
+ # the positive direction is down rather than up
85
+ return self.bottom_height - self.height
86
+
87
+ @property
88
+ def bottom_height(self):
89
+ """torch.Tensor:
90
+ A vector with bottom's height of each box in shape (N, )."""
91
+ return self.tensor[:, 1]
92
+
93
+ @property
94
+ def local_yaw(self):
95
+ """torch.Tensor:
96
+ A vector with local yaw of each box in shape (N, ).
97
+ local_yaw equals to alpha in kitti, which is commonly
98
+ used in monocular 3D object detection task, so only
99
+ :obj:`CameraInstance3DBoxes` has the property.
100
+ """
101
+ yaw = self.yaw
102
+ loc = self.gravity_center
103
+ local_yaw = yaw2local(yaw, loc)
104
+
105
+ return local_yaw
106
+
107
+ @property
108
+ def gravity_center(self):
109
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
110
+ bottom_center = self.bottom_center
111
+ gravity_center = torch.zeros_like(bottom_center)
112
+ gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
113
+ gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
114
+ return gravity_center
115
+
116
+ @property
117
+ def corners(self):
118
+ """torch.Tensor: Coordinates of corners of all the boxes in
119
+ shape (N, 8, 3).
120
+
121
+ Convert the boxes to in clockwise order, in the form of
122
+ (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
123
+
124
+ .. code-block:: none
125
+
126
+ front z
127
+ /
128
+ /
129
+ (x0, y0, z1) + ----------- + (x1, y0, z1)
130
+ /| / |
131
+ / | / |
132
+ (x0, y0, z0) + ----------- + + (x1, y1, z1)
133
+ | / . | /
134
+ | / origin | /
135
+ (x0, y1, z0) + ----------- + -------> x right
136
+ | (x1, y1, z0)
137
+ |
138
+ v
139
+ down y
140
+ """
141
+ if self.tensor.numel() == 0:
142
+ return torch.empty([0, 8, 3], device=self.tensor.device)
143
+
144
+ dims = self.dims
145
+ corners_norm = torch.from_numpy(
146
+ np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
147
+ device=dims.device, dtype=dims.dtype)
148
+
149
+ corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
150
+ # use relative origin [0.5, 1, 0.5]
151
+ corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
152
+ corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
153
+
154
+ corners = rotation_3d_in_axis(
155
+ corners, self.tensor[:, 6], axis=self.YAW_AXIS)
156
+ corners += self.tensor[:, :3].view(-1, 1, 3)
157
+ return corners
158
+
159
+ @property
160
+ def bev(self):
161
+ """torch.Tensor: 2D BEV box of each box with rotation
162
+ in XYWHR format, in shape (N, 5)."""
163
+ bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()
164
+ # positive direction of the gravity axis
165
+ # in cam coord system points to the earth
166
+ # so the bev yaw angle needs to be reversed
167
+ bev[:, -1] = -bev[:, -1]
168
+ return bev
169
+
170
+ def rotate(self, angle, points=None):
171
+ """Rotate boxes with points (optional) with the given angle or rotation
172
+ matrix.
173
+
174
+ Args:
175
+ angle (float | torch.Tensor | np.ndarray):
176
+ Rotation angle or rotation matrix.
177
+ points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
178
+ Points to rotate. Defaults to None.
179
+
180
+ Returns:
181
+ tuple or None: When ``points`` is None, the function returns
182
+ None, otherwise it returns the rotated points and the
183
+ rotation matrix ``rot_mat_T``.
184
+ """
185
+ if not isinstance(angle, torch.Tensor):
186
+ angle = self.tensor.new_tensor(angle)
187
+
188
+ assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
189
+ f'invalid rotation angle shape {angle.shape}'
190
+
191
+ if angle.numel() == 1:
192
+ self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(
193
+ self.tensor[:, 0:3],
194
+ angle,
195
+ axis=self.YAW_AXIS,
196
+ return_mat=True)
197
+ else:
198
+ rot_mat_T = angle
199
+ rot_sin = rot_mat_T[2, 0]
200
+ rot_cos = rot_mat_T[0, 0]
201
+ angle = np.arctan2(rot_sin, rot_cos)
202
+ self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T
203
+
204
+ self.tensor[:, 6] += angle
205
+
206
+ if points is not None:
207
+ if isinstance(points, torch.Tensor):
208
+ points[:, :3] = points[:, :3] @ rot_mat_T
209
+ elif isinstance(points, np.ndarray):
210
+ rot_mat_T = rot_mat_T.cpu().numpy()
211
+ points[:, :3] = np.dot(points[:, :3], rot_mat_T)
212
+ elif isinstance(points, BasePoints):
213
+ points.rotate(rot_mat_T)
214
+ else:
215
+ raise ValueError
216
+ return points, rot_mat_T
217
+
218
+ def flip(self, bev_direction='horizontal', points=None):
219
+ """Flip the boxes in BEV along given BEV direction.
220
+
221
+ In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
222
+
223
+ Args:
224
+ bev_direction (str): Flip direction (horizontal or vertical).
225
+ points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
226
+ Points to flip. Defaults to None.
227
+
228
+ Returns:
229
+ torch.Tensor, numpy.ndarray or None: Flipped points.
230
+ """
231
+ assert bev_direction in ('horizontal', 'vertical')
232
+ if bev_direction == 'horizontal':
233
+ self.tensor[:, 0::7] = -self.tensor[:, 0::7]
234
+ if self.with_yaw:
235
+ self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
236
+ elif bev_direction == 'vertical':
237
+ self.tensor[:, 2::7] = -self.tensor[:, 2::7]
238
+ if self.with_yaw:
239
+ self.tensor[:, 6] = -self.tensor[:, 6]
240
+
241
+ if points is not None:
242
+ assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
243
+ if isinstance(points, (torch.Tensor, np.ndarray)):
244
+ if bev_direction == 'horizontal':
245
+ points[:, 0] = -points[:, 0]
246
+ elif bev_direction == 'vertical':
247
+ points[:, 2] = -points[:, 2]
248
+ elif isinstance(points, BasePoints):
249
+ points.flip(bev_direction)
250
+ return points
251
+
252
+ @classmethod
253
+ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
254
+ """Calculate height overlaps of two boxes.
255
+
256
+ This function calculates the height overlaps between ``boxes1`` and
257
+ ``boxes2``, where ``boxes1`` and ``boxes2`` should be in the same type.
258
+
259
+ Args:
260
+ boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.
261
+ boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.
262
+ mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
263
+
264
+ Returns:
265
+ torch.Tensor: Calculated iou of boxes' heights.
266
+ """
267
+ assert isinstance(boxes1, CameraInstance3DBoxes)
268
+ assert isinstance(boxes2, CameraInstance3DBoxes)
269
+
270
+ boxes1_top_height = boxes1.top_height.view(-1, 1)
271
+ boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
272
+ boxes2_top_height = boxes2.top_height.view(1, -1)
273
+ boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
274
+
275
+ # positive direction of the gravity axis
276
+ # in cam coord system points to the earth
277
+ heighest_of_bottom = torch.min(boxes1_bottom_height,
278
+ boxes2_bottom_height)
279
+ lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
280
+ overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
281
+ return overlaps_h
282
+
283
+ def convert_to(self, dst, rt_mat=None):
284
+ """Convert self to ``dst`` mode.
285
+
286
+ Args:
287
+ dst (:obj:`Box3DMode`): The target Box mode.
288
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
289
+ translation matrix between different coordinates.
290
+ Defaults to None.
291
+ The conversion from ``src`` coordinates to ``dst`` coordinates
292
+ usually comes along the change of sensors, e.g., from camera
293
+ to LiDAR. This requires a transformation matrix.
294
+
295
+ Returns:
296
+ :obj:`BaseInstance3DBoxes`:
297
+ The converted box of the same type in the ``dst`` mode.
298
+ """
299
+ from .box_3d_mode import Box3DMode
300
+ return Box3DMode.convert(
301
+ box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
302
+
303
+ def points_in_boxes_part(self, points, boxes_override=None):
304
+ """Find the box in which each point is.
305
+
306
+ Args:
307
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
308
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
309
+ boxes_override (torch.Tensor, optional): Boxes to override
310
+ `self.tensor `. Defaults to None.
311
+
312
+ Returns:
313
+ torch.Tensor: The index of the box in which
314
+ each point is, in shape (M, ). Default value is -1
315
+ (if the point is not enclosed by any box).
316
+ """
317
+ from .coord_3d_mode import Coord3DMode
318
+
319
+ points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
320
+ Coord3DMode.LIDAR)
321
+ if boxes_override is not None:
322
+ boxes_lidar = boxes_override
323
+ else:
324
+ boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
325
+ Coord3DMode.LIDAR)
326
+
327
+ box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar)
328
+ return box_idx
329
+
330
+ def points_in_boxes_all(self, points, boxes_override=None):
331
+ """Find all boxes in which each point is.
332
+
333
+ Args:
334
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
335
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
336
+ boxes_override (torch.Tensor, optional): Boxes to override
337
+ `self.tensor `. Defaults to None.
338
+
339
+ Returns:
340
+ torch.Tensor: The index of all boxes in which each point is,
341
+ in shape (B, M, T).
342
+ """
343
+ from .coord_3d_mode import Coord3DMode
344
+
345
+ points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
346
+ Coord3DMode.LIDAR)
347
+ if boxes_override is not None:
348
+ boxes_lidar = boxes_override
349
+ else:
350
+ boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
351
+ Coord3DMode.LIDAR)
352
+
353
+ box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar)
354
+ return box_idx
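
A closing sketch for the camera-frame boxes above. The box values are illustrative, and the corner/BEV helpers rely on the utils module shipped in this package:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import CameraInstance3DBoxes

    cam_boxes = CameraInstance3DBoxes(
        torch.tensor([[2.0, 1.6, 20.0, 1.8, 1.6, 4.0, -1.57]]))
    print(cam_boxes.gravity_center)  # y shifted by half the height (gravity axis points down)
    print(cam_boxes.corners.shape)   # torch.Size([1, 8, 3])
    print(cam_boxes.bev)             # (x, z, x_size, z_size, -yaw) on the ground plane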