lkllkl committed
Commit da2e2ac · verified · 1 parent: 73c776c

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.

Files changed (50)
  1. .gitattributes +2 -0
  2. .gitignore +27 -0
  3. Dockerfile +24 -0
  4. LICENSE +201 -0
  5. README.md +125 -0
  6. assets/ckpts.png +0 -0
  7. assets/navsim_transparent.png +3 -0
  8. det_map/__init__.py +0 -0
  9. det_map/agent_lightning.py +93 -0
  10. det_map/config/agent/det_agent.yaml +203 -0
  11. det_map/config/agent/map_agent.yaml +320 -0
  12. det_map/config/defaults/default_common.yaml +23 -0
  13. det_map/config/defaults/default_evaluation.yaml +7 -0
  14. det_map/config/defaults/ray_distributed_no_torch.yaml +8 -0
  15. det_map/config/scene_filter/det_all_scenes.yaml +12 -0
  16. det_map/config/scene_filter/navtiny.yaml +265 -0
  17. det_map/config/splits/default_train_val_test_log_split.yaml +0 -0
  18. det_map/config/train_det.yaml +48 -0
  19. det_map/data/__init__.py +0 -0
  20. det_map/data/datasets/__init__.py +0 -0
  21. det_map/data/datasets/dataclasses.py +521 -0
  22. det_map/data/datasets/dataloader.py +172 -0
  23. det_map/data/datasets/dataset.py +41 -0
  24. det_map/data/datasets/dataset_det.py +28 -0
  25. det_map/data/datasets/feature_builders.py +94 -0
  26. det_map/data/datasets/lidar_utils.py +66 -0
  27. det_map/data/pipelines/__init__.py +0 -0
  28. det_map/data/pipelines/color_utils.py +357 -0
  29. det_map/data/pipelines/filter_lidar.py +74 -0
  30. det_map/data/pipelines/lidar_aug.py +151 -0
  31. det_map/data/pipelines/point_shuffle.py +17 -0
  32. det_map/data/pipelines/prepare_depth.py +76 -0
  33. det_map/data/pipelines/prepare_img.py +218 -0
  34. det_map/det/__init__.py +0 -0
  35. det_map/det/dal/__init__.py +0 -0
  36. det_map/det/dal/dal.py +159 -0
  37. det_map/det/dal/mmdet3d/__init__.py +0 -0
  38. det_map/det/dal/mmdet3d/core/__init__.py +6 -0
  39. det_map/det/dal/mmdet3d/core/bbox/__init__.py +24 -0
  40. det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py +6 -0
  41. det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py +148 -0
  42. det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py +827 -0
  43. det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py +7 -0
  44. det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py +124 -0
  45. det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py +10 -0
  46. det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py +232 -0
  47. det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py +18 -0
  48. det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py +578 -0
  49. det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py +197 -0
  50. det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py +354 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/navsim_transparent.png filter=lfs diff=lfs merge=lfs -text
+ navsim/agents/backbones/ops_dcnv3/build/temp.linux-x86_64-cpython-39/zhenxinl_nuplan/navsim_workspace/navsim_ours/navsim/agents/backbones/ops_dcnv3/src/cuda/dcnv3_cuda.o filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,27 @@
+ # python
+ build/
+ vocab_score_local/
+ vocab_score_full/
+ vocab_score_full_8192/
+ vocab_score_local_8192/
+ models_local/
+ traj_local/
+ *.so
+ *.pyc
+ **/__pycache__/
+ dist/
+ .pytest_cache/*
+ .pydevproject
+ .idea/
+ debug/
+ # IDE
+ .vscode/*
+
+ # Pip
+ *.egg-info
+
+ # files
+ *.log
+
+ *.jpg
+ *.pcd
Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM nvcr.io/nvidia/pytorch:23.05-py3
+ RUN apt-get update
+ RUN apt-get install -y tmux htop
+
+ RUN git clone https://[email protected]/woxihuanjiangguo/navsim_ours.git /navsim_ours
+ WORKDIR /navsim_ours
+
+ ENV HYDRA_FULL_ERROR=1
+ ENV NUPLAN_MAP_VERSION="nuplan-maps-v1.0"
+ ENV NUPLAN_MAPS_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/maps"
+ ENV NAVSIM_EXP_ROOT="/zhenxinl_nuplan/navsim_workspace/exp"
+ ENV NAVSIM_DEVKIT_ROOT="/navsim_ours"
+ ENV NAVSIM_TRAJPDM_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset/traj_pdm"
+ ENV OPENSCENE_DATA_ROOT="/zhenxinl_nuplan/navsim_workspace/dataset"
+ ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
+ ENV CFLAGS="-I$CUDA_HOME/include $CFLAGS"
+
+ RUN pip uninstall torch torchvision torchaudio -y
+ RUN pip3 install torch torchvision torchaudio
+ RUN pip install openmim
+ RUN mim install mmdet==2.28.2
+ RUN pip install spconv-cu120
+ RUN pip install numba
+ RUN pip install -e .
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2024 autonomousvision
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,125 @@
+ <div id="top" align="center">
+
+ <p align="center">
+   <img src="assets/navsim_transparent.png" width="500">
+ </p>
+
+ **NAVSIM:** *Data-Driven **N**on-Reactive **A**utonomous **V**ehicle **Sim**ulation*
+
+ </div>
+
+
+ ## Highlights <a name="highlight"></a>
+
+ 🔥 NAVSIM gathers simulation-based metrics (such as progress and time to collision) for end-to-end driving by unrolling simplified bird's eye view abstractions of scenes for a short simulation horizon. It operates under the condition that the policy has no influence on the environment, which enables **efficient, open-loop metric computation** while being **better aligned with closed-loop** evaluations than traditional displacement errors.
+
+ > NAVSIM attempts to address some of the challenges faced by the community:
+ >
+ > 1. **Providing a principled evaluation** (by incorporating ideas + data from nuPlan)
+ >    - Key idea: the **PDM Score**, a multi-dimensional metric computed in open loop with strong correlation to closed-loop metrics
+ >    - Critical scenario sampling, focusing on situations with intention changes where the ego history cannot be extrapolated into a plan
+ >    - An official leaderboard on Hugging Face that remains open and prevents ambiguity in metric definitions across projects
+ >
+ > 2. **Maintaining ease of use** (by emulating nuScenes)
+ >    - Simple data format and a reasonably sized download (far smaller than nuPlan's 20+ TB)
+ >    - Large-scale publicly available test split for internal benchmarking
+ >    - Continually maintained devkit
+
+ 🏁 **NAVSIM** will serve as a main track in the **`CVPR 2024 Autonomous Grand Challenge`**. The leaderboard for the challenge is open! For further details, please [check the challenge website](https://opendrivelab.com/challenge2024/)!
+
+ <p align="center">
+   <img src="assets/navsim_cameras.gif" width="800">
+ </p>
+
+ ## Table of Contents
+ 1. [Highlights](#highlight)
+ 2. [Getting started](#gettingstarted)
+ 3. [Changelog](#changelog)
+ 4. [License and citation](#licenseandcitation)
+ 5. [Other resources](#otherresources)
+
+
+ ## Getting started <a name="gettingstarted"></a>
+
+ - [Download and installation](docs/install.md)
+ - [Understanding and creating agents](docs/agents.md)
+ - [Understanding the data format and classes](docs/cache.md)
+ - [Dataset splits vs. filtered training / test splits](docs/splits.md)
+ - [Understanding the PDM Score](docs/metrics.md)
+ - [Submitting to the Leaderboard](docs/submission.md)
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## Changelog <a name="changelog"></a>
+ - **`[2024/04/21]`** NAVSIM v1.0 release (official devkit version for [AGC 2024](https://opendrivelab.com/challenge2024/))
+   - **IMPORTANT NOTE**: The data split `competition_test` was renamed to `private_test_e2e`. Please adapt your directory name accordingly. For details see [installation](docs/install.md).
+   - Parallelization of metric caching / evaluation
+   - Adds [Transfuser](https://arxiv.org/abs/2205.15997) baseline (see [agents](docs/agents.md#Baselines))
+   - Adds standardized training and test filtered splits (see [splits](docs/splits.md))
+   - Visualization tools (see [tutorial_visualization.ipynb](tutorial/tutorial_visualization.ipynb))
+   - Refactoring
+ - **`[2024/04/03]`** NAVSIM v0.4 release
+   - Support for test-phase frames of the competition
+   - Download script for trainval
+   - EgoStatus MLP agent and training pipeline
+   - Refactoring, fixes, documentation
+ - **`[2024/03/25]`** NAVSIM v0.3 release (official devkit version for the warm-up phase)
+   - Changes env variable NUPLAN_EXP_ROOT to NAVSIM_EXP_ROOT
+   - Adds code for leaderboard submission
+   - Major refactoring of dataloading and configs
+ - **`[2024/03/11]`** NAVSIM v0.2 release
+   - Easier installation and download
+   - Mini and test data split integration
+   - Privileged `Human` agent
+ - **`[2024/02/20]`** NAVSIM v0.1 release (initial demo)
+   - OpenScene-mini sensor blobs and annotation logs
+   - Naive `ConstantVelocity` agent
+
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## License and citation <a name="licenseandcitation"></a>
+ All assets and code in this repository are under the [Apache 2.0 license](./LICENSE) unless specified otherwise. The datasets (including nuPlan and OpenScene) inherit their own distribution licenses. Please consider citing our paper and project if they help your research.
+
+ ```BibTeX
+ @misc{Contributors2024navsim,
+   title={NAVSIM: Data-Driven Non-Reactive Autonomous Vehicle Simulation},
+   author={NAVSIM Contributors},
+   howpublished={\url{https://github.com/autonomousvision/navsim}},
+   year={2024}
+ }
+ ```
+
+ ```BibTeX
+ @inproceedings{Dauner2023CORL,
+   title = {Parting with Misconceptions about Learning-based Vehicle Motion Planning},
+   author = {Daniel Dauner and Marcel Hallgarten and Andreas Geiger and Kashyap Chitta},
+   booktitle = {Conference on Robot Learning (CoRL)},
+   year = {2023}
+ }
+ ```
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
+
+
+ ## Other resources <a name="otherresources"></a>
+
+ <a href="https://twitter.com/AutoVisionGroup" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Awesome Vision Group?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/kashyap7x" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Kashyap Chitta?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/DanielDauner" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Daniel Dauner?style=social&color=brightgreen&logo=twitter" />
+ </a>
+ <a href="https://twitter.com/MHallgarten0797" target="_blank">
+   <img alt="Twitter Follow" src="https://img.shields.io/twitter/follow/Marcel Hallgarten?style=social&color=brightgreen&logo=twitter" />
+ </a>
+
+ - [SLEDGE](https://github.com/autonomousvision/sledge) | [tuPlan garage](https://github.com/autonomousvision/tuplan_garage) | [CARLA garage](https://github.com/autonomousvision/carla_garage) | [Survey on E2EAD](https://github.com/OpenDriveLab/End-to-end-Autonomous-Driving)
+ - [PlanT](https://github.com/autonomousvision/plant) | [KING](https://github.com/autonomousvision/king) | [TransFuser](https://github.com/autonomousvision/transfuser) | [NEAT](https://github.com/autonomousvision/neat)
+
+ <p align="right">(<a href="#top">back to top</a>)</p>
assets/ckpts.png ADDED
assets/navsim_transparent.png ADDED

Git LFS Details

  • SHA256: 77619d3f762206401f7a1221e0999df257bd0b4f9c5793667ad21413ddd031b6
  • Pointer size: 132 Bytes
  • Size of remote file: 4.85 MB
det_map/__init__.py ADDED
File without changes
det_map/agent_lightning.py ADDED
@@ -0,0 +1,93 @@
+ from typing import Dict, Tuple, List
+
+ import pytorch_lightning as pl
+ import torch
+ from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
+ from torch import Tensor
+
+ from navsim.agents.abstract_agent import AbstractAgent
+ from navsim.agents.vadv2.vadv2_agent import Vadv2Agent
+ from navsim.common.dataclasses import Trajectory
+
+
+ class AgentLightningModuleMap(pl.LightningModule):
+     def __init__(
+         self,
+         agent: AbstractAgent,
+     ):
+         super().__init__()
+         self.agent = agent
+
+     def _step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor], List[str]],
+         logging_prefix: str,
+     ):
+         features, targets = batch
+         if logging_prefix in ['train', 'val'] and isinstance(self.agent, Vadv2Agent):
+             prediction = self.agent.forward_train(features, targets['interpolated_traj'])
+         else:
+             prediction = self.agent.forward(features)
+
+         loss, loss_dict = self.agent.compute_loss(features, targets, prediction)
+
+         for k, v in loss_dict.items():
+             self.log(f"{logging_prefix}/{k}", v, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
+         self.log(f"{logging_prefix}/loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
+         return loss
+
+     def training_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         return self._step(batch, "train")
+
+     def validation_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         return self._step(batch, "val")
+
+     def configure_optimizers(self):
+         return self.agent.get_optimizers()
+
+     def predict_step(
+         self,
+         batch: Tuple[Dict[str, Tensor], Dict[str, Tensor]],
+         batch_idx: int
+     ):
+         features, targets, tokens = batch
+         self.agent.eval()
+         with torch.no_grad():
+             predictions = self.agent.forward(features)
+             poses = predictions["trajectory"].cpu().numpy()
+
+         imis = predictions["imi"].softmax(-1).log().cpu().numpy()
+         nocs = predictions["noc"].log().cpu().numpy()
+         das = predictions["da"].log().cpu().numpy()
+         ttcs = predictions["ttc"].log().cpu().numpy()
+         comforts = predictions["comfort"].log().cpu().numpy()
+         progresses = predictions["progress"].log().cpu().numpy()
+         if poses.shape[1] == 40:
+             interval_length = 0.1
+         else:
+             interval_length = 0.5
+
+         return {token: {
+             'trajectory': Trajectory(pose, TrajectorySampling(time_horizon=4, interval_length=interval_length)),
+             'imi': imi,
+             'noc': noc,
+             'da': da,
+             'ttc': ttc,
+             'comfort': comfort,
+             'progress': progress
+         } for pose, imi, noc, da, ttc, comfort, progress, token in zip(poses, imis, nocs, das, ttcs, comforts, progresses,
+                                                                        tokens)}
+     # def on_after_backward(self) -> None:
+     #     print("on_after_backward enter")
+     #     for name, param in self.named_parameters():
+     #         if param.grad is None:
+     #             print(name)
+     #     print("on_after_backward exit")
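The wrapper above only assumes an agent object that exposes `forward`, `compute_loss`, and `get_optimizers` (plus `forward_train` for `Vadv2Agent`). As a hedged, self-contained sketch of that contract, the toy agent and toy dataloader below are hypothetical placeholders (nothing like `DummyAgent` exists in this commit); only `AgentLightningModuleMap` and the standard PyTorch Lightning `Trainer` API are taken from the sources above. In the real pipeline the agent and dataloaders would instead be built from the Hydra configs under `det_map/config`.

```python
import pytorch_lightning as pl
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from det_map.agent_lightning import AgentLightningModuleMap


class DummyAgent(nn.Module):
    """Hypothetical stand-in implementing only what the wrapper calls:
    forward, compute_loss, and get_optimizers."""

    def __init__(self):
        super().__init__()
        self.head = nn.Linear(4, 2)

    def forward(self, features):
        return {"trajectory": self.head(features["state"])}

    def compute_loss(self, features, targets, prediction):
        loss = nn.functional.mse_loss(prediction["trajectory"], targets["trajectory"])
        return loss, {"traj_l2": loss.detach()}

    def get_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)


def collate(samples):
    # Produce the (features, targets) dict pair that _step unpacks.
    states, trajs = zip(*samples)
    return {"state": torch.stack(states)}, {"trajectory": torch.stack(trajs)}


dataset = TensorDataset(torch.randn(32, 4), torch.randn(32, 2))
loader = DataLoader(dataset, batch_size=8, collate_fn=collate)

module = AgentLightningModuleMap(agent=DummyAgent())
trainer = pl.Trainer(max_epochs=1, accelerator="cpu", logger=False, enable_checkpointing=False)
trainer.fit(module, train_dataloaders=loader)
```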
det_map/config/agent/det_agent.yaml ADDED
@@ -0,0 +1,203 @@
1
+ _target_: det_map.det.det_agent.DetAgent
2
+ _convert_: 'all'
3
+
4
+ is_train: &is_train
5
+ is_train: True
6
+
7
+ ranges: &ranges
8
+ x_range: (-54.0, 54.0)
9
+ y_range: (-54.0, 54.0)
10
+ z_range: (-10.0, 10.0)
11
+
12
+ point_cloud_range: &point_cloud_range
13
+ point_cloud_range: [ -54.0, -54.0, -10.0, 54.0, 54.0, 10.0 ]
14
+ voxel_size: &voxel_size
15
+ voxel_size: [0.075, 0.075, 0.2]
16
+
17
+
18
+ grid_config: &grid_config
19
+ grid_config:
20
+ x: (-54.0, 54.0, 0.6)
21
+ y: (-54.0, 54.0, 0.6)
22
+ z: (-10.0, 10.0, 20.0)
23
+ depth: (1.0, 60.0, 0.5)
24
+
25
+ model:
26
+ _target_: det_map.det.dal.dal.DAL
27
+ _convert_: 'all'
28
+ use_grid_mask: true
29
+ pts_voxel_layer:
30
+ max_num_points: 10
31
+ <<: *voxel_size
32
+ <<: *point_cloud_range
33
+ max_voxels: [ 120000, 160000 ]
34
+ pts_voxel_encoder:
35
+ type: HardSimpleVFE
36
+ num_features: 5
37
+ pts_middle_encoder:
38
+ type: SparseEncoder
39
+ in_channels: 5
40
+ base_channels: 24
41
+ sparse_shape: [ 41, 1440, 1440 ]
42
+ output_channels: 192
43
+ order: [ 'conv', 'norm', 'act' ]
44
+ encoder_channels: ((24, 24, 48), (48, 48, 96), (96, 96, 192), (192, 192))
45
+ encoder_paddings: ((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0))
46
+ block_type: basicblock
47
+ pts_backbone:
48
+ type: SECOND
49
+ in_channels: 384
50
+ out_channels: [ 192, 384 ]
51
+ layer_nums: [ 8, 8 ]
52
+ layer_strides: [ 1, 2 ]
53
+ norm_cfg:
54
+ type: BN
55
+ eps: 1e-3
56
+ momentum: 0.01
57
+ conv_cfg:
58
+ type: Conv2d
59
+ bias: false
60
+ pts_neck:
61
+ type: SECONDFPN
62
+ in_channels: [ 192, 384 ]
63
+ out_channels: [ 256, 256 ]
64
+ upsample_strides: [ 1, 2 ]
65
+ norm_cfg:
66
+ type: BN
67
+ eps: 1e-3
68
+ momentum: 0.01
69
+ upsample_cfg:
70
+ type: deconv
71
+ bias: false
72
+ use_conv_for_no_stride: true
73
+ img_backbone:
74
+ pretrained: 'torchvision://resnet18'
75
+ type: ResNet
76
+ depth: 18
77
+ num_stages: 4
78
+ out_indices: [ 1, 2, 3 ]
79
+ frozen_stages: -1
80
+ norm_cfg:
81
+ type: BN
82
+ requires_grad: true
83
+ norm_eval: false
84
+ with_cp: false
85
+ style: pytorch
86
+ img_neck:
87
+ type: CustomFPN
88
+ in_channels: [ 128, 256, 512 ]
89
+ out_channels: img_feat_dim
90
+ num_outs: 1
91
+ start_level: 0
92
+ out_ids: [ 0 ]
93
+ img_view_transformer:
94
+ type: LSSViewTransformer
95
+ <<: *grid_config
96
+ input_size: data_config['input_size']
97
+ in_channels: img_feat_dim
98
+ out_channels: feat_bev_img_dim
99
+ downsample: 8
100
+ with_depth_from_lidar: true
101
+ pts_bbox_head:
102
+ type: DALHead
103
+ feat_bev_img_dim: feat_bev_img_dim
104
+ img_feat_dim: img_feat_dim
105
+ sparse_fuse_layers: 2
106
+ dense_fuse_layers: 2
107
+ instance_attn: false
108
+ num_proposals: 200
109
+ in_channels: 512
110
+ hidden_channel: 128
111
+ num_classes: 10
112
+ num_decoder_layers: 1
113
+ num_heads: 8
114
+ nms_kernel_size: 3
115
+ ffn_channel: 256
116
+ dropout: 0.1
117
+ bn_momentum: 0.1
118
+ activation: relu
119
+ auxiliary: true
120
+ common_heads:
121
+ center: [ 2, 2 ]
122
+ height: [ 1, 2 ]
123
+ dim: [ 3, 2 ]
124
+ rot: [ 2, 2 ]
125
+ vel: [ 2, 2 ]
126
+ bbox_coder:
127
+ type: TransFusionBBoxCoder
128
+ pc_range: point_cloud_range[:2]
129
+ post_center_range: [ -61.2, -61.2, -10.0, 61.2, 61.2, 10.0 ]
130
+ score_threshold: 0.0
131
+ out_size_factor: 8
132
+ voxel_size: voxel_size[:2]
133
+ code_size: 10
134
+ loss_cls:
135
+ type: FocalLoss
136
+ use_sigmoid: true
137
+ gamma: 2.0
138
+ alpha: 0.25
139
+ reduction: mean
140
+ loss_weight: 1.0
141
+ loss_heatmap:
142
+ type: GaussianFocalLoss
143
+ reduction: mean
144
+
145
+ pipelines:
146
+ lidar_filter:
147
+ _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
148
+ _convert_: 'all'
149
+ close_radius: 1.0
150
+ <<: *ranges
151
+
152
+ # only include in training
153
+ point_shuffle:
154
+ _target_: det_map.data.pipelines.point_shuffle.PointShuffle
155
+ <<: *is_train
156
+
157
+ lidar_aug:
158
+ _target_: det_map.data.pipelines.lidar_aug.LiDARAug
159
+ bda_aug_conf:
160
+ rot_lim: (-22.5 * 2, 22.5 * 2)
161
+ scale_lim: (0.9, 1.1)
162
+ flip_dx_ratio: 0.5
163
+ flip_dy_ratio: 0.5
164
+ tran_lim: (0.5, 0.5, 0.5)
165
+ <<: *ranges
166
+ # if no aug for map, set this is_train to False
167
+ <<: *is_train
168
+
169
+ depth:
170
+ _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
171
+ <<: *grid_config
172
+
173
+ img:
174
+ _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
175
+ _convert_: 'all'
176
+ opencv_pp: True
177
+ # Flag should be False in Eval!!!!
178
+ <<: *is_train
179
+ data_config:
180
+ input_size: (256, 704)
181
+ src_size: (900, 1600)
182
+ # Augmentation
183
+ resize: (-0.06, 0.44)
184
+ rot: (-5.4, 5.4)
185
+ flip: True
186
+ crop_h: (0.0, 0.0)
187
+ random_crop_height: True
188
+ vflip: True
189
+ resize_test: 0.04
190
+ pmd:
191
+ brightness_delta: 32
192
+ contrast_lower: 0.5
193
+ contrast_upper: 1.5
194
+ saturation_lower: 0.5
195
+ saturation_upper: 1.5
196
+ hue_delta: 18
197
+ rate: 0.5
198
+
199
+
200
+ <<: *is_train
201
+ checkpoint_path: null
202
+ hidden_layer_dim: 512
203
+ lr: 1e-4
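The `_target_` / `_convert_` keys in this config follow Hydra's object-instantiation convention. The sketch below is only an illustration under assumptions: it loads this file directly with OmegaConf instead of composing it through `train_det.yaml` (which appears later in this diff), it presumes Hydra ≥ 1.1 is installed, and whether the nested pipeline nodes construct cleanly is not verified here.

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Load the agent config file directly; in the real pipeline Hydra composes it
# as the `agent` config group of train_det.yaml instead.
cfg = OmegaConf.load("det_map/config/agent/det_agent.yaml")

# instantiate() builds the object named by _target_ (here det_map.det.det_agent.DetAgent)
# and recursively instantiates nested nodes that carry their own _target_;
# _convert_: 'all' makes the remaining config nodes arrive as plain dicts/lists.
agent = instantiate(cfg)
```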
det_map/config/agent/map_agent.yaml ADDED
@@ -0,0 +1,320 @@
1
+ _target_: det_map.map.map_agent.MapAgent
2
+ _convert_: 'all'
3
+
4
+
5
+ is_train: &is_train
6
+ is_train: True
7
+
8
+ point_cloud_range: &point_cloud_range
9
+ pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
10
+
11
+ lidar_filter_ranges: &lidar_filter_ranges
12
+ x_range: (-15.0, 15.0)
13
+ y_range: (-30.0, 30.0)
14
+ z_range: (-10.0, 10.0)
15
+
16
+ voxel_size: &voxel_size
17
+ voxel_size: [0.075, 0.075, 20.0]
18
+
19
+ img_voxel_size: &img_voxel_size
20
+ voxel_size: [0.3, 0.3, 20.0]
21
+
22
+
23
+ dbound: &dbound
24
+ dbound: [1.0, 35.0, 0.5]
25
+
26
+ grid_config: &grid_config
27
+ grid_config:
28
+ x: (-15.0, 15.0, 0.6)
29
+ y: (-30.0, 30.0, 0.6)
30
+ z: (-10.0, 10.0, 20.0)
31
+ depth: (1.0, 60.0, 0.5)
32
+
33
+ img_norm_cfg : &img_norm_cfg
34
+ img_norm_cfg:
35
+ mean: [123.675, 116.28, 103.53]
36
+ std: [58.395, 57.12, 57.375]
37
+ to_rgb: True
38
+
39
+ map_classes: &map_classes
40
+ map_classes: ['divider', 'ped_crossing','boundary', 'centerline']
41
+
42
+ #fixed_ptsnum_per_gt_line: &fixed_ptsnum_per_gt_line
43
+ # fixed_ptsnum_per_gt_line: 20
44
+
45
+ #fixed_ptsnum_per_pred_line: &fixed_ptsnum_per_pred_line
46
+ # fixed_ptsnum_per_pred_line: 20
47
+
48
+ eval_use_same_gt_sample_num_flag: &eval_use_same_gt_sample_num_flag
49
+ eval_use_same_gt_sample_num_flag: True
50
+
51
+
52
+
53
+ #_pos_dim_: &_pos_dim_
54
+ # _pos_dim_: 128
55
+
56
+ #_ffn_dim_: &_ffn_dim_
57
+ # _ffn_dim_: 512
58
+
59
+ #_num_levels_: &_num_levels_
60
+ # _num_levels_: 1
61
+
62
+ #bev_h_: &bev_h_
63
+ # bev_h_: 100
64
+
65
+ #bev_w_: &bev_w_
66
+ # bev_w_: 200
67
+
68
+ #queue_length: &queue_length
69
+ # queue_length: 1
70
+
71
+ aux_seg : &aux_seg_cfg
72
+ aux_seg:
73
+ use_aux_seg: False
74
+ bev_seg: False
75
+ pv_seg: False
76
+ seg_classes: 1
77
+ feat_down_sample: 32
78
+ pv_thickness: 1
79
+
80
+ #z_cfg : &z_cfg
81
+ #
82
+ # pred_z_flag: True
83
+ # gt_z_flag: True
84
+
85
+ model:
86
+ _target_: det_map.map.map_model.MapModel
87
+ _convert_: 'all'
88
+ use_grid_mask: True
89
+ video_test_mode: False
90
+ pretrained:
91
+ img: ckpts/resnet50-19c8e357.pth
92
+
93
+ img_backbone:
94
+ type: ResNet
95
+ depth: 50
96
+ num_stages: 4
97
+ out_indices: [3]
98
+ frozen_stages: 1
99
+ norm_cfg:
100
+ type: BN
101
+ requires_grad: False
102
+ norm_eval: True
103
+ style: pytorch
104
+ img_neck:
105
+ type: FPN
106
+ in_channels: [2048]
107
+ out_channels: 256
108
+ start_level: 0
109
+ add_extra_convs: on_output
110
+ num_outs: 1
111
+ relu_before_extra_convs: True
112
+ pts_bbox_head:
113
+ type: MapTRv2Head
114
+ <<: *point_cloud_range
115
+ bev_h: 100
116
+ bev_w: 50
117
+ num_query: 900
118
+ num_vec_one2one: 20
119
+ num_vec_one2many: 300
120
+ k_one2many: 6
121
+ num_pts_per_vec: 20
122
+ num_pts_per_gt_vec: 20
123
+ dir_interval: 1
124
+ query_embed_type: 'instance_pts'
125
+ transform_method: 'minmax'
126
+ gt_shift_pts_pattern: 'v2'
127
+ num_classes: 2
128
+ in_channels: 256
129
+ sync_cls_avg_factor: True
130
+ with_box_refine: True
131
+ as_two_stage: False
132
+ code_size: 2
133
+ code_weights: None
134
+ <<: *aux_seg_cfg
135
+ # z_cfg: *z_cfg
136
+ transformer:
137
+ type: MapTRPerceptionTransformer
138
+ bev_h: 100
139
+ bev_w: 50
140
+ # fuser:
141
+ # type: 'ConvFuser'
142
+ # in_channels: [256, 256]
143
+ # out_channels: 256
144
+ num_cams: 2
145
+ # z_cfg: *z_cfg
146
+ rotate_prev_bev: False
147
+ use_shift: True
148
+ use_can_bus: False
149
+ embed_dims: 256
150
+ encoder:
151
+ type: 'SpatialDecoder'
152
+ num_layers: 1
153
+ <<: *point_cloud_range
154
+ grid_config:
155
+ x: [-15.0, 15.0, 0.6]
156
+ y: [-30.0, 30.0, 0.6]
157
+ z: [ -10.0, 10.0, 20.0 ]
158
+ data_config:
159
+ input_size: [256, 704]
160
+ transformerlayers:
161
+ type: 'SpatialDecoderLayer'
162
+ attn_cfgs:
163
+ - type: 'SpatialCrossAttention'
164
+ <<: *point_cloud_range
165
+ num_cams: 2
166
+ dropout: 0.0
167
+ embed_dims: 256
168
+ deformable_attention:
169
+ type: 'MSDeformableAttention'
170
+ embed_dims: 256
171
+ num_points: 8
172
+ num_levels: 1
173
+ ffn_cfgs:
174
+ type: 'FFN'
175
+ embed_dims: 256
176
+ feedforward_channels: 1024
177
+ ffn_drop: 0.0
178
+ act_cfg:
179
+ type: 'ReLU'
180
+ inplace: True
181
+ feedforward_channels: 1024
182
+ ffn_dropout: 0.0
183
+ operation_order: ['cross_attn', 'norm' ,'ffn', 'norm']
184
+ decoder:
185
+ type: MapTRDecoder
186
+ num_layers: 6
187
+ return_intermediate: True
188
+ transformerlayers:
189
+ type: DecoupledDetrTransformerDecoderLayer
190
+ num_vec: 20
191
+ num_pts_per_vec: 20
192
+ attn_cfgs:
193
+ - type: MultiheadAttention
194
+ embed_dims: 256
195
+ num_heads: 8
196
+ dropout: 0.1
197
+ - type: MultiheadAttention
198
+ embed_dims: 256
199
+ num_heads: 8
200
+ dropout: 0.1
201
+ - type: CustomMSDeformableAttention
202
+ embed_dims: 256
203
+ num_levels: 1
204
+ feedforward_channels: 512
205
+ ffn_dropout: 0.1
206
+ operation_order: ['self_attn', 'norm', 'self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']
207
+
208
+ positional_encoding:
209
+ type: LearnedPositionalEncoding
210
+ num_feats: 128
211
+ row_num_embed: 100
212
+ col_num_embed: 50
213
+ loss_cls:
214
+ type: FocalLoss
215
+ use_sigmoid: True
216
+ gamma: 2.0
217
+ alpha: 0.25
218
+ loss_weight: 2.0
219
+ loss_bbox:
220
+ type: L1Loss
221
+ loss_weight: 0.0
222
+ loss_iou:
223
+ type: GIoULoss
224
+ loss_weight: 0.0
225
+ loss_pts:
226
+ type: PtsL1Loss
227
+ loss_weight: 5.0
228
+ loss_dir:
229
+ type: PtsDirCosLoss
230
+ loss_weight: 0.005
231
+ loss_seg:
232
+ type: SimpleLoss
233
+ pos_weight: 4.0
234
+ loss_weight: 1.0
235
+ loss_pv_seg:
236
+ type: SimpleLoss
237
+ pos_weight: 1.0
238
+ loss_weight: 2.0
239
+ # train_cfg:
240
+ # pts:
241
+ # grid_size: [512, 512, 1]
242
+ # <<: *voxel_size
243
+ # point_cloud_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
244
+ # out_size_factor: 4
245
+ # assigner:
246
+ # type: MapTRAssigner
247
+ # cls_cost:
248
+ # type: FocalLossCost
249
+ # weight: 2.0
250
+ # reg_cost:
251
+ # type: BBoxL1Cost
252
+ # weight: 0.0
253
+ # box_format: 'xywh'
254
+ # iou_cost:
255
+ # type: IoUCost
256
+ # iou_mode: 'giou'
257
+ # weight: 0.0
258
+ # pts_cost:
259
+ # type: OrderedPtsL1Cost
260
+ # weight: 5
261
+ # pc_range: [ -15.0, -30.0, -10.0, 15.0, 30.0, 10.0 ]
262
+
263
+ pipelines:
264
+ lidar_filter:
265
+ _target_: det_map.data.pipelines.filter_lidar.LiDARFilter
266
+ _convert_: 'all'
267
+ close_radius: 1.0
268
+ <<: *lidar_filter_ranges
269
+
270
+ # only include in training
271
+ point_shuffle:
272
+ _target_: det_map.data.pipelines.point_shuffle.PointShuffle
273
+ <<: *is_train
274
+
275
+ lidar_aug:
276
+ _target_: det_map.data.pipelines.lidar_aug.LiDARAug
277
+ bda_aug_conf:
278
+ rot_lim: (-22.5 * 2, 22.5 * 2)
279
+ scale_lim: (0.9, 1.1)
280
+ flip_dx_ratio: 0.5
281
+ flip_dy_ratio: 0.5
282
+ tran_lim: (0.5, 0.5, 0.5)
283
+ <<: *lidar_filter_ranges
284
+ # if no aug for map, set this is_train to False
285
+ <<: *is_train
286
+
287
+ depth:
288
+ _target_: det_map.data.pipelines.prepare_depth.LiDAR2Depth
289
+ <<: *grid_config
290
+
291
+ img:
292
+ _target_: det_map.data.pipelines.prepare_img.PrepareImageInputs
293
+ _convert_: 'all'
294
+ opencv_pp: True
295
+ # Flag should be False in Eval!!!!
296
+ <<: *is_train
297
+ data_config:
298
+ input_size: (256, 704)
299
+ src_size: (900, 1600)
300
+ # Augmentation
301
+ resize: (-0.06, 0.44)
302
+ rot: (-5.4, 5.4)
303
+ flip: True
304
+ crop_h: (0.0, 0.0)
305
+ random_crop_height: True
306
+ vflip: True
307
+ resize_test: 0.04
308
+ pmd:
309
+ brightness_delta: 32
310
+ contrast_lower: 0.5
311
+ contrast_upper: 1.5
312
+ saturation_lower: 0.5
313
+ saturation_upper: 1.5
314
+ hue_delta: 18
315
+ rate: 0.5
316
+
317
+ #<<: *is_train
318
+ checkpoint_path: null
319
+ hidden_layer_dim: 512
320
+ lr: 1e-4
det_map/config/defaults/default_common.yaml ADDED
@@ -0,0 +1,23 @@
+ # Default common configs
+
+ defaults:
+   # Worker that is used to run simulations
+   # - ray_distributed_no_torch
+   - ray_distributed_no_torch
+
+ split: ???
+
+ distributed_timeout_seconds: 7200  # Sets how long to wait while synchronizing across worker nodes in a distributed context.
+
+ selected_simulation_metrics: null
+
+ # Sets verbosity level, in particular determines if progress bars are shown or not.
+ verbose: false
+
+ # Logger
+ logger_level: info  # Level of logger
+ logger_format_string: null  # Logger format string, set null to use the default format string
+
+ # Execution
+ max_number_of_workers: null  # Set null to disable threading for simulation execution
+ gpu: true  # Whether to use available GPUs during training/simulation
det_map/config/defaults/default_evaluation.yaml ADDED
@@ -0,0 +1,7 @@
+ # Cache parameters
+ experiment_name: ???
+ navsim_log_path: ${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}  # path to log annotations
+ sensor_blobs_path: ${oc.env:OPENSCENE_DATA_ROOT}/sensor_blobs/${split}  # path to sensor blobs
+ date_format: '%Y.%m.%d.%H.%M.%S'
+ experiment_uid: ${now:${date_format}}
+ output_dir: ${oc.env:NAVSIM_EXP_ROOT}/${experiment_name}/${experiment_uid}  # path where the output csv is saved
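The `${...}` entries above are OmegaConf interpolations: `${oc.env:...}` reads an environment variable, `${split}` references another config key, and `${now:...}` is a resolver that Hydra registers at runtime. A minimal sketch of the mechanism, using only the built-in `oc.env` resolver (the path value is made up for the example):

```python
import os
from omegaconf import OmegaConf

os.environ.setdefault("OPENSCENE_DATA_ROOT", "/data/openscene")  # made-up value for the example

cfg = OmegaConf.create(
    {
        "split": "mini",
        "navsim_log_path": "${oc.env:OPENSCENE_DATA_ROOT}/navsim_logs/${split}",
    }
)

# Interpolations are resolved lazily, on access rather than at creation time.
print(cfg.navsim_log_path)  # -> /data/openscene/navsim_logs/mini
```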
det_map/config/defaults/ray_distributed_no_torch.yaml ADDED
@@ -0,0 +1,8 @@
+ _target_: navsim.planning.utils.multithreading.worker_ray_no_torch.RayDistributedNoTorch
+ _convert_: 'all'
+ master_node_ip: null  # Set to a master node IP if you desire to connect to cluster remotely
+ threads_per_node: null  # Number of CPU threads to use per node, "null" means all threads available
+ debug_mode: false  # If true all tasks will be executed serially, mainly for testing
+ log_to_driver: true  # If true, all printouts from ray threads will be displayed in driver
+ logs_subdir: 'logs'  # Subdirectory to store logs inside the experiment directory
+ use_distributed: false  # Whether to use the built-in distributed mode of ray
det_map/config/scene_filter/det_all_scenes.yaml ADDED
@@ -0,0 +1,12 @@
+ _target_: det_map.data.datasets.dataloader.SceneFilter
+ _convert_: 'all'
+
+ num_history_frames: 4
+ num_future_frames: 10
+ # for the map agent, has_route may need to be set to True
+ has_route: False
+
+ max_scenes: Null
+ log_names: Null
+
+ tokens: Null
det_map/config/scene_filter/navtiny.yaml ADDED
@@ -0,0 +1,265 @@
1
+ _target_: navsim.common.dataclasses.SceneFilter
2
+ _convert_: 'all'
3
+ num_history_frames: 4
4
+ num_future_frames: 10
5
+ frame_interval: 1
6
+ has_route: true
7
+ max_scenes: null
8
+
9
+ log_names: null # list of log names to extract scenes from, if null, all logs are extracted
10
+ tokens:
11
+ - 'ed4ac2dad0fa584b'
12
+ - '2111b648fcba5bb7'
13
+ - '1fc1dd0dc3d157ae'
14
+ - '76a69c9e9e375670'
15
+ - '4d3a4cbc9efb5337'
16
+ - '06df05f607855dbf'
17
+ - 'c3856d49ecf453f0'
18
+ - '09d3f08395e05d1c'
19
+ - '0593ddf8a1bb5a57'
20
+ - 'c0b386ab15db56f9'
21
+ - '0ef0f369529e54a9'
22
+ - 'c754b1af814a5f23'
23
+ - 'b214f8e744075e96'
24
+ - '5cbacc029a9f5cb3'
25
+ - 'cb46ac2ddfdf506e'
26
+ - '108d77bad2275975'
27
+ - '3978246a10a25ab0'
28
+ - '41bb74b4738f5a8b'
29
+ - '3a8375c20b615fce'
30
+ - '82dc3fff070b5f80'
31
+ - '8bfb2d59b82057e6'
32
+ - 'e36d3626a55e54f9'
33
+ - '5b1c0e44a5505c06'
34
+ - '78e6ea95b854551c'
35
+ - '76af8c24431855c3'
36
+ - '1a84e817c1875ec6'
37
+ - 'e7ea3ed9a30e5444'
38
+ - '8c837572950a5ac0'
39
+ - 'c18f8cfc41385d8c'
40
+ - '11aa12f4e5715b08'
41
+ - '702bdcfabe0755fe'
42
+ - 'c11854507e515b05'
43
+ - '828f0769bf365504'
44
+ - '1d2d2ddbbd5450a4'
45
+ - '640423c4ff21538a'
46
+ - '93fa463a455857f6'
47
+ - '79214a9a65225eda'
48
+ - 'cd9d78a1011c555f'
49
+ - '2a3f7fbaa10b5627'
50
+ - '5abf2148971855ad'
51
+ - 'd9200709d73756c3'
52
+ - 'cf94200201a75af8'
53
+ - 'c97bad66929c58d1'
54
+ - 'e45b782c83a550c1'
55
+ - 'e869951de22f5ecc'
56
+ - '9610b02bc4ec529c'
57
+ - '70ed6ff1471f5d74'
58
+ - 'f8a971a1e94553ce'
59
+ - '91e77e1873d75afe'
60
+ - 'dc86b9a3e2e05466'
61
+ - 'a3efdab7285751a6'
62
+ - 'ecca4f25f1cd5a85'
63
+ - '3c09e960d73758eb'
64
+ - '58fb7f78e39451bc'
65
+ - '0ce0aa336fe751a4'
66
+ - '759d96676b965349'
67
+ - 'e3b1564e52cd52db'
68
+ - '48333fc684d454a2'
69
+ - '62cae48b4e445254'
70
+ - 'e97256ddafa85705'
71
+ - '568aee30ea2655e2'
72
+ - '2b8645e05e8854f0'
73
+ - '1ce8022305ba565c'
74
+ - 'fd3f8f3310255030'
75
+ - 'f0b74302312b5241'
76
+ - 'd74e1e5648e35864'
77
+ - '5bff4e6fa9c95deb'
78
+ - '97d3764b7be652cf'
79
+ - 'de681a4826e35220'
80
+ - 'be2540e76b10519d'
81
+ - 'c7e91cc3157b5937'
82
+ - '12a68a4c440c5396'
83
+ - 'ac0c803827d65b80'
84
+ - 'c18771a3868f5868'
85
+ - 'a6340d3e28b95701'
86
+ - '24fff541744b573f'
87
+ - 'e7165cb777e65dac'
88
+ - '7c1553e7080b5a70'
89
+ - '6dffb4d149eb5089'
90
+ - '0773a8971c5e5e5a'
91
+ - '72dac45a812f56fb'
92
+ - '75c16dc4849b5726'
93
+ - '523eab76cc4653bd'
94
+ - 'f246f785c3455caa'
95
+ - 'baf59d54fb78575a'
96
+ - 'b29743e5885f5514'
97
+ - 'd213c35fc6055569'
98
+ - '3ba8190534b1554c'
99
+ - '26e297939af25760'
100
+ - 'da643d2d70785c76'
101
+ - '2137a540b5f05b48'
102
+ - 'ed795a36682f5728'
103
+ - '000afad751a95adb'
104
+ - '7543fb2f2dcf5c7e'
105
+ - '9b5c00687d4e590b'
106
+ - '16d0a19acfcd5668'
107
+ - 'd91da3c6f79b53f6'
108
+ - '154694dd0f6c565c'
109
+ - '9b4b3a0261595a47'
110
+ - '0df3061f21f4502a'
111
+ - '7e0b549208c75322'
112
+ - '74678e95029e52a2'
113
+ - '49196fecbe9a549f'
114
+ - '0decaed8d0f45b26'
115
+ - 'b3671d0ef61e5391'
116
+ - '7b990d22090f5a21'
117
+ - '4fea3406427a52de'
118
+ - 'e7ac9da207d05a7f'
119
+ - '69b772bf2aa15e8b'
120
+ - '09300186157e51e9'
121
+ - 'c61c26797b2d52f8'
122
+ - 'eac8efd956975d88'
123
+ - 'ad0ca9004c1e56c6'
124
+ - '9c48c3a7714e5850'
125
+ - '1bac9ad3b5795fb9'
126
+ - '5dad11490b425565'
127
+ - '1f6cea56be625f10'
128
+ - 'f2fa70a966055b14'
129
+ - '68520950dcca56d2'
130
+ - 'e905af2fb80f5802'
131
+ - 'e5445523551c573a'
132
+ - '5a3b197e54495443'
133
+ - '35d813d8de5854f9'
134
+ - '25e0169687d659c0'
135
+ - '88f7863088bc593e'
136
+ - '06767022b8445e7f'
137
+ - '4fcdad926f4a5568'
138
+ - '8f5b483a5dd956d3'
139
+ - 'a64cd79798845d53'
140
+ - 'de864917fc075773'
141
+ - '50418b03a9345e7f'
142
+ - 'e991b5b1ef9d5fcd'
143
+ - 'ea75df402b6a5d37'
144
+ - '17b4e23eb78b547b'
145
+ - '79388c5790cf5b02'
146
+ - '7b9cc1b02566583e'
147
+ - 'a8b415f811cb5bfa'
148
+ - 'f4e49919c3d35a1a'
149
+ - '79ca73b34554570a'
150
+ - 'f9902a62c80c511a'
151
+ - '71057951bf9a5e81'
152
+ - '411cc15794895e1e'
153
+ - '7c4fca218b0854d7'
154
+ - '8498fd37028051b7'
155
+ - '27decc74a57b53ac'
156
+ - '50480a33ca215770'
157
+ - '47f300be059c5734'
158
+ - '70f2ea8358ed55f1'
159
+ - '471f7ca3148659cd'
160
+ - '4800f9f234c050fa'
161
+ - '64c71ae3532a5efb'
162
+ - '5e8f9f6ab5695769'
163
+ - '2d9168675ce355a2'
164
+ - '3c077c8da4615b33'
165
+ - 'c7e8c07beb135247'
166
+ - '2f8055010b905651'
167
+ - '340d245e2ee854fe'
168
+ - '70df39aae7b05204'
169
+ - '388782e615ec5bba'
170
+ - '7cb3886f8bb557d3'
171
+ - 'b37a0e95ac4055ba'
172
+ - '8be138812f1459d2'
173
+ - '3ff2c6494d63527b'
174
+ - '05fab28931d55ff9'
175
+ - '333189d65a42540d'
176
+ - '73bb3d277424505f'
177
+ - 'cbe6088df42d55dc'
178
+ - 'aa784b6564cb56a3'
179
+ - 'cd30af3a16945a92'
180
+ - 'c3a15b9f7dd55cce'
181
+ - '44b6e898e157569a'
182
+ - '4e4062c303565251'
183
+ - 'd74f9dfdb4125eaf'
184
+ - 'c0365ee92dec511d'
185
+ - '4e98aff61c5e57b1'
186
+ - '7200dcdd4ad05210'
187
+ - 'c8124080125a5278'
188
+ - '1586145ff7ae5b89'
189
+ - '6b7f1a53f7d3524c'
190
+ - '3bf37bad40c55175'
191
+ - 'bdde0c029ec25326'
192
+ - 'cd0a777bac035272'
193
+ - '67b76696aa305cdc'
194
+ - '614111a5d6045ae7'
195
+ - 'f383acca25ff59eb'
196
+ - 'cea15449dc0356bd'
197
+ - 'b80387b22e0c55b5'
198
+ - '065a0963a4125096'
199
+ - 'c9e06d789998518d'
200
+ - '4615024da7765d62'
201
+ - 'ef336e8b83245733'
202
+ - 'be4ec4d7ce745612'
203
+ - '5169ec4362225b58'
204
+ - 'c6f905906f9654a2'
205
+ - '194216a5f85d592d'
206
+ - '6529aed422f35336'
207
+ - '497ac853176d59b6'
208
+ - 'f280ba623a7f5321'
209
+ - 'b5fe876937af504a'
210
+ - 'c6b62c299ccc5274'
211
+ - 'dcb2a35ae605510a'
212
+ - 'd1c281e277d1532d'
213
+ - '8f3366be46c05d5f'
214
+ - 'af9f5f6fa1ad5182'
215
+ - '5054593a6d795256'
216
+ - '159b9b7451195c9c'
217
+ - '7687f25bf8845686'
218
+ - '560f3ccbaa5b53ef'
219
+ - 'e5a146299341551a'
220
+ - 'b794c616319352c3'
221
+ - 'fb68b32ec8a251da'
222
+ - '9fce6f03ef0351b0'
223
+ - '046fd63cb514581a'
224
+ - '0ce82a1caffc56af'
225
+ - '7cc94c33bbe052d7'
226
+ - 'b5126e9ddea25889'
227
+ - 'c123273de19d5c2f'
228
+ - 'df570b3785a95295'
229
+ - 'a5efa651fec451b5'
230
+ - '216f7065c13c5ec9'
231
+ - '4754eb209bc452e4'
232
+ - 'ce28728cdb6f50c9'
233
+ - '33461776a24d554f'
234
+ - '0920187661745605'
235
+ - '0633cb3809935cb7'
236
+ - 'f3e9317326955421'
237
+ - '1c371291fdc1551a'
238
+ - '37185bcf00de5be6'
239
+ - '224510571ce95a3f'
240
+ - 'e38a6e1fd4c55393'
241
+ - '3a0b00f0840658e5'
242
+ - '0d6abcbad24652c0'
243
+ - '4789245424875682'
244
+ - 'fba38dd9492a5341'
245
+ - 'b649dcb158a75dcd'
246
+ - '1a5182ccbf1b5955'
247
+ - '1ac622ff2d2e5210'
248
+ - 'f63cff56784d5cb9'
249
+ - '0ea876c450bb5aa6'
250
+ - '6fc06c6e4d1752a1'
251
+ - '88396ca47dcf5361'
252
+ - '7e1f829a0de95258'
253
+ - '5f9a9890f1a75602'
254
+ - '5a60c57493885588'
255
+ - '67be2615438d55fb'
256
+ - 'bda2fb6ea7735b5a'
257
+ - '55aa596e131d5734'
258
+ - 'd1a786625a885023'
259
+ - '8ec0cd02d7705766'
260
+ - 'e378bb756641598d'
261
+ - 'c853ae7a361f54d9'
262
+ - 'b1db6a099fea55f5'
263
+ - 'ca8bc031163a5765'
264
+ - 'eee8261221df5048'
265
+ - 'b33131090ada5f2d'
det_map/config/splits/default_train_val_test_log_split.yaml ADDED
The diff for this file is too large to render. See raw diff
 
det_map/config/train_det.yaml ADDED
@@ -0,0 +1,48 @@
+ hydra:
+   run:
+     dir: ${output_dir}
+   output_subdir: ${output_dir}/code/hydra  # Store hydra's config breakdown here for debugging
+   searchpath:  # Only <exp_dir> in these paths are discoverable
+     - det_map/config/defaults
+     - det_map/config
+     - det_map/config/splits
+     - det_map/config/agent
+     # - pkg://navsim.planning.script.config.training
+
+ defaults:
+   - default_common
+   - default_evaluation
+   - default_train_val_test_log_split
+   - agent: map_agent
+   - scene_filter: det_all_scenes
+
+ split: mini
+
+ dataloader:
+   params:
+     batch_size: 32  # number of samples per batch
+     num_workers: 4  # number of workers for data loading
+     pin_memory: true  # pin memory for faster GPU transfer
+     prefetch_factor: 1
+
+ trainer:
+   params:
+     max_epochs: 20  # maximum number of training epochs
+     check_val_every_n_epoch: 1  # run validation set every n training epochs
+     val_check_interval: 1.0  # [%] run validation set every X% of training set
+
+     limit_train_batches: 1.0  # how much of training dataset to check (float = fraction, int = num_batches)
+     limit_val_batches: 1.0  # how much of validation dataset to check (float = fraction, int = num_batches)
+
+     accelerator: gpu  # distribution method
+     strategy: ddp
+     precision: 32  # floating point precision
+     num_nodes: 1  # number of nodes used for training
+
+     num_sanity_val_steps: 0  # number of validation steps to run before training begins
+     fast_dev_run: false  # runs 1 batch of train/val/test for sanity
+
+     accumulate_grad_batches: 1  # accumulates gradients every n batches
+     # track_grad_norm: -1  # logs the p-norm for inspection
+     gradient_clip_val: 0.0  # value to clip gradients
+     gradient_clip_algorithm: norm  # [value, norm] method to clip gradients
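This commit does not include the training entry point itself, so the skeleton below is purely illustrative: the script name, the `build_dataset` helper, and the assumption that `cfg.agent`, `cfg.dataloader.params`, and `cfg.trainer.params` can be passed through unchanged are all unverified; only the config keys mirror `train_det.yaml` above.

```python
# Hypothetical entry point, e.g. run_training_det.py (no such script is part of this commit).
import hydra
import pytorch_lightning as pl
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf
from torch.utils.data import DataLoader

from det_map.agent_lightning import AgentLightningModuleMap


@hydra.main(config_path="det_map/config", config_name="train_det", version_base=None)
def main(cfg: DictConfig) -> None:
    agent = instantiate(cfg.agent)
    module = AgentLightningModuleMap(agent=agent)

    # build_dataset is a hypothetical placeholder for the repository's dataset code.
    train_loader = DataLoader(build_dataset(cfg), **OmegaConf.to_container(cfg.dataloader.params))

    # trainer.params in train_det.yaml maps directly onto pl.Trainer keyword arguments.
    trainer = pl.Trainer(**OmegaConf.to_container(cfg.trainer.params))
    trainer.fit(module, train_dataloaders=train_loader)


if __name__ == "__main__":
    main()
```

A run would then be launched with Hydra-style command-line overrides, e.g. `python run_training_det.py split=mini agent=det_agent` (script name hypothetical).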
det_map/data/__init__.py ADDED
File without changes
det_map/data/datasets/__init__.py ADDED
File without changes
det_map/data/datasets/dataclasses.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import os
5
+ from dataclasses import dataclass, asdict
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple, BinaryIO, Union
8
+ from nuplan.database.maps_db.gpkg_mapsdb import MAP_LOCATIONS
9
+ from nuplan.common.maps.nuplan_map.map_factory import get_maps_api
10
+
11
+ import numpy as np
12
+ import numpy.typing as npt
13
+ from PIL import Image
14
+ from nuplan.common.actor_state.state_representation import StateSE2
15
+ from nuplan.common.maps.abstract_map import AbstractMap
16
+ from nuplan.database.utils.pointclouds.lidar import LidarPointCloud
17
+ from nuplan.planning.simulation.trajectory.trajectory_sampling import TrajectorySampling
18
+ from pyquaternion import Quaternion
19
+
20
+ from navsim.planning.simulation.planner.pdm_planner.utils.pdm_geometry_utils import (
21
+ convert_absolute_to_relative_se2_array,
22
+ )
23
+
24
+ NAVSIM_INTERVAL_LENGTH: float = 0.5
25
+ OPENSCENE_DATA_ROOT = os.environ.get("OPENSCENE_DATA_ROOT")
26
+ NUPLAN_MAPS_ROOT = os.environ.get("NUPLAN_MAPS_ROOT")
27
+
28
+
29
+ @dataclass
30
+ class Camera:
31
+ image: Optional[npt.NDArray[np.float32]] = None
32
+ canvas: Optional[npt.NDArray[np.float32]] = None
33
+
34
+ sensor2lidar_rotation: Optional[npt.NDArray[np.float32]] = None
35
+ sensor2lidar_translation: Optional[npt.NDArray[np.float32]] = None
36
+ intrinsics: Optional[npt.NDArray[np.float32]] = None
37
+ distortion: Optional[npt.NDArray[np.float32]] = None
38
+
39
+ post_rot: Optional[npt.NDArray[np.float32]] = None
40
+ post_tran: Optional[npt.NDArray[np.float32]] = None
41
+
42
+ def to_dict(self):
43
+ return {
44
+ 'image': self.image,
45
+ 'canvas': self.canvas,
46
+ 'sensor2lidar_rotation': self.sensor2lidar_rotation,
47
+ 'sensor2lidar_translation': self.sensor2lidar_translation,
48
+ 'intrinsics': self.intrinsics,
49
+ 'distortion': self.distortion,
50
+ 'post_rot': self.post_rot,
51
+ 'post_tran': self.post_tran
52
+ }
53
+
54
+
55
+ @dataclass
56
+ class Cameras:
57
+ cam_f0: Camera
58
+ cam_l0: Camera
59
+ cam_l1: Camera
60
+ cam_l2: Camera
61
+ cam_r0: Camera
62
+ cam_r1: Camera
63
+ cam_r2: Camera
64
+ cam_b0: Camera
65
+
66
+ @classmethod
67
+ def from_camera_dict(
68
+ cls,
69
+ sensor_blobs_path: Path,
70
+ camera_dict: Dict[str, Any],
71
+ sensor_names: List[str],
72
+ ) -> Cameras:
73
+
74
+ data_dict: Dict[str, Camera] = {}
75
+ for camera_name in camera_dict.keys():
76
+ camera_identifier = camera_name.lower()
77
+ if camera_identifier in sensor_names:
78
+ image_path = sensor_blobs_path / camera_dict[camera_name]["data_path"]
79
+ data_dict[camera_identifier] = Camera(
80
+ image=np.array(Image.open(image_path)),
81
+ sensor2lidar_rotation=camera_dict[camera_name]["sensor2lidar_rotation"],
82
+ sensor2lidar_translation=camera_dict[camera_name]["sensor2lidar_translation"],
83
+ intrinsics=camera_dict[camera_name]["cam_intrinsic"],
84
+ distortion=camera_dict[camera_name]["distortion"],
85
+ )
86
+ else:
87
+ data_dict[camera_identifier] = Camera() # empty camera
88
+
89
+ return Cameras(
90
+ cam_f0=data_dict["cam_f0"],
91
+ cam_l0=data_dict["cam_l0"],
92
+ cam_l1=data_dict["cam_l1"],
93
+ cam_l2=data_dict["cam_l2"],
94
+ cam_r0=data_dict["cam_r0"],
95
+ cam_r1=data_dict["cam_r1"],
96
+ cam_r2=data_dict["cam_r2"],
97
+ cam_b0=data_dict["cam_b0"],
98
+ )
99
+
100
+
101
+ @dataclass
102
+ class Lidar:
103
+ # merged lidar point cloud as (6,n) float32 array with n points
104
+ # first axis: (x, y, z, intensity, ring, lidar_id)
105
+ lidar_pc: Optional[npt.NDArray[np.float32]] = None
106
+
107
+ @staticmethod
108
+ def _load_bytes(lidar_path: Path) -> BinaryIO:
109
+ with open(lidar_path, "rb") as fp:
110
+ return io.BytesIO(fp.read())
111
+
112
+ @classmethod
113
+ def from_paths(
114
+ cls,
115
+ sensor_blobs_path: Path,
116
+ lidar_path: Path,
117
+ sensor_names: List[str],
118
+ ) -> Lidar:
119
+ # NOTE: this could be extended to load specific LiDARs in the merged pc
120
+ if "lidar_pc" in sensor_names:
121
+ global_lidar_path = sensor_blobs_path / lidar_path
122
+ lidar_pc = LidarPointCloud.from_buffer(cls._load_bytes(global_lidar_path), "pcd").points
123
+ return Lidar(lidar_pc)
124
+ return Lidar() # empty lidar
125
+
126
+
127
+ @dataclass
128
+ class EgoStatus:
129
+ ego_pose: npt.NDArray[np.float64]
130
+ ego_velocity: npt.NDArray[np.float32]
131
+ ego_acceleration: npt.NDArray[np.float32]
132
+ driving_command: npt.NDArray[np.int64]
133
+ in_global_frame: bool = False # False for AgentInput
134
+
135
+
136
+ @dataclass
137
+ class AgentInput:
138
+ tokens: List[str]
139
+ timestamps: List[int]
140
+
141
+ ego_statuses: List[EgoStatus]
142
+ cameras: List[Cameras]
143
+ lidars: List[Lidar]
144
+ ego2globals: List[np.ndarray]
145
+
146
+ def __post_init__(self):
147
+ pass
148
+
149
+ @classmethod
150
+ def from_scene_dict_list(
151
+ cls,
152
+ scene_dict_list: List[Dict],
153
+ sensor_blobs_path: Path,
154
+ num_history_frames: int,
155
+ sensor_config: SensorConfig,
156
+ ) -> AgentInput:
157
+ assert len(scene_dict_list) > 0, "Scene list is empty!"
158
+
159
+ global_ego_poses = []
160
+ for frame_idx in range(num_history_frames):
161
+ ego_translation = scene_dict_list[frame_idx]["ego2global_translation"]
162
+ ego_quaternion = Quaternion(*scene_dict_list[frame_idx]["ego2global_rotation"])
163
+ global_ego_pose = np.array(
164
+ [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
165
+ dtype=np.float64,
166
+ )
167
+ global_ego_poses.append(global_ego_pose)
168
+
169
+ local_ego_poses = convert_absolute_to_relative_se2_array(
170
+ StateSE2(*global_ego_poses[-1]), np.array(global_ego_poses, dtype=np.float64)
171
+ )
172
+
173
+ ego_statuses: List[EgoStatus] = []
174
+ cameras: List[Cameras] = []
175
+ lidars: List[Lidar] = []
176
+ ego2globals = []
177
+ tokens = []
178
+ timestamps = []
179
+
180
+ for frame_idx in range(num_history_frames):
181
+ tokens.append(scene_dict_list[frame_idx]['token'])
182
+ timestamps.append(scene_dict_list[frame_idx]['timestamp'])
183
+
184
+ ego_dynamic_state = scene_dict_list[frame_idx]["ego_dynamic_state"]
185
+ ego_status = EgoStatus(
186
+ ego_pose=np.array(local_ego_poses[frame_idx], dtype=np.float32),
187
+ ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
188
+ ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
189
+ driving_command=scene_dict_list[frame_idx]["driving_command"],
190
+ )
191
+ ego_statuses.append(ego_status)
192
+
193
+ sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
194
+ cameras.append(
195
+ Cameras.from_camera_dict(
196
+ sensor_blobs_path=sensor_blobs_path,
197
+ camera_dict=scene_dict_list[frame_idx]["cams"],
198
+ sensor_names=sensor_names,
199
+ )
200
+ )
201
+
202
+ lidars.append(
203
+ Lidar.from_paths(
204
+ sensor_blobs_path=sensor_blobs_path,
205
+ lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
206
+ sensor_names=sensor_names,
207
+ )
208
+ )
209
+
210
+ ego2globals.append(scene_dict_list[frame_idx]['ego2global'])
211
+
212
+ return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
213
+
214
+
215
+ @dataclass
216
+ class Annotations:
217
+ boxes: npt.NDArray[np.float32]
218
+ names: List[str]
219
+ velocity_3d: npt.NDArray[np.float32]
220
+ instance_tokens: List[str]
221
+ track_tokens: List[str]
222
+
223
+ def __post_init__(self):
224
+ annotation_lengths: Dict[str, int] = {
225
+ attribute_name: len(attribute) for attribute_name, attribute in vars(self).items()
226
+ }
227
+ assert (
228
+ len(set(annotation_lengths.values())) == 1
229
+ ), f"Annotations expects all attributes to have equal length, but got {annotation_lengths}"
230
+
231
+
232
+ @dataclass
233
+ class Trajectory:
234
+ poses: npt.NDArray[np.float32] # local coordinates
235
+ trajectory_sampling: TrajectorySampling = TrajectorySampling(
236
+ time_horizon=4, interval_length=0.5
237
+ )
238
+
239
+ def __post_init__(self):
240
+ assert (
241
+ self.poses.ndim == 2
242
+ ), "Trajectory poses should have two dimensions for samples and poses."
243
+ assert (
244
+ self.poses.shape[0] == self.trajectory_sampling.num_poses
245
+ ), "Trajectory poses and sampling have unequal number of poses."
246
+ assert self.poses.shape[1] == 3, "Trajectory requires (x, y, heading) at last dim."
247
+
248
+
249
+ @dataclass
250
+ class SceneMetadata:
251
+ log_name: str
252
+ scene_token: str
253
+ map_name: str
254
+ initial_token: str
255
+
256
+ num_history_frames: int
257
+ num_future_frames: int
258
+
259
+
260
+ @dataclass
261
+ class Frame:
262
+ token: str
263
+ timestamp: int
264
+ roadblock_ids: List[str]
265
+ traffic_lights: List[Tuple[str, bool]]
266
+ annotations: Annotations
267
+
268
+ ego_status: EgoStatus
269
+ lidar: Lidar
270
+ cameras: Cameras
271
+ ego2global: np.ndarray
272
+
273
+
274
+ @dataclass
275
+ class Scene:
276
+ # Ground truth information
277
+ scene_metadata: SceneMetadata
278
+ map_api: AbstractMap
279
+ frames: List[Frame]
280
+
281
+ def get_future_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
282
+
283
+ if num_trajectory_frames is None:
284
+ num_trajectory_frames = self.scene_metadata.num_future_frames
285
+
286
+ start_frame_idx = self.scene_metadata.num_history_frames - 1
287
+
288
+ global_ego_poses = []
289
+ for frame_idx in range(start_frame_idx, start_frame_idx + num_trajectory_frames + 1):
290
+ global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
291
+
292
+ local_ego_poses = convert_absolute_to_relative_se2_array(
293
+ StateSE2(*global_ego_poses[0]), np.array(global_ego_poses[1:], dtype=np.float64)
294
+ )
295
+
296
+ return Trajectory(
297
+ local_ego_poses,
298
+ TrajectorySampling(
299
+ num_poses=len(local_ego_poses),
300
+ interval_length=NAVSIM_INTERVAL_LENGTH,
301
+ ),
302
+ )
303
+
304
+ def get_history_trajectory(self, num_trajectory_frames: Optional[int] = None) -> Trajectory:
305
+
306
+ if num_trajectory_frames is None:
307
+ num_trajectory_frames = self.scene_metadata.num_history_frames
308
+
309
+ global_ego_poses = []
310
+ for frame_idx in range(num_trajectory_frames):
311
+ global_ego_poses.append(self.frames[frame_idx].ego_status.ego_pose)
312
+
313
+ origin = StateSE2(*global_ego_poses[-1])
314
+ local_ego_poses = convert_absolute_to_relative_se2_array(
315
+ origin, np.array(global_ego_poses, dtype=np.float64)
316
+ )
317
+
318
+ return Trajectory(
319
+ local_ego_poses,
320
+ TrajectorySampling(
321
+ num_poses=len(local_ego_poses),
322
+ interval_length=NAVSIM_INTERVAL_LENGTH,
323
+ ),
324
+ )
325
+
326
+ def get_agent_input(self) -> AgentInput:
327
+ # NOTE: this function is unused and might be removed.
328
+
329
+ local_ego_poses = self.get_history_trajectory().poses
330
+
331
+ ego_statuses: List[EgoStatus] = []
332
+ cameras: List[Cameras] = []
333
+ lidars: List[Lidar] = []
334
+ ego2globals = []
335
+ tokens, timestamps = [], []
336
+ for frame_idx in range(self.scene_metadata.num_history_frames):
337
+ frame_ego_status = self.frames[frame_idx].ego_status
338
+ tokens.append(self.frames[frame_idx].token)
339
+ timestamps.append(self.frames[frame_idx].timestamp)
340
+ ego_statuses.append(
341
+ EgoStatus(
342
+ ego_pose=local_ego_poses[frame_idx],
343
+ ego_velocity=frame_ego_status.ego_velocity,
344
+ ego_acceleration=frame_ego_status.ego_acceleration,
345
+ driving_command=frame_ego_status.driving_command,
346
+ )
347
+ )
348
+ cameras.append(self.frames[frame_idx].cameras)
349
+ lidars.append(self.frames[frame_idx].lidar)
350
+ ego2globals.append(self.frames[frame_idx].ego2global)
351
+
352
+ return AgentInput(tokens, timestamps, ego_statuses, cameras, lidars, ego2globals)
353
+
354
+ @classmethod
355
+ def _build_annotations(
356
+ cls,
357
+ scene_frame: Dict,
358
+ ) -> Annotations:
359
+ return Annotations(
360
+ boxes=scene_frame["anns"]["gt_boxes"],
361
+ names=scene_frame["anns"]["gt_names"],
362
+ velocity_3d=scene_frame["anns"]["gt_velocity_3d"],
363
+ instance_tokens=scene_frame["anns"]["instance_tokens"],
364
+ track_tokens=scene_frame["anns"]["track_tokens"],
365
+ )
366
+
367
+ @classmethod
368
+ def _build_ego_status(
369
+ cls,
370
+ scene_frame: Dict,
371
+ ) -> EgoStatus:
372
+ ego_translation = scene_frame["ego2global_translation"]
373
+ ego_quaternion = Quaternion(*scene_frame["ego2global_rotation"])
374
+ global_ego_pose = np.array(
375
+ [ego_translation[0], ego_translation[1], ego_quaternion.yaw_pitch_roll[0]],
376
+ dtype=np.float64,
377
+ )
378
+ ego_dynamic_state = scene_frame["ego_dynamic_state"]
379
+ return EgoStatus(
380
+ ego_pose=global_ego_pose,
381
+ ego_velocity=np.array(ego_dynamic_state[:2], dtype=np.float32),
382
+ ego_acceleration=np.array(ego_dynamic_state[2:], dtype=np.float32),
383
+ driving_command=scene_frame["driving_command"],
384
+ in_global_frame=True,
385
+ )
386
+
387
+ @classmethod
388
+ def _build_map_api(cls, map_name: str) -> AbstractMap:
389
+ assert (
390
+ map_name in MAP_LOCATIONS
391
+ ), f"The map name {map_name} is invalid, must be in {MAP_LOCATIONS}"
392
+ return get_maps_api(NUPLAN_MAPS_ROOT, "nuplan-maps-v1.0", map_name)
393
+
394
+ @classmethod
395
+ def from_scene_dict_list(
396
+ cls,
397
+ scene_dict_list: List[Dict],
398
+ sensor_blobs_path: Path,
399
+ num_history_frames: int,
400
+ num_future_frames: int,
401
+ sensor_config: SensorConfig,
402
+ ) -> Scene:
403
+ assert len(scene_dict_list) > 0, "Scene list is empty!"
404
+
405
+ scene_metadata = SceneMetadata(
406
+ log_name=scene_dict_list[num_history_frames - 1]["log_name"],
407
+ scene_token=scene_dict_list[num_history_frames - 1]["scene_token"],
408
+ map_name=scene_dict_list[num_history_frames - 1]["map_location"],
409
+ initial_token=scene_dict_list[num_history_frames - 1]["token"],
410
+ num_history_frames=num_history_frames,
411
+ num_future_frames=num_future_frames,
412
+ )
413
+ map_api = cls._build_map_api(scene_metadata.map_name)
414
+
415
+ frames: List[Frame] = []
416
+ for frame_idx in range(len(scene_dict_list)):
417
+ global_ego_status = cls._build_ego_status(scene_dict_list[frame_idx])
418
+ annotations = cls._build_annotations(scene_dict_list[frame_idx])
419
+
420
+ sensor_names = sensor_config.get_sensors_at_iteration(frame_idx)
421
+
422
+ cameras = Cameras.from_camera_dict(
423
+ sensor_blobs_path=sensor_blobs_path,
424
+ camera_dict=scene_dict_list[frame_idx]["cams"],
425
+ sensor_names=sensor_names,
426
+ )
427
+
428
+ lidar = Lidar.from_paths(
429
+ sensor_blobs_path=sensor_blobs_path,
430
+ lidar_path=Path(scene_dict_list[frame_idx]["lidar_path"]),
431
+ sensor_names=sensor_names,
432
+ )
433
+
434
+ frame = Frame(
435
+ token=scene_dict_list[frame_idx]["token"],
436
+ timestamp=scene_dict_list[frame_idx]["timestamp"],
437
+ roadblock_ids=scene_dict_list[frame_idx]["roadblock_ids"],
438
+ traffic_lights=scene_dict_list[frame_idx]["traffic_lights"],
439
+ annotations=annotations,
440
+ ego_status=global_ego_status,
441
+ lidar=lidar,
442
+ cameras=cameras,
443
+ ego2global=scene_dict_list[frame_idx]['ego2global']
444
+ )
445
+ frames.append(frame)
446
+
447
+ return Scene(scene_metadata=scene_metadata, frames=frames, map_api=map_api)
448
+
449
+
450
+ @dataclass
451
+ class SceneFilter:
452
+ num_history_frames: int = 4
453
+ num_future_frames: int = 10
454
+ has_route: bool = True
455
+
456
+ max_scenes: Optional[int] = None
457
+ log_names: Optional[List[str]] = None
458
+ tokens: Optional[List[str]] = None
459
+
460
+ @property
461
+ def num_frames(self) -> int:
462
+ return self.num_history_frames
463
+
464
+
465
+ @dataclass
466
+ class SensorConfig:
467
+ # Config values of sensors are either
468
+ # - bool: Whether to load history or not
469
+ # - List[int]: For loading specific history steps
470
+
471
+ cam_f0: Union[bool, List[int]]
472
+ cam_l0: Union[bool, List[int]]
473
+ cam_l1: Union[bool, List[int]]
474
+ cam_l2: Union[bool, List[int]]
475
+ cam_r0: Union[bool, List[int]]
476
+ cam_r1: Union[bool, List[int]]
477
+ cam_r2: Union[bool, List[int]]
478
+ cam_b0: Union[bool, List[int]]
479
+ lidar_pc: Union[bool, List[int]]
480
+
481
+ def get_sensors_at_iteration(self, iteration: int) -> List[str]:
482
+
483
+ sensors_at_iteration: List[str] = []
484
+ for sensor_name, sensor_include in asdict(self).items():
485
+ if isinstance(sensor_include, bool) and sensor_include:
486
+ sensors_at_iteration.append(sensor_name)
487
+ elif isinstance(sensor_include, list) and iteration in sensor_include:
488
+ sensors_at_iteration.append(sensor_name)
489
+
490
+ return sensors_at_iteration
491
+
492
+ @classmethod
493
+ def build_all_sensors(cls, include: Union[bool, List[int]] = True) -> SensorConfig:
494
+ return SensorConfig(
495
+ cam_f0=include,
496
+ cam_l0=include,
497
+ cam_l1=include,
498
+ cam_l2=include,
499
+ cam_r0=include,
500
+ cam_r1=include,
501
+ cam_r2=include,
502
+ cam_b0=include,
503
+ lidar_pc=include,
504
+ )
505
+
506
+ @classmethod
507
+ def build_no_sensors(cls) -> SensorConfig:
508
+ return cls.build_all_sensors(include=False)
509
+
510
+
511
+ @dataclass
512
+ class PDMResults:
513
+ no_at_fault_collisions: float
514
+ drivable_area_compliance: float
515
+ driving_direction_compliance: float
516
+
517
+ ego_progress: float
518
+ time_to_collision_within_bound: float
519
+ comfort: float
520
+
521
+ score: float
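A small usage sketch for the sensor selection above: `get_sensors_at_iteration` keeps a sensor when its flag is `True`, or when the queried history iteration appears in its list. Only code defined in this file is used.

from det_map.data.datasets.dataclasses import SensorConfig

# Load the front camera and the merged lidar only at history iteration 3 (the key frame).
sensor_config = SensorConfig.build_no_sensors()
sensor_config.cam_f0 = [3]
sensor_config.lidar_pc = [3]

print(sensor_config.get_sensors_at_iteration(3))  # ['cam_f0', 'lidar_pc']
print(sensor_config.get_sensors_at_iteration(0))  # []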
det_map/data/datasets/dataloader.py ADDED
@@ -0,0 +1,172 @@
1
+ from __future__ import annotations
2
+
3
+ import lzma
4
+ import pickle
5
+
6
+ from pathlib import Path
7
+ from typing import Any, Dict, List
8
+ from tqdm import tqdm
9
+
10
+ from navsim.common.dataclasses import AgentInput, Scene, SceneFilter, SensorConfig
11
+ from navsim.planning.metric_caching.metric_cache import MetricCache
12
+
13
+
14
+ def filter_scenes(data_path: Path, scene_filter: SceneFilter) -> Dict[str, List[Dict[str, Any]]]:
15
+
16
+ def split_list(input_list: List[Any], num_frames: int, frame_interval: int) -> List[List[Any]]:
17
+ return [input_list[i : i + num_frames] for i in range(0, len(input_list), frame_interval)]
18
+
19
+ filtered_scenes: Dict[str, List[Dict[str, Any]]] = {}
20
+ stop_loading: bool = False
21
+
22
+ # filter logs
23
+ log_files = list(data_path.iterdir())
24
+ if scene_filter.log_names is not None:
25
+ log_files = [
26
+ log_file
27
+ for log_file in log_files
28
+ if log_file.name.replace(".pkl", "") in scene_filter.log_names
29
+ ]
30
+
31
+ if scene_filter.tokens is not None:
32
+ filter_tokens = True
33
+ tokens = set(scene_filter.tokens)
34
+ else:
35
+ filter_tokens = False
36
+
37
+ for log_pickle_path in tqdm(log_files, desc="Loading logs"):
38
+
39
+ scene_dict_list = pickle.load(open(log_pickle_path, "rb"))
40
+ for frame_list in split_list(
41
+ scene_dict_list, scene_filter.num_frames, scene_filter.frame_interval
42
+ ):
43
+ # Filter scenes which are too short
44
+ if len(frame_list) < scene_filter.num_frames:
45
+ continue
46
+
47
+ # Filter scenes with no route
48
+ if (
49
+ scene_filter.has_route
50
+ and len(frame_list[scene_filter.num_history_frames - 1]["roadblock_ids"]) == 0
51
+ ):
52
+ continue
53
+
54
+ # Filter by token
55
+ token = frame_list[scene_filter.num_history_frames - 1]["token"]
56
+ if filter_tokens and token not in tokens:
57
+ continue
58
+
59
+ filtered_scenes[token] = frame_list
60
+
61
+ if (scene_filter.max_scenes is not None) and (
62
+ len(filtered_scenes) >= scene_filter.max_scenes
63
+ ):
64
+ stop_loading = True
65
+ break
66
+
67
+ if stop_loading:
68
+ break
69
+
70
+ return filtered_scenes
71
+
72
+
73
+ class SceneLoader:
74
+
75
+ def __init__(
76
+ self,
77
+ data_path: Path,
78
+ sensor_blobs_path: Path,
79
+ scene_filter: SceneFilter,
80
+ sensor_config: SensorConfig = SensorConfig.build_no_sensors(),
81
+ ):
82
+
83
+ self.scene_frames_dicts = filter_scenes(data_path, scene_filter)
84
+ self._sensor_blobs_path = sensor_blobs_path
85
+ self._scene_filter = scene_filter
86
+ self._sensor_config = sensor_config
87
+
88
+ @property
89
+ def tokens(self) -> List[str]:
90
+ return list(self.scene_frames_dicts.keys())
91
+
92
+ def __len__(self):
93
+ return len(self.tokens)
94
+
95
+ def __getitem__(self, idx) -> str:
96
+ return self.tokens[idx]
97
+
98
+ def get_scene_from_token(self, token: str) -> Scene:
99
+ assert token in self.tokens
100
+ return Scene.from_scene_dict_list(
101
+ self.scene_frames_dicts[token],
102
+ self._sensor_blobs_path,
103
+ num_history_frames=self._scene_filter.num_history_frames,
104
+ num_future_frames=self._scene_filter.num_future_frames,
105
+ sensor_config=self._sensor_config,
106
+ )
107
+
108
+ def get_agent_input_from_token(self, token: str) -> AgentInput:
109
+ assert token in self.tokens
110
+ return AgentInput.from_scene_dict_list(
111
+ self.scene_frames_dicts[token],
112
+ self._sensor_blobs_path,
113
+ num_history_frames=self._scene_filter.num_history_frames,
114
+ sensor_config=self._sensor_config,
115
+ )
116
+
117
+ def get_tokens_list_per_log(self) -> Dict[str, List[str]]:
118
+ # generate a dict that contains a list of tokens for each log-name
119
+ tokens_per_logs: Dict[str, List[str]] = {}
120
+ for token, scene_dict_list in self.scene_frames_dicts.items():
121
+ log_name = scene_dict_list[0]["log_name"]
122
+ if tokens_per_logs.get(log_name):
123
+ tokens_per_logs[log_name].append(token)
124
+ else:
125
+ tokens_per_logs.update({log_name: [token]})
126
+ return tokens_per_logs
127
+
128
+ class MetricCacheLoader:
129
+
130
+ def __init__(
131
+ self,
132
+ cache_path: Path,
133
+ file_name: str = "metric_cache.pkl",
134
+ ):
135
+
136
+ self._file_name = file_name
137
+ self.metric_cache_paths = self._load_metric_cache_paths(cache_path)
138
+
139
+ def _load_metric_cache_paths(self, cache_path: Path) -> Dict[str, Path]:
140
+ metadata_dir = cache_path / "metadata"
141
+ metadata_file = [file for file in metadata_dir.iterdir() if ".csv" in str(file)][0]
142
+ with open(str(metadata_file), "r") as f:
143
+ cache_paths = f.read().splitlines()[1:]
144
+ metric_cache_dict = {
145
+ cache_path.split("/")[-2]: cache_path
146
+ for cache_path in cache_paths
147
+ }
148
+ return metric_cache_dict
149
+
150
+ @property
151
+ def tokens(self) -> List[str]:
152
+ return list(self.metric_cache_paths.keys())
153
+
154
+ def __len__(self):
155
+ return len(self.metric_cache_paths)
156
+
157
+ def __getitem__(self, idx: int) -> MetricCache:
158
+ return self.get_from_token(self.tokens[idx])
159
+
160
+ def get_from_token(self, token: str) -> MetricCache:
161
+
162
+ with lzma.open(self.metric_cache_paths[token], "rb") as f:
163
+ metric_cache: MetricCache = pickle.load(f)
164
+
165
+ return metric_cache
166
+
167
+ def to_pickle(self, path: Path) -> None:
168
+ full_metric_cache = {}
169
+ for token in tqdm(self.tokens):
170
+ full_metric_cache[token] = self.get_from_token(token)
171
+ with open(path, "wb") as f:
172
+ pickle.dump(full_metric_cache, f)
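A usage sketch for `SceneLoader`, assuming the navsim `SceneFilter`/`SensorConfig` mirror the fields shown in dataclasses.py above; the paths are placeholders for the OpenScene meta-data and sensor-blob directories.

from pathlib import Path

from det_map.data.datasets.dataloader import SceneLoader
from navsim.common.dataclasses import SceneFilter, SensorConfig

scene_loader = SceneLoader(
    data_path=Path("/path/to/meta_datas/trainval"),            # placeholder
    sensor_blobs_path=Path("/path/to/sensor_blobs/trainval"),  # placeholder
    scene_filter=SceneFilter(num_history_frames=4, num_future_frames=10, max_scenes=8),
    sensor_config=SensorConfig.build_all_sensors(include=[3]),
)

token = scene_loader.tokens[0]
agent_input = scene_loader.get_agent_input_from_token(token)  # ego history + sensors only
scene = scene_loader.get_scene_from_token(token)              # additionally builds map API and annotations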
det_map/data/datasets/dataset.py ADDED
@@ -0,0 +1,41 @@
1
+ from typing import Dict, List, Tuple
2
+ import torch
3
+
4
+ from det_map.data.datasets.dataloader import SceneLoader
5
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
6
+
7
+ class Dataset(torch.utils.data.Dataset):
8
+ def __init__(
9
+ self,
10
+ pipelines, is_train,
11
+ scene_loader: SceneLoader,
12
+ feature_builders: List[AbstractFeatureBuilder],
13
+ target_builders: List[AbstractTargetBuilder]
14
+ ):
15
+ super().__init__()
16
+ self._scene_loader = scene_loader
17
+ self._feature_builders = feature_builders
18
+ self._target_builders = target_builders
19
+ self.pipelines = pipelines
20
+ self.is_train = is_train
21
+
22
+ def __len__(self):
23
+ return len(self._scene_loader)
24
+
25
+ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
26
+ scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
27
+ features: Dict[str, torch.Tensor] = {}
28
+ for builder in self._feature_builders:
29
+ features.update(builder.compute_features(scene.get_agent_input()))
30
+ targets: Dict[str, torch.Tensor] = {}
31
+ for builder in self._target_builders:
32
+ targets.update(builder.compute_targets(scene))
33
+ # aug for four frames respectively
34
+ features, targets = self.pipelines['lidar_aug'](features, targets)
35
+ # project lidar at frame i to image i
36
+ features, targets = self.pipelines['depth'](features, targets)
37
+ # concat all lidar points, remove points too far/close
38
+ features, targets = self.pipelines['lidar_filter'](features, targets)
39
+ # shuffle all lidar points
40
+ features, targets = self.pipelines['point_shuffle'](features, targets)
41
+ return (features, targets)
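For orientation, a sketch of the `pipelines` dict consumed in `__getitem__` above; the keys must match the four stages called there ('img' is consumed by the feature builder, not the dataset itself). The augmentation and grid values are illustrative stand-ins for the agent YAML configs, and `scene_loader`, `feature_builders`, `target_builders` are assumed to be built as elsewhere in this diff.

import torch

from det_map.data.datasets.dataset import Dataset
from det_map.data.pipelines.lidar_aug import LiDARAug
from det_map.data.pipelines.prepare_depth import LiDAR2Depth
from det_map.data.pipelines.filter_lidar import LiDARFilter
from det_map.data.pipelines.point_shuffle import PointShuffle

bda_aug_conf = {'rot_lim': '(-22.5, 22.5)', 'scale_lim': '(0.95, 1.05)',
                'tran_lim': '[0.0, 0.0, 0.0]', 'flip_dx_ratio': 0.5, 'flip_dy_ratio': 0.5}
grid_config = {'x': '(-50.0, 50.0, 0.5)', 'y': '(-50.0, 50.0, 0.5)',
               'z': '(-5, 3, 8)', 'depth': '(1.0, 60.0, 0.5)'}

pipelines = {
    'lidar_aug': LiDARAug(bda_aug_conf=bda_aug_conf, is_train=True),
    'depth': LiDAR2Depth(grid_config=grid_config),
    'lidar_filter': LiDARFilter(close_radius=1.0),
    'point_shuffle': PointShuffle(is_train=True),
}

dataset = Dataset(pipelines=pipelines, is_train=True, scene_loader=scene_loader,
                  feature_builders=feature_builders, target_builders=target_builders)
loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=4)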
det_map/data/datasets/dataset_det.py ADDED
@@ -0,0 +1,28 @@
1
+ from typing import Dict, List, Tuple
2
+ import torch
3
+
4
+ from det_map.data.datasets.dataloader import SceneLoader
5
+ from det_map.data.datasets.dataset import Dataset
6
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder, AbstractTargetBuilder
7
+
8
+ class DetDataset(Dataset):
9
+ def __init__(
10
+ self, **kwargs
11
+ ):
12
+ super().__init__(**kwargs)
13
+
14
+ def __getitem__(self, idx: int) -> Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]:
15
+ scene = self._scene_loader.get_scene_from_token(self._scene_loader.tokens[idx])
16
+ features: Dict[str, torch.Tensor] = {}
17
+ for builder in self._feature_builders:
18
+ features.update(builder.compute_features(scene.get_agent_input()))
19
+ targets: Dict[str, torch.Tensor] = {}
20
+ for builder in self._target_builders:
21
+ targets.update(builder.compute_targets(scene))
22
+ # todo sampler
23
+ features, targets = self.pipelines['lidar_aug'](features, targets)
24
+ features, targets = self.pipelines['depth'](features, targets)
25
+ features, targets = self.pipelines['lidar_filter'](features, targets)
26
+ features, targets = self.pipelines['point_shuffle'](features, targets)
27
+
28
+ return (features, targets)
det_map/data/datasets/feature_builders.py ADDED
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+
5
+ import numpy as np
6
+ import torch
7
+
8
+ from det_map.data.datasets.dataclasses import AgentInput, Camera
9
+ from det_map.data.datasets.lidar_utils import transform_points, render_image
10
+ from navsim.planning.training.abstract_feature_target_builder import AbstractFeatureBuilder
11
+ from mmcv.parallel import DataContainer as DC
12
+
13
+ class LiDARCameraFeatureBuilder(AbstractFeatureBuilder):
14
+ def __init__(self, pipelines):
15
+ super().__init__()
16
+ self.pipelines = pipelines
17
+
18
+ def compute_features(self, agent_input: AgentInput) -> Dict[str, torch.Tensor]:
19
+ img_pipeline = self.pipelines['img']
20
+ timestamps_ori = agent_input.timestamps
21
+ timestamps = [(timestamps_ori[-1] - tmp) / 1e6 for tmp in timestamps_ori]
22
+
23
+ lidars = [np.copy(tmp.lidar_pc) for tmp in agent_input.lidars]
24
+ ego2globals = [tmp for tmp in agent_input.ego2globals]
25
+
26
+ # last frame is the key frame
27
+ global2ego_key = np.linalg.inv(ego2globals[-1])
28
+ # ego2global, global2ego key frame
29
+ lidars_warped = [transform_points(transform_points(pts, mat), global2ego_key)
30
+ for pts, mat in zip(lidars[:-1], ego2globals[:-1])]
31
+ lidars_warped.append(lidars[-1])
32
+ for i, l in enumerate(lidars_warped):
33
+ # x,y,z,intensity,timestamp
34
+ l[4] = timestamps[i]
35
+ lidars_warped[i] = torch.from_numpy(l[:5]).t()
36
+
37
+
38
+ # debug visualize lidar pc
39
+ # for idx, lidar in enumerate(lidars_warped):
40
+ # render_image(lidar, str('warped'+ str(idx)))
41
+ # for idx, lidar in enumerate([tmp.lidar_pc for tmp in agent_input.lidars]):
42
+ # render_image(lidar, str('ori'+ str(idx)))
43
+
44
+ cams_all_frames = [[
45
+ tmp.cam_f0,
46
+ # tmp.cam_l0,
47
+ # tmp.cam_l1,
48
+ # tmp.cam_l2,
49
+ # tmp.cam_r0,
50
+ # tmp.cam_r1,
51
+ # tmp.cam_r2,
52
+ tmp.cam_b0
53
+ ] for tmp in agent_input.cameras]
54
+
55
+ image, canvas, sensor2lidar_rotation, sensor2lidar_translation, intrinsics, distortion, post_rot, post_tran = [], [], [], [], [], [], [], []
56
+ for cams_frame_t in cams_all_frames:
57
+ image_t, canvas_t, sensor2lidar_rotation_t, sensor2lidar_translation_t, intrinsics_t, distortion_t, post_rot_t, post_tran_t = [], [], [], [], [], [], [], []
58
+ for cam in cams_frame_t:
59
+ cam_processed: Camera = img_pipeline(cam)
60
+ image_t.append(cam_processed.image)
61
+ canvas_t.append(cam_processed.canvas)
62
+ sensor2lidar_rotation_t.append(cam_processed.sensor2lidar_rotation)
63
+ sensor2lidar_translation_t.append(cam_processed.sensor2lidar_translation)
64
+ intrinsics_t.append(cam_processed.intrinsics)
65
+ distortion_t.append(cam_processed.distortion)
66
+ post_rot_t.append(cam_processed.post_rot)
67
+ post_tran_t.append(cam_processed.post_tran)
68
+ image.append(torch.stack(image_t))
69
+ canvas.append(torch.stack(canvas_t))
70
+ sensor2lidar_rotation.append(torch.stack(sensor2lidar_rotation_t))
71
+ sensor2lidar_translation.append(torch.stack(sensor2lidar_translation_t))
72
+ intrinsics.append(torch.stack(intrinsics_t))
73
+ distortion.append(torch.stack(distortion_t))
74
+ post_rot.append(torch.stack(post_rot_t))
75
+ post_tran.append(torch.stack(post_tran_t))
76
+
77
+
78
+ # img: T, N_CAM, C, H, W
79
+ # imgs = DC(torch.stack(image), cpu_only=False, stack=True)
80
+ #combine = torch.matmul(sensor2lidar_rotation, torch.inverse(intrinsics))
81
+ #coords = torch.matmul(combine, coords)
82
+ #coords += sensor2lidar_translation
83
+ imgs = torch.stack(image)
84
+ return {
85
+ "image": imgs,
86
+ 'canvas': torch.stack(canvas).to(imgs),
87
+ 'sensor2lidar_rotation': torch.stack(sensor2lidar_rotation).to(imgs),
88
+ 'sensor2lidar_translation': torch.stack(sensor2lidar_translation).to(imgs),
89
+ 'intrinsics': torch.stack(intrinsics).to(imgs),
90
+ 'distortion': torch.stack(distortion).to(imgs),
91
+ 'post_rot': torch.stack(post_rot).to(imgs),
92
+ 'post_tran': torch.stack(post_tran).to(imgs),
93
+ "lidars_warped": lidars_warped
94
+ }
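The sweep warping above composes two rigid transforms per frame: ego(i) -> global, then global -> ego(key frame). A compact numpy-only equivalent, operating on the (6, n) point layout documented in dataclasses.py:

import numpy as np

def sweep_to_key_frame(points_6xn: np.ndarray,
                       ego2global_i: np.ndarray,
                       ego2global_key: np.ndarray) -> np.ndarray:
    # Single 4x4 transform taking sweep i from its own ego frame into the key ego frame.
    T = np.linalg.inv(ego2global_key) @ ego2global_i
    out = points_6xn.copy()
    out[:3] = T[:3, :3] @ points_6xn[:3] + T[:3, 3:4]
    return out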
det_map/data/datasets/lidar_utils.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple
4
+
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+ from PIL import Image
8
+ from matplotlib import cm
9
+ from nuplan.database.utils.geometry import view_points
10
+
11
+
12
+ def transform_points(points, transf_matrix: npt.NDArray[np.float64]):
13
+ """
14
+ Applies a homogeneous transform.
15
+ :param transf_matrix: <np.float: 4, 4>. Homogeneous transformation matrix.
16
+ """
17
+ transf_matrix = transf_matrix.astype(np.float32)
18
+ points[:3, :] = transf_matrix[:3, :3] @ points[:3] + transf_matrix[:3, 3].reshape((-1, 1))
19
+ return points
20
+
21
+
22
+ def render_image(
23
+ points, name,
24
+ canvas_size: Tuple[int, int] = (1001, 1001),
25
+ view: npt.NDArray[np.float64] = np.array([[10, 0, 0, 500], [0, 10, 0, 500], [0, 0, 10, 0]]),
26
+ color_dim: int = 2,
27
+ ):
28
+ """
29
+ Renders pointcloud to an array with 3 channels appropriate for viewing as an image. The image is color coded
30
+ according the color_dim dimension of points (typically the height).
31
+ :param canvas_size: (width, height). Size of the canvas on which to render the image.
32
+ :param view: <np.float: n, n>. Defines an arbitrary projection (n <= 4).
33
+ :param color_dim: The dimension of the points to be visualized as color. Default is 2 for height.
34
+ :return: A Image instance.
35
+ """
36
+ # Apply desired transformation to the point cloud. (height is here considered independent of the view).
37
+ heights = points[2, :]
38
+ points = view_points(points[:3, :], view, normalize=False)
39
+ points[2, :] = heights
40
+
41
+ # Remove points that fall outside the canvas.
42
+ mask = np.ones(points.shape[1], dtype=bool) # type: ignore
43
+ mask = np.logical_and(mask, points[0, :] < canvas_size[0] - 1)
44
+ mask = np.logical_and(mask, points[0, :] > 0)
45
+ mask = np.logical_and(mask, points[1, :] < canvas_size[1] - 1)
46
+ mask = np.logical_and(mask, points[1, :] > 0)
47
+ points = points[:, mask]
48
+
49
+ # Scale color_values to be between 0 and 255.
50
+ color_values = points[color_dim, :]
51
+ color_values = 255.0 * (color_values - np.amin(color_values)) / (np.amax(color_values) - np.amin(color_values))
52
+
53
+ # Rounds to ints and generate colors that will be used in the image.
54
+ points = np.int16(np.round(points[:2, :]))
55
+ color_values = np.int16(np.round(color_values))
56
+ cmap = [cm.jet(i / 255, bytes=True)[:3] for i in range(256)]
57
+
58
+ # Populate canvas, use maximum color_value for each bin
59
+ render = np.tile(np.expand_dims(np.zeros(canvas_size, dtype=np.uint8), axis=2), [1, 1, 3]) # type: ignore
60
+ color_value_array: npt.NDArray[np.float64] = -1 * np.ones(canvas_size, dtype=float) # type: ignore
61
+ for (col, row), color_value in zip(points.T, color_values.T):
62
+ if color_value > color_value_array[row, col]:
63
+ color_value_array[row, col] = color_value
64
+ render[row, col] = cmap[color_value]
65
+
66
+ Image.fromarray(render).save(f'/mnt/f/e2e/navsim_ours/debug/{name}.png')
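For reference, the default `view` above is a plain scale-and-shift from metres to pixels (10 px per metre, origin at the canvas centre), so the 1001x1001 canvas covers roughly +/-50 m around the ego vehicle:

import numpy as np

view = np.array([[10, 0, 0, 500], [0, 10, 0, 500], [0, 0, 10, 0]])
point = np.array([[12.3], [-4.0], [1.5], [1.0]])  # homogeneous (x, y, z, 1) in metres
print(view @ point)                               # [[623.], [460.], [15.]] -> pixel column, pixel row, scaled height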
det_map/data/pipelines/__init__.py ADDED
File without changes
det_map/data/pipelines/color_utils.py ADDED
@@ -0,0 +1,357 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Callable, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+
8
+ def imnormalize_(img, mean, std, to_rgb=True):
9
+ """Inplace normalize an image with mean and std.
10
+
11
+ Args:
12
+ img (ndarray): Image to be normalized.
13
+ mean (ndarray): The mean to be used for normalize.
14
+ std (ndarray): The std to be used for normalize.
15
+ to_rgb (bool): Whether to convert to rgb.
16
+
17
+ Returns:
18
+ ndarray: The normalized image.
19
+ """
20
+ # cv2 inplace normalization does not accept uint8
21
+ assert img.dtype != np.uint8
22
+ mean = np.float64(mean.reshape(1, -1))
23
+ stdinv = 1 / np.float64(std.reshape(1, -1))
24
+ if to_rgb:
25
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace
26
+ cv2.subtract(img, mean, img) # inplace
27
+ cv2.multiply(img, stdinv, img) # inplace
28
+ return img
29
+
30
+
31
+ def imnormalize(img, mean, std, to_rgb=True):
32
+ """Normalize an image with mean and std.
33
+
34
+ Args:
35
+ img (ndarray): Image to be normalized.
36
+ mean (ndarray): The mean to be used for normalize.
37
+ std (ndarray): The std to be used for normalize.
38
+ to_rgb (bool): Whether to convert to rgb.
39
+
40
+ Returns:
41
+ ndarray: The normalized image.
42
+ """
43
+ img = img.copy().astype(np.float32)
44
+ return imnormalize_(img, mean, std, to_rgb)
45
+
46
+
47
+ def mmlabNormalize(img):
48
+ mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
49
+ std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
50
+ to_rgb = True
51
+ img = imnormalize(np.array(img), mean, std, to_rgb)
52
+ img = torch.tensor(img).float().permute(2, 0, 1).contiguous()
53
+ return img
54
+
55
+
56
+ def imconvert(img: np.ndarray, src: str, dst: str) -> np.ndarray:
57
+ """Convert an image from the src colorspace to dst colorspace.
58
+
59
+ Args:
60
+ img (ndarray): The input image.
61
+ src (str): The source colorspace, e.g., 'rgb', 'hsv'.
62
+ dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
63
+
64
+ Returns:
65
+ ndarray: The converted image.
66
+ """
67
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
68
+ out_img = cv2.cvtColor(img, code)
69
+ return out_img
70
+
71
+
72
+ def bgr2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
73
+ """Convert a BGR image to grayscale image.
74
+
75
+ Args:
76
+ img (ndarray): The input image.
77
+ keepdim (bool): If False (by default), then return the grayscale image
78
+ with 2 dims, otherwise 3 dims.
79
+
80
+ Returns:
81
+ ndarray: The converted grayscale image.
82
+ """
83
+ out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ if keepdim:
85
+ out_img = out_img[..., None]
86
+ return out_img
87
+
88
+
89
+ def rgb2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray:
90
+ """Convert a RGB image to grayscale image.
91
+
92
+ Args:
93
+ img (ndarray): The input image.
94
+ keepdim (bool): If False (by default), then return the grayscale image
95
+ with 2 dims, otherwise 3 dims.
96
+
97
+ Returns:
98
+ ndarray: The converted grayscale image.
99
+ """
100
+ out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
101
+ if keepdim:
102
+ out_img = out_img[..., None]
103
+ return out_img
104
+
105
+
106
+ def gray2bgr(img: np.ndarray) -> np.ndarray:
107
+ """Convert a grayscale image to BGR image.
108
+
109
+ Args:
110
+ img (ndarray): The input image.
111
+
112
+ Returns:
113
+ ndarray: The converted BGR image.
114
+ """
115
+ img = img[..., None] if img.ndim == 2 else img
116
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
117
+ return out_img
118
+
119
+
120
+ def gray2rgb(img: np.ndarray) -> np.ndarray:
121
+ """Convert a grayscale image to RGB image.
122
+
123
+ Args:
124
+ img (ndarray): The input image.
125
+
126
+ Returns:
127
+ ndarray: The converted RGB image.
128
+ """
129
+ img = img[..., None] if img.ndim == 2 else img
130
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
131
+ return out_img
132
+
133
+
134
+ def _convert_input_type_range(img: np.ndarray) -> np.ndarray:
135
+ """Convert the type and range of the input image.
136
+
137
+ It converts the input image to np.float32 type and range of [0, 1].
138
+ It is mainly used for pre-processing the input image in colorspace
139
+ conversion functions such as rgb2ycbcr and ycbcr2rgb.
140
+
141
+ Args:
142
+ img (ndarray): The input image. It accepts:
143
+ 1. np.uint8 type with range [0, 255];
144
+ 2. np.float32 type with range [0, 1].
145
+
146
+ Returns:
147
+ (ndarray): The converted image with type of np.float32 and range of
148
+ [0, 1].
149
+ """
150
+ img_type = img.dtype
151
+ img = img.astype(np.float32)
152
+ if img_type == np.float32:
153
+ pass
154
+ elif img_type == np.uint8:
155
+ img /= 255.
156
+ else:
157
+ raise TypeError('The img type should be np.float32 or np.uint8, '
158
+ f'but got {img_type}')
159
+ return img
160
+
161
+
162
+ def _convert_output_type_range(
163
+ img: np.ndarray, dst_type: Union[np.uint8, np.float32]) -> np.ndarray:
164
+ """Convert the type and range of the image according to dst_type.
165
+
166
+ It converts the image to desired type and range. If `dst_type` is np.uint8,
167
+ images will be converted to np.uint8 type with range [0, 255]. If
168
+ `dst_type` is np.float32, it converts the image to np.float32 type with
169
+ range [0, 1].
170
+ It is mainly used for post-processing images in colorspace conversion
171
+ functions such as rgb2ycbcr and ycbcr2rgb.
172
+
173
+ Args:
174
+ img (ndarray): The image to be converted with np.float32 type and
175
+ range [0, 255].
176
+ dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
177
+ converts the image to np.uint8 type with range [0, 255]. If
178
+ dst_type is np.float32, it converts the image to np.float32 type
179
+ with range [0, 1].
180
+
181
+ Returns:
182
+ (ndarray): The converted image with desired type and range.
183
+ """
184
+ if dst_type not in (np.uint8, np.float32):
185
+ raise TypeError('The dst_type should be np.float32 or np.uint8, '
186
+ f'but got {dst_type}')
187
+ if dst_type == np.uint8:
188
+ img = img.round()
189
+ else:
190
+ img /= 255.
191
+ return img.astype(dst_type)
192
+
193
+
194
+ def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
195
+ """Convert a RGB image to YCbCr image.
196
+
197
+ This function produces the same results as Matlab's `rgb2ycbcr` function.
198
+ It implements the ITU-R BT.601 conversion for standard-definition
199
+ television. See more details in
200
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
201
+
202
+ It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
203
+ In OpenCV, it implements a JPEG conversion. See more details in
204
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
205
+
206
+ Args:
207
+ img (ndarray): The input image. It accepts:
208
+ 1. np.uint8 type with range [0, 255];
209
+ 2. np.float32 type with range [0, 1].
210
+ y_only (bool): Whether to only return Y channel. Default: False.
211
+
212
+ Returns:
213
+ ndarray: The converted YCbCr image. The output image has the same type
214
+ and range as input image.
215
+ """
216
+ img_type = img.dtype
217
+ img = _convert_input_type_range(img)
218
+ if y_only:
219
+ out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
220
+ else:
221
+ out_img = np.matmul(
222
+ img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
223
+ [24.966, 112.0, -18.214]]) + [16, 128, 128]
224
+ out_img = _convert_output_type_range(out_img, img_type)
225
+ return out_img
226
+
227
+
228
+ def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray:
229
+ """Convert a BGR image to YCbCr image.
230
+
231
+ The bgr version of rgb2ycbcr.
232
+ It implements the ITU-R BT.601 conversion for standard-definition
233
+ television. See more details in
234
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
235
+
236
+ It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
237
+ In OpenCV, it implements a JPEG conversion. See more details in
238
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
239
+
240
+ Args:
241
+ img (ndarray): The input image. It accepts:
242
+ 1. np.uint8 type with range [0, 255];
243
+ 2. np.float32 type with range [0, 1].
244
+ y_only (bool): Whether to only return Y channel. Default: False.
245
+
246
+ Returns:
247
+ ndarray: The converted YCbCr image. The output image has the same type
248
+ and range as input image.
249
+ """
250
+ img_type = img.dtype
251
+ img = _convert_input_type_range(img)
252
+ if y_only:
253
+ out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
254
+ else:
255
+ out_img = np.matmul(
256
+ img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
257
+ [65.481, -37.797, 112.0]]) + [16, 128, 128]
258
+ out_img = _convert_output_type_range(out_img, img_type)
259
+ return out_img
260
+
261
+
262
+ def ycbcr2rgb(img: np.ndarray) -> np.ndarray:
263
+ """Convert a YCbCr image to RGB image.
264
+
265
+ This function produces the same results as Matlab's ycbcr2rgb function.
266
+ It implements the ITU-R BT.601 conversion for standard-definition
267
+ television. See more details in
268
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
269
+
270
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
271
+ In OpenCV, it implements a JPEG conversion. See more details in
272
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
273
+
274
+ Args:
275
+ img (ndarray): The input image. It accepts:
276
+ 1. np.uint8 type with range [0, 255];
277
+ 2. np.float32 type with range [0, 1].
278
+
279
+ Returns:
280
+ ndarray: The converted RGB image. The output image has the same type
281
+ and range as input image.
282
+ """
283
+ img_type = img.dtype
284
+ img = _convert_input_type_range(img) * 255
285
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
286
+ [0, -0.00153632, 0.00791071],
287
+ [0.00625893, -0.00318811, 0]]) * 255.0 + [
288
+ -222.921, 135.576, -276.836
289
+ ]
290
+ out_img = _convert_output_type_range(out_img, img_type)
291
+ return out_img
292
+
293
+
294
+ def ycbcr2bgr(img: np.ndarray) -> np.ndarray:
295
+ """Convert a YCbCr image to BGR image.
296
+
297
+ The bgr version of ycbcr2rgb.
298
+ It implements the ITU-R BT.601 conversion for standard-definition
299
+ television. See more details in
300
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
301
+
302
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
303
+ In OpenCV, it implements a JPEG conversion. See more details in
304
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
305
+
306
+ Args:
307
+ img (ndarray): The input image. It accepts:
308
+ 1. np.uint8 type with range [0, 255];
309
+ 2. np.float32 type with range [0, 1].
310
+
311
+ Returns:
312
+ ndarray: The converted BGR image. The output image has the same type
313
+ and range as input image.
314
+ """
315
+ img_type = img.dtype
316
+ img = _convert_input_type_range(img) * 255
317
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
318
+ [0.00791071, -0.00153632, 0],
319
+ [0, -0.00318811, 0.00625893]]) * 255.0 + [
320
+ -276.836, 135.576, -222.921
321
+ ]
322
+ out_img = _convert_output_type_range(out_img, img_type)
323
+ return out_img
324
+
325
+
326
+ def convert_color_factory(src: str, dst: str) -> Callable:
327
+
328
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
329
+
330
+ def convert_color(img: np.ndarray) -> np.ndarray:
331
+ out_img = cv2.cvtColor(img, code)
332
+ return out_img
333
+
334
+ convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
335
+ image.
336
+
337
+ Args:
338
+ img (ndarray or str): The input image.
339
+
340
+ Returns:
341
+ ndarray: The converted {dst.upper()} image.
342
+ """
343
+
344
+ return convert_color
345
+
346
+
347
+ bgr2rgb = convert_color_factory('bgr', 'rgb')
348
+
349
+ rgb2bgr = convert_color_factory('rgb', 'bgr')
350
+
351
+ bgr2hsv = convert_color_factory('bgr', 'hsv')
352
+
353
+ hsv2bgr = convert_color_factory('hsv', 'bgr')
354
+
355
+ bgr2hls = convert_color_factory('bgr', 'hls')
356
+
357
+ hls2bgr = convert_color_factory('hls', 'bgr')
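A minimal usage sketch for the helpers above: `mmlabNormalize` normalises with the ImageNet mean/std, swaps the first and last channels (the mmcv `to_rgb` convention), and returns a C, H, W float tensor, while `rgb2ycbcr(..., y_only=True)` keeps only the luma channel in the input's dtype and range.

import numpy as np

from det_map.data.pipelines.color_utils import mmlabNormalize, rgb2ycbcr

img = (np.random.rand(4, 6, 3) * 255).astype(np.uint8)  # H, W, C test image
tensor = mmlabNormalize(img)                             # float tensor of shape (3, 4, 6)
luma = rgb2ycbcr(img, y_only=True)                       # uint8 array of shape (4, 6)
print(tuple(tensor.shape), luma.shape)                   # (3, 4, 6) (4, 6)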
det_map/data/pipelines/filter_lidar.py ADDED
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ from typing import Tuple
3
+
4
+ import torch
5
+
6
+
7
+ class LiDARFilter(object):
8
+ def __init__(self,
9
+ close_radius=1.0,
10
+ x_range='(-50.0, 50.0)',
11
+ y_range='(-50.0, 50.0)',
12
+ z_range='(-5, 20)',
13
+ ):
14
+ self.radius = close_radius
15
+ self.x_range = eval(x_range)
16
+ self.y_range = eval(y_range)
17
+ self.z_range = eval(z_range)
18
+
19
+ def _remove_close(self, points, radius=1.0):
20
+ """Removes point too close within a certain radius from origin.
21
+
22
+ Args:
23
+ points (np.ndarray | :obj:`BasePoints`): Sweep points.
24
+ radius (float, optional): Radius below which points are removed.
25
+ Defaults to 1.0.
26
+
27
+ Returns:
28
+ np.ndarray: Points after removing.
29
+ """
30
+ x_filt = torch.abs(points[:, 0]) < radius
31
+ y_filt = torch.abs(points[:, 1]) < radius
32
+ not_close = torch.logical_not(torch.logical_and(x_filt, y_filt))
33
+ return points[not_close]
34
+
35
+ def range_filter(
36
+ self,
37
+ points,
38
+ xrange: Tuple[float, float] = (-np.inf, np.inf),
39
+ yrange: Tuple[float, float] = (-np.inf, np.inf),
40
+ zrange: Tuple[float, float] = (-np.inf, np.inf),
41
+ ):
42
+ """
43
+ Restricts points to specified ranges.
44
+ :param xrange: (xmin, xmax).
45
+ :param yrange: (ymin, ymax).
46
+ :param zrange: (zmin, zmax).
47
+ """
48
+ # Figure out which points to keep.
49
+ keep_x = torch.logical_and(xrange[0] <= points[:, 0], points[:, 0] <= xrange[1])
50
+ keep_y = torch.logical_and(yrange[0] <= points[:, 1], points[:, 1] <= yrange[1])
51
+ keep_z = torch.logical_and(zrange[0] <= points[:, 2], points[:, 2] <= zrange[1])
52
+ keep = torch.logical_and(keep_x, torch.logical_and(keep_y, keep_z))
53
+ return points[keep]
54
+
55
+
56
+ def __call__(self, features, targets):
57
+ """Call function to load multi-sweep point clouds from files.
58
+
59
+ Args:
60
+ results (dict): Result dict containing multi-sweep point cloud
61
+ filenames.
62
+
63
+ Returns:
64
+ dict: The result dict containing the multi-sweep points data.
65
+ Added key and value are described below.
66
+
67
+ - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
68
+ cloud arrays.
69
+ """
70
+ points = torch.cat(features['lidars_warped'], 0)
71
+ points = self._remove_close(points, self.radius)
72
+ points = self.range_filter(points, self.x_range, self.y_range, self.z_range)
73
+ features['lidar'] = points
74
+ return features, targets
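A small sanity check of the filter above: points within `close_radius` of the ego origin are dropped first, then anything outside the configured x/y/z ranges.

import torch

from det_map.data.pipelines.filter_lidar import LiDARFilter

lidar_filter = LiDARFilter(close_radius=1.0)
points = torch.tensor([[0.2, 0.3, 0.0, 1.0, 0.0],    # too close to the ego origin -> dropped
                       [10.0, -5.0, 1.0, 1.0, 0.0],  # kept
                       [80.0, 0.0, 1.0, 1.0, 0.0]])  # outside x_range -> dropped
features = {'lidars_warped': [points]}
features, _ = lidar_filter(features, targets={})
print(features['lidar'].shape)                        # torch.Size([1, 5])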
det_map/data/pipelines/lidar_aug.py ADDED
@@ -0,0 +1,151 @@
1
+ import numpy as np
2
+ import torch
3
+ from nuplan.common.actor_state.tracked_objects_types import (
4
+ TrackedObjectType,
5
+ )
6
+
7
+ OBJECT_TYPE_DICT = {
8
+ "vehicle": TrackedObjectType.VEHICLE,
9
+ "pedestrian": TrackedObjectType.PEDESTRIAN,
10
+ "bicycle": TrackedObjectType.BICYCLE,
11
+ "traffic_cone": TrackedObjectType.TRAFFIC_CONE,
12
+ "barrier": TrackedObjectType.BARRIER,
13
+ "czone_sign": TrackedObjectType.CZONE_SIGN,
14
+ "generic_object": TrackedObjectType.GENERIC_OBJECT,
15
+ }
16
+
17
+
18
+ def limit_period(val, offset=0.5, period=2 * np.pi):
19
+ """Limit the value into a period for periodic function.
20
+
21
+ Args:
22
+ val (torch.Tensor | np.ndarray): The value to be converted.
23
+ offset (float, optional): Offset to set the value range.
24
+ Defaults to 0.5.
25
+ period (float, optional): Period of the value. Defaults to 2 * np.pi.
26
+
27
+ Returns:
28
+ (torch.Tensor | np.ndarray): Value in the range of
29
+ [-offset * period, (1-offset) * period]
30
+ """
31
+ limited_val = val - torch.floor(val / period + offset) * period
32
+ return limited_val
33
+
34
+
35
+ class LiDARAug(object):
36
+ def __init__(self,
37
+ bda_aug_conf, is_train,
38
+ x_range='(-50.0, 50.0)',
39
+ y_range='(-50.0, 50.0)',
40
+ z_range='(-5, 20)',
41
+ ):
42
+ for k in ['rot_lim', 'scale_lim', 'tran_lim']:
43
+ bda_aug_conf[k] = eval(bda_aug_conf[k])
44
+ self.bda_aug_conf = bda_aug_conf
45
+ self.is_train = is_train
46
+ self.x_range = eval(x_range)
47
+ self.y_range = eval(y_range)
48
+ self.z_range = eval(z_range)
49
+
50
+ def sample_bda_augmentation(self):
51
+ """Generate bda augmentation values based on bda_config."""
52
+ if self.is_train:
53
+ rotate_bda = np.random.uniform(*self.bda_aug_conf['rot_lim'])
54
+ scale_bda = np.random.uniform(*self.bda_aug_conf['scale_lim'])
55
+ flip_dx = np.random.uniform() < self.bda_aug_conf['flip_dx_ratio']
56
+ flip_dy = np.random.uniform() < self.bda_aug_conf['flip_dy_ratio']
57
+ translation_std = self.bda_aug_conf.get('tran_lim', [0.0, 0.0, 0.0])
58
+ tran_bda = np.random.normal(scale=translation_std, size=3).T
59
+ else:
60
+ rotate_bda = 0
61
+ scale_bda = 1.0
62
+ flip_dx = False
63
+ flip_dy = False
64
+ tran_bda = np.zeros((1, 3), dtype=np.float32)
65
+ return rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda
66
+
67
+ def bev_transform(self, gt_boxes, rotate_angle, scale_ratio, flip_dx,
68
+ flip_dy, tran_bda, rot_mat):
69
+ if gt_boxes.shape[0] > 0:
70
+ gt_boxes[:, :3] = (
71
+ rot_mat @ gt_boxes[:, :3].unsqueeze(-1)).squeeze(-1)
72
+ gt_boxes[:, 3:6] *= scale_ratio
73
+ gt_boxes[:, 6] += rotate_angle
74
+ if flip_dx:
75
+ gt_boxes[:,
76
+ 6] = 2 * torch.asin(torch.tensor(1.0)) - gt_boxes[:,
77
+ 6]
78
+ if flip_dy:
79
+ gt_boxes[:, 6] = -gt_boxes[:, 6]
80
+ gt_boxes[:, 7:] = (
81
+ rot_mat[:2, :2] @ gt_boxes[:, 7:].unsqueeze(-1)).squeeze(-1)
82
+ gt_boxes[:, :3] = gt_boxes[:, :3] + tran_bda
83
+ return gt_boxes
84
+
85
+ def __call__(self, features, targets):
86
+ # 1. filter box based on ranges
87
+ # 2. filter label based on classes
88
+ if 'dets' in targets and 'labels' in targets:
89
+ boxes = targets['dets']
90
+ labels = targets['labels']
91
+
92
+ for t, (box, label) in enumerate(zip(boxes, labels)):
93
+ label_mask = np.array([n in OBJECT_TYPE_DICT for n in label], dtype=np.bool_)
94
+ label_mask = torch.from_numpy(label_mask)
95
+ range_mask = ((box[:, 0] > self.x_range[0]) &
96
+ (box[:, 0] < self.x_range[1]) &
97
+ (box[:, 1] > self.y_range[0]) &
98
+ (box[:, 1] < self.y_range[1]))
99
+ mask = range_mask & label_mask
100
+ box_of_interest = box[mask]
101
+ box_of_interest[:, 6] = limit_period(box_of_interest[:, 6])
102
+ boxes[t] = box_of_interest.float()
103
+
104
+ labels[t] = torch.from_numpy(np.array([OBJECT_TYPE_DICT[x].value for
105
+ x in label], dtype=np.int64))[mask]
106
+ targets['dets'] = boxes
107
+ targets['labels'] = labels
108
+
109
+ rotate_bda, scale_bda, flip_dx, flip_dy, tran_bda = \
110
+ self.sample_bda_augmentation()
111
+ bda_mat = torch.zeros(4, 4)
112
+ bda_mat[3, 3] = 1
113
+ rotate_angle = torch.tensor(rotate_bda / 180 * np.pi)
114
+ rot_sin = torch.sin(rotate_angle)
115
+ rot_cos = torch.cos(rotate_angle)
116
+ rot_mat = torch.Tensor([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0],
117
+ [0, 0, 1]])
118
+ scale_mat = torch.Tensor([[scale_bda, 0, 0], [0, scale_bda, 0],
119
+ [0, 0, scale_bda]])
120
+ flip_mat = torch.Tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
121
+ if flip_dx:
122
+ flip_mat = flip_mat @ torch.Tensor([[-1, 0, 0], [0, 1, 0],
123
+ [0, 0, 1]])
124
+ if flip_dy:
125
+ flip_mat = flip_mat @ torch.Tensor([[1, 0, 0], [0, -1, 0],
126
+ [0, 0, 1]])
127
+ bda_rot = flip_mat @ (scale_mat @ rot_mat)
128
+
129
+ if 'dets' in targets:
130
+ for idx, boxes in enumerate(targets['dets']):
131
+ targets['dets'][idx] = self.bev_transform(boxes, rotate_bda, scale_bda,
132
+ flip_dx, flip_dy, tran_bda, bda_rot)
133
+ # print('before bda')
134
+ # print(features['lidars_warped'][-1][:, 0].max())
135
+ # print(features['lidars_warped'][-1][:, 0].min())
136
+ # print(features['lidars_warped'][-1][:, 1].max())
137
+ # print(features['lidars_warped'][-1][:, 1].min())
138
+ for idx, points in enumerate(features['lidars_warped']):
139
+ points_aug = (bda_rot @ points[:, :3].unsqueeze(-1)).squeeze(-1)
140
+ points[:, :3] = points_aug + tran_bda
141
+ features['lidars_warped'][idx] = points
142
+
143
+ # print('after bda')
144
+ # print(features['lidars_warped'][-1][:, 0].max())
145
+ # print(features['lidars_warped'][-1][:, 0].min())
146
+ # print(features['lidars_warped'][-1][:, 1].max())
147
+ # print(features['lidars_warped'][-1][:, 1].min())
148
+ bda_mat[:3, :3] = bda_rot
149
+ bda_mat[:3, 3] = torch.from_numpy(tran_bda)
150
+ features['bda'] = bda_mat
151
+ return features, targets
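A minimal sketch of how the BDA (BEV data augmentation) matrix above is composed and applied; the rotation, scale and flip values are arbitrary examples, and the same `bda_rot @ point` pattern is what `__call__` applies to both boxes and lidar points.

import math

import torch

rotate_bda, scale_bda, flip_dx, flip_dy = 10.0, 1.05, True, False

ang = rotate_bda / 180 * math.pi
rot_mat = torch.tensor([[math.cos(ang), -math.sin(ang), 0.0],
                        [math.sin(ang),  math.cos(ang), 0.0],
                        [0.0, 0.0, 1.0]])
scale_mat = scale_bda * torch.eye(3)
flip_mat = torch.diag(torch.tensor([-1.0 if flip_dx else 1.0,
                                    -1.0 if flip_dy else 1.0, 1.0]))
bda_rot = flip_mat @ (scale_mat @ rot_mat)              # flip o scale o rotate, as above

points_xyz = torch.randn(100, 3)
points_aug = (bda_rot @ points_xyz.unsqueeze(-1)).squeeze(-1)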
det_map/data/pipelines/point_shuffle.py ADDED
@@ -0,0 +1,17 @@
1
+ import numpy as np
2
+ from typing import Tuple
3
+
4
+ import torch
5
+
6
+
7
+ class PointShuffle(object):
8
+ def __init__(self, is_train):
9
+ self.is_train = is_train
10
+
11
+ def __call__(self, features, targets):
12
+ if self.is_train:
13
+ points = features['lidar']
14
+ cnt = points.shape[0]
15
+ idx = torch.randperm(cnt, device=points.device)
16
+ features['lidar'] = points[idx]
17
+ return features, targets
det_map/data/pipelines/prepare_depth.py ADDED
@@ -0,0 +1,76 @@
1
+ import torch
2
+ import numpy as np
3
+ import PIL.Image as Image
4
+
5
+ class LiDAR2Depth(object):
6
+
7
+ def __init__(self,
8
+ grid_config,
9
+ ):
10
+ self.x = eval(grid_config['x'])
11
+ self.y = eval(grid_config['y'])
12
+ self.z = eval(grid_config['z'])
13
+ self.depth = eval(grid_config['depth'])
14
+
15
+ def points2depthmap(self, points, height, width):
16
+ height, width = height, width
17
+ depth_map = torch.zeros((height, width), dtype=torch.float32)
18
+ coor = torch.round(points[:, :2])
19
+ depth = points[:, 2]
20
+ kept1 = (coor[:, 0] >= 0) & (coor[:, 0] < width) & (
21
+ coor[:, 1] >= 0) & (coor[:, 1] < height) & (
22
+ depth < self.depth[1]) & (
23
+ depth >= self.depth[0])
24
+ coor, depth = coor[kept1], depth[kept1]
25
+ ranks = coor[:, 0] + coor[:, 1] * width
26
+ sort = (ranks + depth / 100.).argsort()
27
+ coor, depth, ranks = coor[sort], depth[sort], ranks[sort]
28
+
29
+ kept2 = torch.ones(coor.shape[0], device=coor.device, dtype=torch.bool)
30
+ kept2[1:] = (ranks[1:] != ranks[:-1])
31
+ coor, depth = coor[kept2], depth[kept2]
32
+ coor = coor.to(torch.long)
33
+ depth_map[coor[:, 1], coor[:, 0]] = depth
34
+ return depth_map
35
+
36
+ def __call__(self, features, targets):
37
+ # points, img, sensor2lidar_rotation, sensor2lidar_translation, intrinsics,
38
+ # post_rot, post_tran
39
+ # List: length=frames
40
+ lidar_all_frames = features['lidars_warped']
41
+ # image: T, N_CAMS, C, H, W
42
+ T, N, _, H, W = features['image'].shape
43
+ rots, trans, intrinsics = (features['sensor2lidar_rotation'],
44
+ features['sensor2lidar_translation'],
45
+ features['intrinsics'])
46
+ post_rot, post_tran, bda = (features['post_rot'],
47
+ features['post_tran'], features['bda'])
48
+
49
+ t = -1
50
+ depth_t = []
51
+ lidar_t = lidar_all_frames[t][:, :3]
52
+ lidar_t = lidar_t - bda[:3, 3].view(1, 3)
53
+ lidar_t = lidar_t.matmul(torch.inverse(bda[:3, :3]).T)
54
+
55
+ # print('cancel bda')
56
+ # print(lidar_t[:, 0].max())
57
+ # print(lidar_t[:, 0].min())
58
+ # print(lidar_t[:, 1].max())
59
+ # print(lidar_t[:, 1].min())
60
+
61
+ for n in range(N):
62
+ points_img = lidar_t - trans[t, n:n + 1, :]
63
+ lidar2cam_rot = torch.inverse(rots[t, n])
64
+ # lidar2cam, cam2img
65
+ points_img = points_img.matmul(lidar2cam_rot.T).matmul(intrinsics[t, n].T)
66
+ points_img = torch.cat(
67
+ [points_img[:, :2] / points_img[:, 2:3], points_img[:, 2:3]],
68
+ 1)
69
+ points_img = points_img.matmul(
70
+ post_rot[t, n].T) + post_tran[t, n:n + 1, :]
71
+ depth_curr = self.points2depthmap(points_img, features['canvas'][-1, n].shape[0], features['canvas'][-1, n].shape[1])
72
+ depth_t.append(depth_curr)
73
+ # Image.fromarray((1- depth_curr.clamp(0,1)).cpu().numpy() * 255).convert('L').save(f'/mnt/f/e2e/navsim_ours/debug/depth{n}.png')
74
+ # Image.fromarray(features['canvas'][-1, n].cpu().numpy().astype(np.uint8)).convert('RGB').save(f'/mnt/f/e2e/navsim_ours/debug/canvas{n}.png')
75
+ features['gt_depth'] = torch.stack(depth_t)
76
+ return features, targets
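The subtle step in `points2depthmap` is how several LiDAR points falling on the same pixel are resolved: sorting by `ranks + depth / 100.` groups points by pixel with the nearest point first, so keeping the first element of each group keeps the smallest depth per pixel. A self-contained toy illustration (made-up coordinates):

```python
# Toy illustration of the de-duplication trick in points2depthmap.
import torch

coor = torch.tensor([[3, 2], [3, 2], [7, 5]])          # (u, v) pixel coordinates
depth = torch.tensor([12.0, 4.0, 30.0])                # metres
width = 10

ranks = coor[:, 0] + coor[:, 1] * width                # one rank per pixel
order = (ranks + depth / 100.).argsort()               # group by pixel, nearest first
coor, depth, ranks = coor[order], depth[order], ranks[order]

keep = torch.ones(coor.shape[0], dtype=torch.bool)
keep[1:] = ranks[1:] != ranks[:-1]                     # first (= nearest) point per pixel
print(coor[keep], depth[keep])                         # pixel (3, 2) keeps depth 4.0
```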
det_map/data/pipelines/prepare_img.py ADDED
@@ -0,0 +1,218 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+
6
+ from det_map.data.datasets.dataclasses import Camera
7
+ from det_map.data.pipelines.color_utils import bgr2hsv, hsv2bgr, mmlabNormalize
8
+
9
+
10
+ class PrepareImageInputs(object):
11
+ """Load multi channel images from a list of separate channel files.
12
+
13
+ Expects results['img_filename'] to be a list of filenames.
14
+
15
+ Args:
16
+ data_config (dict): Input-size and view-augmentation configuration.
17
+ is_train (bool): Whether to sample training-time augmentation. Defaults to False.
18
+ opencv_pp (bool): Whether to apply the transform via cv2.warpAffine. Defaults to False.
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ data_config,
24
+ is_train=False,
25
+ opencv_pp=False,
26
+ ):
27
+ self.is_train = is_train
28
+ self.data_config = data_config
29
+ self.normalize_img = mmlabNormalize
30
+ self.opencv_pp = opencv_pp
31
+
32
+ def get_rot(self, h):
33
+ return torch.Tensor([
34
+ [np.cos(h), np.sin(h)],
35
+ [-np.sin(h), np.cos(h)],
36
+ ])
37
+
38
+ def img_transform(self, img, post_rot, post_tran, resize, resize_dims,
39
+ crop, flip, rotate):
40
+ # adjust image
41
+ if not self.opencv_pp:
42
+ img = self.img_transform_core(img, resize_dims, crop, flip, rotate)
43
+
44
+ # post-homography transformation
45
+ post_rot *= resize
46
+ post_tran -= torch.Tensor(crop[:2])
47
+ if flip:
48
+ A = torch.Tensor([[-1, 0], [0, 1]])
49
+ b = torch.Tensor([crop[2] - crop[0], 0])
50
+ post_rot = A.matmul(post_rot)
51
+ post_tran = A.matmul(post_tran) + b
52
+ A = self.get_rot(rotate / 180 * np.pi)
53
+ b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
54
+ b = A.matmul(-b) + b
55
+ post_rot = A.matmul(post_rot)
56
+ post_tran = A.matmul(post_tran) + b
57
+ if self.opencv_pp:
58
+ img = self.img_transform_core_opencv(img, post_rot, post_tran, crop)
59
+ return img, post_rot, post_tran
60
+
61
+ def img_transform_core_opencv(self, img, post_rot, post_tran,
62
+ crop):
63
+ img = np.array(img).astype(np.float32)
64
+ img = cv2.warpAffine(img,
65
+ np.concatenate([post_rot,
66
+ post_tran.reshape(2, 1)],
67
+ axis=1),
68
+ (crop[2] - crop[0], crop[3] - crop[1]),
69
+ flags=cv2.INTER_LINEAR)
70
+ return img
71
+
72
+ def img_transform_core(self, img, resize_dims, crop, flip, rotate):
73
+ # adjust image
74
+ img = img.resize(resize_dims)
75
+ img = img.crop(crop)
76
+ if flip:
77
+ img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
78
+ img = img.rotate(rotate)
79
+ return img
80
+
81
+ def sample_augmentation(self, H, W, flip=None, scale=None):
82
+ fH, fW = eval(self.data_config['input_size'])
83
+ if self.is_train:
84
+ resize = float(fW) / float(W)
85
+ resize += np.random.uniform(*eval(self.data_config['resize']))
86
+ resize_dims = (int(W * resize), int(H * resize))
87
+ newW, newH = resize_dims
88
+ random_crop_height = \
89
+ self.data_config.get('random_crop_height', False)
90
+ if random_crop_height:
91
+ crop_h = int(np.random.uniform(max(0.3 * newH, newH - fH),
92
+ newH - fH))
93
+ else:
94
+ crop_h = \
95
+ int((1 - np.random.uniform(*eval(self.data_config['crop_h']))) *
96
+ newH) - fH
97
+ crop_w = int(np.random.uniform(0, max(0, newW - fW)))
98
+ crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
99
+ flip = self.data_config['flip'] and np.random.choice([0, 1])
100
+ rotate = np.random.uniform(*eval(self.data_config['rot']))
101
+ if self.data_config.get('vflip', False) and np.random.choice([0, 1]):
102
+ rotate += 180
103
+ else:
104
+ resize = float(fW) / float(W)
105
+ if scale is not None:
106
+ resize += scale
107
+ else:
108
+ resize += self.data_config.get('resize_test', 0.0)
109
+ resize_dims = (int(W * resize), int(H * resize))
110
+ newW, newH = resize_dims
111
+ crop_h = int((1 - np.mean(eval(self.data_config['crop_h']))) * newH) - fH
112
+ crop_w = int(max(0, newW - fW) / 2)
113
+ crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
114
+ flip = False if flip is None else flip
115
+ rotate = 0
116
+ return resize, resize_dims, crop, flip, rotate
117
+
118
+ def photo_metric_distortion(self, img, pmd):
119
+ """Call function to perform photometric distortion on images.
120
+ Args:
121
+ img (PIL.Image): Input image. pmd (dict): Photometric distortion config.
122
+ Returns:
123
+ PIL.Image: Distorted image.
124
+ """
125
+ if np.random.rand() > pmd.get('rate', 1.0):
126
+ return img
127
+
128
+ img = np.array(img).astype(np.float32)
129
+ assert img.dtype == np.float32, \
130
+ 'PhotoMetricDistortion needs the input image of dtype np.float32,' \
131
+ ' please set "to_float32=True" in "LoadImageFromFile" pipeline'
132
+ # random brightness
133
+ if np.random.randint(2):
134
+ delta = np.random.uniform(-pmd['brightness_delta'],
135
+ pmd['brightness_delta'])
136
+ img += delta
137
+
138
+ # mode == 0 --> do random contrast first
139
+ # mode == 1 --> do random contrast last
140
+ mode = np.random.randint(2)
141
+ if mode == 1:
142
+ if np.random.randint(2):
143
+ alpha = np.random.uniform(pmd['contrast_lower'],
144
+ pmd['contrast_upper'])
145
+ img *= alpha
146
+
147
+ # convert color from BGR to HSV
148
+ img = bgr2hsv(img)
149
+
150
+ # random saturation
151
+ if np.random.randint(2):
152
+ img[..., 1] *= np.random.uniform(pmd['saturation_lower'],
153
+ pmd['saturation_upper'])
154
+
155
+ # random hue
156
+ if np.random.randint(2):
157
+ img[..., 0] += np.random.uniform(-pmd['hue_delta'], pmd['hue_delta'])
158
+ img[..., 0][img[..., 0] > 360] -= 360
159
+ img[..., 0][img[..., 0] < 0] += 360
160
+
161
+ # convert color from HSV to BGR
162
+ img = hsv2bgr(img)
163
+
164
+ # random contrast
165
+ if mode == 0:
166
+ if np.random.randint(2):
167
+ alpha = np.random.uniform(pmd['contrast_lower'],
168
+ pmd['contrast_upper'])
169
+ img *= alpha
170
+
171
+ # randomly swap channels
172
+ if np.random.randint(2):
173
+ img = img[..., np.random.permutation(3)]
174
+ return Image.fromarray(img.astype(np.uint8))
175
+
176
+ def get_inputs(self, cam: Camera, flip=None, scale=None):
177
+
178
+ img = Image.fromarray(cam.image)
179
+ # original copy of image
180
+ cam.canvas = torch.tensor(np.array(img))
181
+
182
+ post_rot = torch.eye(2)
183
+ post_tran = torch.zeros(2)
184
+
185
+ # image view augmentation (resize, crop, horizontal flip, rotate)
186
+ img_augs = self.sample_augmentation(
187
+ H=img.height, W=img.width, flip=flip, scale=scale)
188
+ resize, resize_dims, crop, flip, rotate = img_augs
189
+ img, post_rot2, post_tran2 = \
190
+ self.img_transform(img, post_rot,
191
+ post_tran,
192
+ resize=resize,
193
+ resize_dims=resize_dims,
194
+ crop=crop,
195
+ flip=flip,
196
+ rotate=rotate)
197
+
198
+ # for convenience, make augmentation matrices 3x3
199
+ post_tran = torch.zeros(3)
200
+ post_rot = torch.eye(3)
201
+ post_tran[:2] = post_tran2
202
+ post_rot[:2, :2] = post_rot2
203
+
204
+ if self.is_train and self.data_config.get('pmd', None) is not None:
205
+ img = self.photo_metric_distortion(img, self.data_config['pmd'])
206
+
207
+ # original image
208
+ cam.image = self.normalize_img(img)
209
+ cam.post_rot = post_rot
210
+ cam.post_tran = post_tran
211
+ cam.sensor2lidar_rotation = torch.tensor(cam.sensor2lidar_rotation)
212
+ cam.sensor2lidar_translation = torch.tensor(cam.sensor2lidar_translation)
213
+ cam.intrinsics = torch.tensor(cam.intrinsics)
214
+ cam.distortion = torch.tensor(cam.distortion)
215
+ return cam
216
+
217
+ def __call__(self, results):
218
+ return self.get_inputs(results)
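For intuition, a minimal sketch (hypothetical resize/crop values; flip and rotation omitted) of what the returned `(post_rot, post_tran)` pair encodes: the 2D affine that maps a pixel of the raw image into the augmented image, which is how `prepare_depth.py` reuses it via `points_img.matmul(post_rot.T) + post_tran`:

```python
# Minimal sketch (hypothetical resize/crop, flip and rotation omitted):
# post_rot/post_tran form the affine mapping raw-image pixels into the
# augmented image, matching `post_rot *= resize` and `post_tran -= crop[:2]`.
import torch

resize = 0.5
crop = (10, 20, 10 + 704, 20 + 256)            # (x1, y1, x2, y2)

post_rot = torch.eye(2) * resize
post_tran = -torch.tensor(crop[:2], dtype=torch.float32)

uv_raw = torch.tensor([100.0, 80.0])           # pixel in the original image
uv_aug = post_rot @ uv_raw + post_tran
print(uv_aug)                                  # tensor([40., 20.])
```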
det_map/det/__init__.py ADDED
File without changes
det_map/det/dal/__init__.py ADDED
File without changes
det_map/det/dal/dal.py ADDED
@@ -0,0 +1,159 @@
1
+ # Copyright (c) Phigent Robotics. All rights reserved.
2
+ import torch
3
+
4
+ from det_map.det.dal.mmdet3d.models.detectors.bevdet import BEVDet
5
+ from det_map.det.dal.mmdet3d.models.utils import FFN
6
+ from det_map.det.dal.mmdet3d.models.utils.spconv_voxelize import SPConvVoxelization
7
+ try:
8
+ from det_map.det.dal.mmdet3d.models import *
9
+ from det_map.det.dal.mmdet3d.core import *
10
+ except Exception:
11
+ raise  # re-raise the original import error instead of masking it
12
+
13
+ class DAL(BEVDet):
14
+ def __init__(self, **kwargs):
15
+ super(DAL, self).__init__(**kwargs)
16
+
17
+ # image view auxiliary task heads
18
+ self.num_cls = self.pts_bbox_head.num_classes
19
+ heads = dict(heatmap=(self.num_cls, 2))
20
+ input_feat_dim = kwargs['pts_bbox_head']['hidden_channel']
21
+ self.auxiliary_heads = FFN(
22
+ input_feat_dim,
23
+ heads,
24
+ conv_cfg=dict(type="Conv1d"),
25
+ norm_cfg=dict(type="BN1d"),
26
+ bias=True)
27
+ self.auxiliary_heads.init_weights()
28
+
29
+ pts_voxel_cfg = kwargs.get('pts_voxel_layer', None)
30
+ if pts_voxel_cfg:
31
+ pts_voxel_cfg['num_point_features'] = 5
32
+ self.pts_voxel_layer = SPConvVoxelization(**pts_voxel_cfg)
33
+
34
+ def extract_img_feat(self, img, img_metas):
35
+ """Extract features of images."""
36
+ img = self.prepare_inputs(img)
37
+ x, _ = self.image_encoder(img[0])
38
+ return [x] + img[1:]
39
+
40
+ def extract_feat(self, points, img, img_metas):
41
+ """Extract features from images and points."""
42
+ img_feats = self.extract_img_feat(img, img_metas)
43
+ pts_feats = self.extract_pts_feat(points, img_feats, img_metas)
44
+ return (img_feats, pts_feats)
45
+
46
+ def forward_img_auxiliary_train(self,
47
+ x,
48
+ img_metas,
49
+ gt_bboxes,
50
+ gt_labels,
51
+ gt_bboxes_ignore=None,
52
+ proposals=None,
53
+ **kwargs):
54
+ max_instance = 150
55
+ num_pos = 0
56
+ centers_augego = x[0].new_zeros((len(gt_bboxes), max_instance, 3))
57
+ box_targets_all = x[0].new_zeros((len(gt_bboxes), max_instance, 10))
58
+ valid_mask = x[0].new_zeros((len(gt_bboxes), max_instance, 1))
59
+ label = x[0].new_zeros((len(gt_bboxes), max_instance, 1)).to(torch.long)
60
+ for sid in range(len(gt_bboxes)):
61
+ centers_augego_tmp = gt_bboxes[sid].gravity_center.to(x[0])
62
+ box_targets_tmp = self.pts_bbox_head.bbox_coder.encode(gt_bboxes[sid].tensor)
63
+ if gt_bboxes_ignore is not None:
64
+ centers_augego_tmp = centers_augego_tmp[gt_bboxes_ignore[sid], :]
65
+ box_targets_tmp = box_targets_tmp[gt_bboxes_ignore[sid], :]
66
+ num_valid_samples = centers_augego_tmp.shape[0]
67
+ num_pos += num_valid_samples
68
+ valid_mask[sid, :num_valid_samples, :] = 1.0
69
+ centers_augego[sid, :num_valid_samples, :] = centers_augego_tmp
70
+ box_targets_all[sid, :num_valid_samples, :] = box_targets_tmp
71
+ label_tmp = gt_labels[sid].unsqueeze(-1)
72
+ if gt_bboxes_ignore is not None:
73
+ label_tmp = label_tmp[gt_bboxes_ignore[sid], :]
74
+ label[sid, :num_valid_samples, :] = label_tmp
75
+ img_feats = self.pts_bbox_head.extract_img_feat_from_3dpoints(
76
+ centers_augego, x, fuse=False)
77
+ heatmap = self.auxiliary_heads.heatmap(img_feats)
78
+ loss_cls_img = self.pts_bbox_head.loss_cls(
79
+ heatmap.permute(0, 2, 1).reshape(-1, self.num_cls),
80
+ label.flatten(),
81
+ valid_mask.flatten(),
82
+ avg_factor=max(num_pos, 1))
83
+ return dict(loss_cls_img=loss_cls_img)
84
+
85
+ def forward_train(self,
86
+ points=None,
87
+ img_metas=None,
88
+ gt_bboxes_3d=None,
89
+ gt_labels_3d=None,
90
+ gt_labels=None,
91
+ gt_bboxes=None,
92
+ img_inputs=None,
93
+ proposals=None,
94
+ gt_bboxes_ignore=None,
95
+ **kwargs):
96
+ """Forward training function.
97
+
98
+ Args:
99
+ points (list[torch.Tensor], optional): Points of each sample.
100
+ Defaults to None.
101
+ img_metas (list[dict], optional): Meta information of each sample.
102
+ Defaults to None.
103
+ gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional):
104
+ Ground truth 3D boxes. Defaults to None.
105
+ gt_labels_3d (list[torch.Tensor], optional): Ground truth labels
106
+ of 3D boxes. Defaults to None.
107
+ gt_labels (list[torch.Tensor], optional): Ground truth labels
108
+ of 2D boxes in images. Defaults to None.
109
+ gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in
110
+ images. Defaults to None.
111
+ img_inputs (torch.Tensor, optional): Images of each sample with shape
112
+ (N, C, H, W). Defaults to None.
113
+ proposals (list[torch.Tensor], optional): Predicted proposals
114
+ used for training Fast RCNN. Defaults to None.
115
+ gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
116
+ 2D boxes in images to be ignored. Defaults to None.
117
+
118
+ Returns:
119
+ dict: Losses of different branches.
120
+ """
121
+ img_feats, pts_feats = self.extract_feat(
122
+ points, img=img_inputs, img_metas=img_metas)
123
+ img_feats_bev = \
124
+ self.img_view_transformer(img_feats + img_inputs[1:7],
125
+ depth_from_lidar=kwargs['gt_depth'])
126
+
127
+ losses = dict()
128
+ losses_pts = \
129
+ self.forward_pts_train([img_feats, pts_feats, img_feats_bev],
130
+ gt_bboxes_3d, gt_labels_3d, img_metas,
131
+ gt_bboxes_ignore)
132
+ losses.update(losses_pts)
133
+ losses_img_auxiliary = \
134
+ self.forward_img_auxiliary_train(img_feats, img_metas,
135
+ gt_bboxes_3d, gt_labels_3d,
136
+ gt_bboxes_ignore,
137
+ **kwargs)
138
+ losses.update(losses_img_auxiliary)
139
+ return losses
140
+
141
+ def simple_test(self,
142
+ points,
143
+ img_metas,
144
+ img_inputs=None,
145
+ rescale=False,
146
+ **kwargs):
147
+ """Test function without augmentaiton."""
148
+ img_feats, pts_feats = self.extract_feat(
149
+ points, img=img_inputs, img_metas=img_metas)
150
+ img_feats_bev = \
151
+ self.img_view_transformer(img_feats + img_inputs[1:7],
152
+ depth_from_lidar=kwargs['gt_depth'][0])
153
+
154
+ bbox_list = [dict() for _ in range(len(img_metas))]
155
+ bbox_pts = self.simple_test_pts([img_feats, pts_feats, img_feats_bev],
156
+ img_metas, rescale=rescale)
157
+ for result_dict, pts_bbox in zip(bbox_list, bbox_pts):
158
+ result_dict['pts_bbox'] = pts_bbox
159
+ return bbox_list
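The auxiliary image-view branch above packs a variable number of ground-truth boxes per sample into fixed-size tensors plus a validity mask (`max_instance = 150`). A stripped-down sketch of that packing, with made-up shapes:

```python
# Stripped-down sketch of the padding in forward_img_auxiliary_train:
# variable-length GT lists -> fixed-size tensors plus a validity mask.
import torch

max_instance = 150
gt_centers = [torch.randn(3, 3), torch.randn(7, 3)]      # hypothetical batch of 2

centers = torch.zeros(len(gt_centers), max_instance, 3)
valid = torch.zeros(len(gt_centers), max_instance, 1)
for sid, c in enumerate(gt_centers):
    centers[sid, :c.shape[0]] = c
    valid[sid, :c.shape[0]] = 1.0

num_pos = int(valid.sum())      # used as avg_factor for the auxiliary cls loss
print(num_pos)                  # 10
```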
det_map/det/dal/mmdet3d/__init__.py ADDED
File without changes
det_map/det/dal/mmdet3d/core/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .bbox import * # noqa: F401, F403
3
+ from .points import * # noqa: F401, F403
4
+ from .post_processing import * # noqa: F401, F403
5
+ from .utils import * # noqa: F401, F403
6
+ from .samplers import *
det_map/det/dal/mmdet3d/core/bbox/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
3
+ # from .bbox_target import bbox_target
4
+ from .iou_calculators import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
5
+ axis_aligned_bbox_overlaps_3d,
6
+ bbox_overlaps_3d,
7
+ )
8
+
9
+ from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
10
+ Coord3DMode, DepthInstance3DBoxes,
11
+ LiDARInstance3DBoxes, get_box_type, limit_period,
12
+ mono_cam_box2vis, points_cam2img, points_img2cam,
13
+ xywhr2xyxyr)
14
+ from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
15
+ from .coders import *
16
+ __all__ = [
17
+ 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner', 'TransFusionBBoxCoder',
18
+ 'bbox_overlaps_3d',
19
+ 'AxisAlignedBboxOverlaps3D', 'axis_aligned_bbox_overlaps_3d', 'Box3DMode',
20
+ 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
21
+ 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
22
+ 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
23
+ 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'
24
+ ]
det_map/det/dal/mmdet3d/core/bbox/assigners/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmdet.core.bbox import AssignResult, BaseAssigner, MaxIoUAssigner
3
+ from .hungarian_assigner_3d import HungarianAssigner3D
4
+
5
+ __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
6
+ 'HungarianAssigner3D']
det_map/det/dal/mmdet3d/core/bbox/assigners/hungarian_assigner_3d.py ADDED
@@ -0,0 +1,148 @@
1
+ from mmdet.core.bbox.builder import BBOX_ASSIGNERS
2
+ from mmdet.core.bbox.assigners import AssignResult, BaseAssigner
3
+ from mmdet.core.bbox.match_costs import build_match_cost
4
+ from mmdet.core.bbox.match_costs.builder import MATCH_COST
5
+ from mmdet.core.bbox.iou_calculators import build_iou_calculator
6
+ import torch
7
+
8
+ try:
9
+ from scipy.optimize import linear_sum_assignment
10
+ except ImportError:
11
+ linear_sum_assignment = None
12
+
13
+ @MATCH_COST.register_module()
14
+ class BBoxBEVL1Cost(object):
15
+ def __init__(self, weight):
16
+ self.weight = weight
17
+
18
+ def __call__(self, bboxes, gt_bboxes, train_cfg):
19
+ pc_start = bboxes.new(train_cfg['point_cloud_range'][0:2])
20
+ pc_range = bboxes.new(train_cfg['point_cloud_range'][3:5]) - bboxes.new(train_cfg['point_cloud_range'][0:2])
21
+ # normalize the box center to [0, 1]
22
+ normalized_bboxes_xy = (bboxes[:, :2] - pc_start) / pc_range
23
+ normalized_gt_bboxes_xy = (gt_bboxes[:, :2] - pc_start) / pc_range
24
+ reg_cost = torch.cdist(normalized_bboxes_xy, normalized_gt_bboxes_xy, p=1)
25
+ return reg_cost * self.weight
26
+
27
+
28
+ @MATCH_COST.register_module()
29
+ class IoU3DCost(object):
30
+ def __init__(self, weight):
31
+ self.weight = weight
32
+
33
+ def __call__(self, iou):
34
+ iou_cost = - iou
35
+ return iou_cost * self.weight
36
+
37
+
38
+ @BBOX_ASSIGNERS.register_module()
39
+ class HeuristicAssigner3D(BaseAssigner):
40
+ def __init__(self,
41
+ dist_thre=100,
42
+ iou_calculator=dict(type='BboxOverlaps3D')
43
+ ):
44
+ self.dist_thre = dist_thre # distance in meter
45
+ self.iou_calculator = build_iou_calculator(iou_calculator)
46
+
47
+ def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None, query_labels=None):
48
+ dist_thre = self.dist_thre
49
+ num_gts, num_bboxes = len(gt_bboxes), len(bboxes)
50
+
51
+ bev_dist = torch.norm(bboxes[:, 0:2][None, :, :] - gt_bboxes[:, 0:2][:, None, :], dim=-1) # [num_gts, num_bboxes]
52
+ if query_labels is not None:
53
+ # only match the gt box and query with same category
54
+ not_same_class = (query_labels[None] != gt_labels[:, None])
55
+ bev_dist += not_same_class * dist_thre
56
+
57
+ # for each gt box, assign it to the nearest pred box
58
+ nearest_values, nearest_indices = bev_dist.min(1) # [num_gts]
59
+ assigned_gt_inds = torch.ones([num_bboxes, ]).to(bboxes) * 0
60
+ assigned_gt_vals = torch.ones([num_bboxes, ]).to(bboxes) * 10000
61
+ assigned_gt_labels = torch.ones([num_bboxes, ]).to(bboxes) * -1
62
+ for idx_gts in range(num_gts):
63
+ # for idx_pred in torch.where(bev_dist[idx_gts] < dist_thre)[0]: # each gt match to all the pred box within some radius
64
+ idx_pred = nearest_indices[idx_gts] # each gt only match to the nearest pred box
65
+ if bev_dist[idx_gts, idx_pred] <= dist_thre:
66
+ if bev_dist[idx_gts, idx_pred] < assigned_gt_vals[idx_pred]: # if this pred box is assigned, then compare
67
+ assigned_gt_vals[idx_pred] = bev_dist[idx_gts, idx_pred]
68
+ assigned_gt_inds[idx_pred] = idx_gts + 1 # for AssignResult, 0 is negative, -1 is ignore, 1-based indices are positive
69
+ assigned_gt_labels[idx_pred] = gt_labels[idx_gts]
70
+
71
+ max_overlaps = torch.zeros([num_bboxes, ]).to(bboxes)
72
+ matched_indices = torch.where(assigned_gt_inds > 0)
73
+ matched_iou = self.iou_calculator(gt_bboxes[assigned_gt_inds[matched_indices].long() - 1], bboxes[matched_indices]).diag()
74
+ max_overlaps[matched_indices] = matched_iou
75
+
76
+ return AssignResult(
77
+ num_gts, assigned_gt_inds.long(), max_overlaps, labels=assigned_gt_labels
78
+ )
79
+
80
+
81
+ @BBOX_ASSIGNERS.register_module()
82
+ class HungarianAssigner3D(BaseAssigner):
83
+ def __init__(self,
84
+ cls_cost=dict(type='ClassificationCost', weight=1.),
85
+ reg_cost=dict(type='BBoxBEVL1Cost', weight=1.0),
86
+ iou_cost=dict(type='IoU3DCost', weight=1.0),
87
+ iou_calculator=dict(type='BboxOverlaps3D'),
88
+ ):
89
+ self.cls_cost = build_match_cost(cls_cost)
90
+ self.reg_cost = build_match_cost(reg_cost)
91
+ self.iou_cost = build_match_cost(iou_cost)
92
+ self.iou_calculator = build_iou_calculator(iou_calculator)
93
+
94
+ def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, train_cfg):
95
+ num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
96
+
97
+ # 1. assign -1 by default
98
+ assigned_gt_inds = bboxes.new_full((num_bboxes,),
99
+ -1,
100
+ dtype=torch.long)
101
+ assigned_labels = bboxes.new_full((num_bboxes,),
102
+ -1,
103
+ dtype=torch.long)
104
+ if num_gts == 0 or num_bboxes == 0:
105
+ # No ground truth or boxes, return empty assignment
106
+ if num_gts == 0:
107
+ # No ground truth, assign all to background
108
+ assigned_gt_inds[:] = 0
109
+ return AssignResult(
110
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
111
+
112
+ # 2. compute the weighted costs
113
+ # see mmdetection/mmdet/core/bbox/match_costs/match_cost.py
114
+ cls_cost = self.cls_cost(cls_pred[0].T, gt_labels)
115
+ reg_cost = self.reg_cost(bboxes, gt_bboxes, train_cfg)
116
+
117
+ iou = self.iou_calculator(bboxes, gt_bboxes)
118
+ iou_cost = self.iou_cost(iou)
119
+
120
+ # weighted sum of above three costs
121
+ cost = cls_cost + reg_cost + iou_cost
122
+
123
+ # 3. do Hungarian matching on CPU using linear_sum_assignment
124
+ cost = cost.detach().cpu()
125
+ if linear_sum_assignment is None:
126
+ raise ImportError('Please run "pip install scipy" '
127
+ 'to install scipy first.')
128
+ try:
129
+ matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
130
+ except ValueError:  # infeasible cost matrix (e.g. NaN/inf entries)
131
+ assigned_gt_inds[:] = 0
132
+ return AssignResult(
133
+ num_gts, assigned_gt_inds, None, labels=assigned_labels)
134
+ matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)
135
+ matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)
136
+
137
+ # 4. assign backgrounds and foregrounds
138
+ # assign all indices to backgrounds first
139
+ assigned_gt_inds[:] = 0
140
+ # assign foregrounds based on matching results
141
+ assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
142
+ assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]
143
+
144
+ max_overlaps = torch.zeros_like(iou.max(1).values)
145
+ max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]
146
+ # max_overlaps = iou.max(1).values
147
+ return AssignResult(
148
+ num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)
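A toy, self-contained illustration of the matching convention used above: `linear_sum_assignment` minimises the summed cost, and matched ground-truth indices are stored 1-based with 0 meaning background (cost values are made up):

```python
# Toy illustration of the Hungarian matching and 1-based assignment convention.
import torch
from scipy.optimize import linear_sum_assignment

cost = torch.tensor([[0.2, 0.9],
                     [0.8, 0.1],
                     [0.5, 0.6]])            # 3 predictions x 2 ground truths

row, col = linear_sum_assignment(cost.numpy())
assigned_gt_inds = torch.zeros(cost.shape[0], dtype=torch.long)    # 0 = background
assigned_gt_inds[torch.from_numpy(row)] = torch.from_numpy(col) + 1
print(assigned_gt_inds)                      # tensor([1, 2, 0])
```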
det_map/det/dal/mmdet3d/core/bbox/box_np_ops.py ADDED
@@ -0,0 +1,827 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ # TODO: clean the functions in this file and move the APIs into box structures
3
+ # in the future
4
+ # NOTICE: All functions in this file are valid for LiDAR or depth boxes only
5
+ # if we use default parameters.
6
+
7
+ import numba
8
+ import numpy as np
9
+
10
+ from .structures.utils import limit_period, points_cam2img, rotation_3d_in_axis
11
+
12
+
13
+ def camera_to_lidar(points, r_rect, velo2cam):
14
+ """Convert points in camera coordinate to lidar coordinate.
15
+
16
+ Note:
17
+ This function is for KITTI only.
18
+
19
+ Args:
20
+ points (np.ndarray, shape=[N, 3]): Points in camera coordinate.
21
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
22
+ specific camera coordinate (e.g. CAM2) to CAM0.
23
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
24
+ camera coordinate to lidar coordinate.
25
+
26
+ Returns:
27
+ np.ndarray, shape=[N, 3]: Points in lidar coordinate.
28
+ """
29
+ points_shape = list(points.shape[0:-1])
30
+ if points.shape[-1] == 3:
31
+ points = np.concatenate([points, np.ones(points_shape + [1])], axis=-1)
32
+ lidar_points = points @ np.linalg.inv((r_rect @ velo2cam).T)
33
+ return lidar_points[..., :3]
34
+
35
+
36
+ def box_camera_to_lidar(data, r_rect, velo2cam):
37
+ """Convert boxes in camera coordinate to lidar coordinate.
38
+
39
+ Note:
40
+ This function is for KITTI only.
41
+
42
+ Args:
43
+ data (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
44
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
45
+ specific camera coordinate (e.g. CAM2) to CAM0.
46
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
47
+ camera coordinate to lidar coordinate.
48
+
49
+ Returns:
50
+ np.ndarray, shape=[N, 3]: Boxes in lidar coordinate.
51
+ """
52
+ xyz = data[:, 0:3]
53
+ x_size, y_size, z_size = data[:, 3:4], data[:, 4:5], data[:, 5:6]
54
+ r = data[:, 6:7]
55
+ xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam)
56
+ # yaw and dims also need to be converted
57
+ r_new = -r - np.pi / 2
58
+ r_new = limit_period(r_new, period=np.pi * 2)
59
+ return np.concatenate([xyz_lidar, x_size, z_size, y_size, r_new], axis=1)
60
+
61
+
62
+ def corners_nd(dims, origin=0.5):
63
+ """Generate relative box corners based on length per dim and origin point.
64
+
65
+ Args:
66
+ dims (np.ndarray, shape=[N, ndim]): Array of length per dim
67
+ origin (list or array or float, optional): origin point relate to
68
+ smallest point. Defaults to 0.5
69
+
70
+ Returns:
71
+ np.ndarray, shape=[N, 2 ** ndim, ndim]: Returned corners.
72
+ point layout example: (2d) x0y0, x0y1, x1y0, x1y1;
73
+ (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
74
+ where x0 < x1, y0 < y1, z0 < z1.
75
+ """
76
+ ndim = int(dims.shape[1])
77
+ corners_norm = np.stack(
78
+ np.unravel_index(np.arange(2**ndim), [2] * ndim),
79
+ axis=1).astype(dims.dtype)
80
+ # now corners_norm has format: (2d) x0y0, x0y1, x1y0, x1y1
81
+ # (3d) x0y0z0, x0y0z1, x0y1z0, x0y1z1, x1y0z0, x1y0z1, x1y1z0, x1y1z1
82
+ # so need to convert to a format which is convenient to do other computing.
83
+ # for 2d boxes, format is clockwise start with minimum point
84
+ # for 3d boxes, please draw lines by your hand.
85
+ if ndim == 2:
86
+ # generate clockwise box corners
87
+ corners_norm = corners_norm[[0, 1, 3, 2]]
88
+ elif ndim == 3:
89
+ corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
90
+ corners_norm = corners_norm - np.array(origin, dtype=dims.dtype)
91
+ corners = dims.reshape([-1, 1, ndim]) * corners_norm.reshape(
92
+ [1, 2**ndim, ndim])
93
+ return corners
94
+
95
+
96
+ def center_to_corner_box2d(centers, dims, angles=None, origin=0.5):
97
+ """Convert kitti locations, dimensions and angles to corners.
98
+ format: center(xy), dims(xy), angles(counterclockwise when positive)
99
+
100
+ Args:
101
+ centers (np.ndarray): Locations in kitti label file with shape (N, 2).
102
+ dims (np.ndarray): Dimensions in kitti label file with shape (N, 2).
103
+ angles (np.ndarray, optional): Rotation_y in kitti label file with
104
+ shape (N). Defaults to None.
105
+ origin (list or array or float, optional): origin point relate to
106
+ smallest point. Defaults to 0.5.
107
+
108
+ Returns:
109
+ np.ndarray: Corners with the shape of (N, 4, 2).
110
+ """
111
+ # 'length' in kitti format is in x axis.
112
+ # xyz(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar)
113
+ # center in kitti format is [0.5, 1.0, 0.5] in xyz.
114
+ corners = corners_nd(dims, origin=origin)
115
+ # corners: [N, 4, 2]
116
+ if angles is not None:
117
+ corners = rotation_3d_in_axis(corners, angles)
118
+ corners += centers.reshape([-1, 1, 2])
119
+ return corners
120
+
121
+
122
+ @numba.jit(nopython=True)
123
+ def depth_to_points(depth, trunc_pixel):
124
+ """Convert depth map to points.
125
+
126
+ Args:
127
+ depth (np.array, shape=[H, W]): Depth map which
128
+ the row of [0~`trunc_pixel`] are truncated.
129
+ trunc_pixel (int): The number of truncated row.
130
+
131
+ Returns:
132
+ np.ndarray: Points in camera coordinates.
133
+ """
134
+ num_pts = np.sum(depth[trunc_pixel:, ] > 0.1)
135
+ points = np.zeros((num_pts, 3), dtype=depth.dtype)
136
+ x = np.array([0, 0, 1], dtype=depth.dtype)
137
+ k = 0
138
+ for i in range(trunc_pixel, depth.shape[0]):
139
+ for j in range(depth.shape[1]):
140
+ if depth[i, j] > 0.1:
141
+ x = np.array([j, i, 1], dtype=depth.dtype)
142
+ points[k] = x * depth[i, j]
143
+ k += 1
144
+ return points
145
+
146
+
147
+ def depth_to_lidar_points(depth, trunc_pixel, P2, r_rect, velo2cam):
148
+ """Convert depth map to points in lidar coordinate.
149
+
150
+ Args:
151
+ depth (np.array, shape=[H, W]): Depth map which
152
+ the row of [0~`trunc_pixel`] are truncated.
153
+ trunc_pixel (int): The number of truncated row.
154
+ P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.
155
+ r_rect (np.ndarray, shape=[4, 4]): Matrix to project points in
156
+ specific camera coordinate (e.g. CAM2) to CAM0.
157
+ velo2cam (np.ndarray, shape=[4, 4]): Matrix to project points in
158
+ camera coordinate to lidar coordinate.
159
+
160
+ Returns:
161
+ np.ndarray: Points in lidar coordinates.
162
+ """
163
+ pts = depth_to_points(depth, trunc_pixel)
164
+ points_shape = list(pts.shape[0:-1])
165
+ points = np.concatenate([pts, np.ones(points_shape + [1])], axis=-1)
166
+ points = points @ np.linalg.inv(P2.T)
167
+ lidar_points = camera_to_lidar(points, r_rect, velo2cam)
168
+ return lidar_points
169
+
170
+
171
+ def center_to_corner_box3d(centers,
172
+ dims,
173
+ angles=None,
174
+ origin=(0.5, 1.0, 0.5),
175
+ axis=1):
176
+ """Convert kitti locations, dimensions and angles to corners.
177
+
178
+ Args:
179
+ centers (np.ndarray): Locations in kitti label file with shape (N, 3).
180
+ dims (np.ndarray): Dimensions in kitti label file with shape (N, 3).
181
+ angles (np.ndarray, optional): Rotation_y in kitti label file with
182
+ shape (N). Defaults to None.
183
+ origin (list or array or float, optional): Origin point relate to
184
+ smallest point. Use (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0)
185
+ in lidar. Defaults to (0.5, 1.0, 0.5).
186
+ axis (int, optional): Rotation axis. 1 for camera and 2 for lidar.
187
+ Defaults to 1.
188
+
189
+ Returns:
190
+ np.ndarray: Corners with the shape of (N, 8, 3).
191
+ """
192
+ # 'length' in kitti format is in x axis.
193
+ # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(lwh)(lidar)
194
+ # center in kitti format is [0.5, 1.0, 0.5] in xyz.
195
+ corners = corners_nd(dims, origin=origin)
196
+ # corners: [N, 8, 3]
197
+ if angles is not None:
198
+ corners = rotation_3d_in_axis(corners, angles, axis=axis)
199
+ corners += centers.reshape([-1, 1, 3])
200
+ return corners
201
+
202
+
203
+ @numba.jit(nopython=True)
204
+ def box2d_to_corner_jit(boxes):
205
+ """Convert box2d to corner.
206
+
207
+ Args:
208
+ boxes (np.ndarray, shape=[N, 5]): Boxes2d with rotation.
209
+
210
+ Returns:
211
+ box_corners (np.ndarray, shape=[N, 4, 2]): Box corners.
212
+ """
213
+ num_box = boxes.shape[0]
214
+ corners_norm = np.zeros((4, 2), dtype=boxes.dtype)
215
+ corners_norm[1, 1] = 1.0
216
+ corners_norm[2] = 1.0
217
+ corners_norm[3, 0] = 1.0
218
+ corners_norm -= np.array([0.5, 0.5], dtype=boxes.dtype)
219
+ corners = boxes.reshape(num_box, 1, 5)[:, :, 2:4] * corners_norm.reshape(
220
+ 1, 4, 2)
221
+ rot_mat_T = np.zeros((2, 2), dtype=boxes.dtype)
222
+ box_corners = np.zeros((num_box, 4, 2), dtype=boxes.dtype)
223
+ for i in range(num_box):
224
+ rot_sin = np.sin(boxes[i, -1])
225
+ rot_cos = np.cos(boxes[i, -1])
226
+ rot_mat_T[0, 0] = rot_cos
227
+ rot_mat_T[0, 1] = rot_sin
228
+ rot_mat_T[1, 0] = -rot_sin
229
+ rot_mat_T[1, 1] = rot_cos
230
+ box_corners[i] = corners[i] @ rot_mat_T + boxes[i, :2]
231
+ return box_corners
232
+
233
+
234
+ @numba.njit
235
+ def corner_to_standup_nd_jit(boxes_corner):
236
+ """Convert boxes_corner to aligned (min-max) boxes.
237
+
238
+ Args:
239
+ boxes_corner (np.ndarray, shape=[N, 2**dim, dim]): Boxes corners.
240
+
241
+ Returns:
242
+ np.ndarray, shape=[N, dim*2]: Aligned (min-max) boxes.
243
+ """
244
+ num_boxes = boxes_corner.shape[0]
245
+ ndim = boxes_corner.shape[-1]
246
+ result = np.zeros((num_boxes, ndim * 2), dtype=boxes_corner.dtype)
247
+ for i in range(num_boxes):
248
+ for j in range(ndim):
249
+ result[i, j] = np.min(boxes_corner[i, :, j])
250
+ for j in range(ndim):
251
+ result[i, j + ndim] = np.max(boxes_corner[i, :, j])
252
+ return result
253
+
254
+
255
+ @numba.jit(nopython=True)
256
+ def corner_to_surfaces_3d_jit(corners):
257
+ """Convert 3d box corners from corner function above to surfaces that
258
+ normal vectors all direct to internal.
259
+
260
+ Args:
261
+ corners (np.ndarray): 3d box corners with the shape of (N, 8, 3).
262
+
263
+ Returns:
264
+ np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
265
+ """
266
+ # box_corners: [N, 8, 3], must from corner functions in this module
267
+ num_boxes = corners.shape[0]
268
+ surfaces = np.zeros((num_boxes, 6, 4, 3), dtype=corners.dtype)
269
+ corner_idxes = np.array([
270
+ 0, 1, 2, 3, 7, 6, 5, 4, 0, 3, 7, 4, 1, 5, 6, 2, 0, 4, 5, 1, 3, 2, 6, 7
271
+ ]).reshape(6, 4)
272
+ for i in range(num_boxes):
273
+ for j in range(6):
274
+ for k in range(4):
275
+ surfaces[i, j, k] = corners[i, corner_idxes[j, k]]
276
+ return surfaces
277
+
278
+
279
+ def rotation_points_single_angle(points, angle, axis=0):
280
+ """Rotate points with a single angle.
281
+
282
+ Args:
283
+ points (np.ndarray, shape=[N, 3]): Points to rotate.
284
+ angle (np.ndarray, shape=[1]): Rotation angle.
285
+ axis (int, optional): Axis to rotate at. Defaults to 0.
286
+
287
+ Returns:
288
+ np.ndarray: Rotated points.
289
+ """
290
+ # points: [N, 3]
291
+ rot_sin = np.sin(angle)
292
+ rot_cos = np.cos(angle)
293
+ if axis == 1:
294
+ rot_mat_T = np.array(
295
+ [[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]],
296
+ dtype=points.dtype)
297
+ elif axis == 2 or axis == -1:
298
+ rot_mat_T = np.array(
299
+ [[rot_cos, rot_sin, 0], [-rot_sin, rot_cos, 0], [0, 0, 1]],
300
+ dtype=points.dtype)
301
+ elif axis == 0:
302
+ rot_mat_T = np.array(
303
+ [[1, 0, 0], [0, rot_cos, rot_sin], [0, -rot_sin, rot_cos]],
304
+ dtype=points.dtype)
305
+ else:
306
+ raise ValueError('axis should be 0, 1, 2 or -1')
307
+
308
+ return points @ rot_mat_T, rot_mat_T
309
+
310
+
311
+ def box3d_to_bbox(box3d, P2):
312
+ """Convert box3d in camera coordinates to bbox in image coordinates.
313
+
314
+ Args:
315
+ box3d (np.ndarray, shape=[N, 7]): Boxes in camera coordinate.
316
+ P2 (np.array, shape=[4, 4]): Intrinsics of Camera2.
317
+
318
+ Returns:
319
+ np.ndarray, shape=[N, 4]: Boxes 2d in image coordinates.
320
+ """
321
+ box_corners = center_to_corner_box3d(
322
+ box3d[:, :3], box3d[:, 3:6], box3d[:, 6], [0.5, 1.0, 0.5], axis=1)
323
+ box_corners_in_image = points_cam2img(box_corners, P2)
324
+ # box_corners_in_image: [N, 8, 2]
325
+ minxy = np.min(box_corners_in_image, axis=1)
326
+ maxxy = np.max(box_corners_in_image, axis=1)
327
+ bbox = np.concatenate([minxy, maxxy], axis=1)
328
+ return bbox
329
+
330
+
331
+ def corner_to_surfaces_3d(corners):
332
+ """convert 3d box corners from corner function above to surfaces that
333
+ normal vectors all direct to internal.
334
+
335
+ Args:
336
+ corners (np.ndarray): 3D box corners with shape of (N, 8, 3).
337
+
338
+ Returns:
339
+ np.ndarray: Surfaces with the shape of (N, 6, 4, 3).
340
+ """
341
+ # box_corners: [N, 8, 3], must from corner functions in this module
342
+ surfaces = np.array([
343
+ [corners[:, 0], corners[:, 1], corners[:, 2], corners[:, 3]],
344
+ [corners[:, 7], corners[:, 6], corners[:, 5], corners[:, 4]],
345
+ [corners[:, 0], corners[:, 3], corners[:, 7], corners[:, 4]],
346
+ [corners[:, 1], corners[:, 5], corners[:, 6], corners[:, 2]],
347
+ [corners[:, 0], corners[:, 4], corners[:, 5], corners[:, 1]],
348
+ [corners[:, 3], corners[:, 2], corners[:, 6], corners[:, 7]],
349
+ ]).transpose([2, 0, 1, 3])
350
+ return surfaces
351
+
352
+
353
+ def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)):
354
+ """Check points in rotated bbox and return indices.
355
+
356
+ Note:
357
+ This function is for counterclockwise boxes.
358
+
359
+ Args:
360
+ points (np.ndarray, shape=[N, 3+dim]): Points to query.
361
+ rbbox (np.ndarray, shape=[M, 7]): Boxes3d with rotation.
362
+ z_axis (int, optional): Indicate which axis is height.
363
+ Defaults to 2.
364
+ origin (tuple[int], optional): Indicate the position of
365
+ box center. Defaults to (0.5, 0.5, 0).
366
+
367
+ Returns:
368
+ np.ndarray, shape=[N, M]: Indices of points in each box.
369
+ """
370
+ # TODO: this function is different from PointCloud3D, be careful
371
+ # when start to use nuscene, check the input
372
+ rbbox_corners = center_to_corner_box3d(
373
+ rbbox[:, :3], rbbox[:, 3:6], rbbox[:, 6], origin=origin, axis=z_axis)
374
+ surfaces = corner_to_surfaces_3d(rbbox_corners)
375
+ indices = points_in_convex_polygon_3d_jit(points[:, :3], surfaces)
376
+ return indices
377
+
378
+
379
+ def minmax_to_corner_2d(minmax_box):
380
+ """Convert minmax box to corners2d.
381
+
382
+ Args:
383
+ minmax_box (np.ndarray, shape=[N, dims]): minmax boxes.
384
+
385
+ Returns:
386
+ np.ndarray: 2d corners of boxes
387
+ """
388
+ ndim = minmax_box.shape[-1] // 2
389
+ center = minmax_box[..., :ndim]
390
+ dims = minmax_box[..., ndim:] - center
391
+ return center_to_corner_box2d(center, dims, origin=0.0)
392
+
393
+
394
+ def create_anchors_3d_range(feature_size,
395
+ anchor_range,
396
+ sizes=((3.9, 1.6, 1.56), ),
397
+ rotations=(0, np.pi / 2),
398
+ dtype=np.float32):
399
+ """Create anchors 3d by range.
400
+
401
+ Args:
402
+ feature_size (list[float] | tuple[float]): Feature map size. It is
403
+ either a list of a tuple of [D, H, W](in order of z, y, and x).
404
+ anchor_range (torch.Tensor | list[float]): Range of anchors with
405
+ shape [6]. The order is consistent with that of anchors, i.e.,
406
+ (x_min, y_min, z_min, x_max, y_max, z_max).
407
+ sizes (list[list] | np.ndarray | torch.Tensor, optional):
408
+ Anchor size with shape [N, 3], in order of x, y, z.
409
+ Defaults to ((3.9, 1.6, 1.56), ).
410
+ rotations (list[float] | np.ndarray | torch.Tensor, optional):
411
+ Rotations of anchors in a single feature grid.
412
+ Defaults to (0, np.pi / 2).
413
+ dtype (type, optional): Data type. Defaults to np.float32.
414
+
415
+ Returns:
416
+ np.ndarray: Range based anchors with shape of
417
+ (*feature_size, num_sizes, num_rots, 7).
418
+ """
419
+ anchor_range = np.array(anchor_range, dtype)
420
+ z_centers = np.linspace(
421
+ anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
422
+ y_centers = np.linspace(
423
+ anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
424
+ x_centers = np.linspace(
425
+ anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
426
+ sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
427
+ rotations = np.array(rotations, dtype=dtype)
428
+ rets = np.meshgrid(
429
+ x_centers, y_centers, z_centers, rotations, indexing='ij')
430
+ tile_shape = [1] * 5
431
+ tile_shape[-2] = int(sizes.shape[0])
432
+ for i in range(len(rets)):
433
+ rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
434
+ rets[i] = rets[i][..., np.newaxis] # for concat
435
+ sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
436
+ tile_size_shape = list(rets[0].shape)
437
+ tile_size_shape[3] = 1
438
+ sizes = np.tile(sizes, tile_size_shape)
439
+ rets.insert(3, sizes)
440
+ ret = np.concatenate(rets, axis=-1)
441
+ return np.transpose(ret, [2, 1, 0, 3, 4, 5])
442
+
443
+
444
+ def center_to_minmax_2d(centers, dims, origin=0.5):
445
+ """Center to minmax.
446
+
447
+ Args:
448
+ centers (np.ndarray): Center points.
449
+ dims (np.ndarray): Dimensions.
450
+ origin (list or array or float, optional): Origin point relate
451
+ to smallest point. Defaults to 0.5.
452
+
453
+ Returns:
454
+ np.ndarray: Minmax points.
455
+ """
456
+ if origin == 0.5:
457
+ return np.concatenate([centers - dims / 2, centers + dims / 2],
458
+ axis=-1)
459
+ corners = center_to_corner_box2d(centers, dims, origin=origin)
460
+ return corners[:, [0, 2]].reshape([-1, 4])
461
+
462
+
463
+ def rbbox2d_to_near_bbox(rbboxes):
464
+ """convert rotated bbox to nearest 'standing' or 'lying' bbox.
465
+
466
+ Args:
467
+ rbboxes (np.ndarray): Rotated bboxes with shape of
468
+ (N, 5(x, y, xdim, ydim, rad)).
469
+
470
+ Returns:
471
+ np.ndarray: Bounding boxes with the shape of
472
+ (N, 4(xmin, ymin, xmax, ymax)).
473
+ """
474
+ rots = rbboxes[..., -1]
475
+ rots_0_pi_div_2 = np.abs(limit_period(rots, 0.5, np.pi))
476
+ cond = (rots_0_pi_div_2 > np.pi / 4)[..., np.newaxis]
477
+ bboxes_center = np.where(cond, rbboxes[:, [0, 1, 3, 2]], rbboxes[:, :4])
478
+ bboxes = center_to_minmax_2d(bboxes_center[:, :2], bboxes_center[:, 2:])
479
+ return bboxes
480
+
481
+
482
+ @numba.jit(nopython=True)
483
+ def iou_jit(boxes, query_boxes, mode='iou', eps=0.0):
484
+ """Calculate box iou. Note that jit version runs ~10x faster than the
485
+ box_overlaps function in mmdet3d.core.evaluation.
486
+
487
+ Note:
488
+ This function is for counterclockwise boxes.
489
+
490
+ Args:
491
+ boxes (np.ndarray): Input bounding boxes with shape of (N, 4).
492
+ query_boxes (np.ndarray): Query boxes with shape of (K, 4).
493
+ mode (str, optional): IoU mode. Defaults to 'iou'.
494
+ eps (float, optional): Value added to denominator. Defaults to 0.
495
+
496
+ Returns:
497
+ np.ndarray: Overlap between boxes and query_boxes
498
+ with the shape of [N, K].
499
+ """
500
+ N = boxes.shape[0]
501
+ K = query_boxes.shape[0]
502
+ overlaps = np.zeros((N, K), dtype=boxes.dtype)
503
+ for k in range(K):
504
+ box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + eps) *
505
+ (query_boxes[k, 3] - query_boxes[k, 1] + eps))
506
+ for n in range(N):
507
+ iw = (
508
+ min(boxes[n, 2], query_boxes[k, 2]) -
509
+ max(boxes[n, 0], query_boxes[k, 0]) + eps)
510
+ if iw > 0:
511
+ ih = (
512
+ min(boxes[n, 3], query_boxes[k, 3]) -
513
+ max(boxes[n, 1], query_boxes[k, 1]) + eps)
514
+ if ih > 0:
515
+ if mode == 'iou':
516
+ ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
517
+ (boxes[n, 3] - boxes[n, 1] + eps) + box_area -
518
+ iw * ih)
519
+ else:
520
+ ua = ((boxes[n, 2] - boxes[n, 0] + eps) *
521
+ (boxes[n, 3] - boxes[n, 1] + eps))
522
+ overlaps[n, k] = iw * ih / ua
523
+ return overlaps
524
+
525
+
526
+ def projection_matrix_to_CRT_kitti(proj):
527
+ """Split projection matrix of KITTI.
528
+
529
+ Note:
530
+ This function is for KITTI only.
531
+
532
+ P = C @ [R|T]
533
+ C is an upper triangular matrix, so we invert CR and use a QR
534
+ decomposition, which is stable for all KITTI camera projection matrices.
535
+
536
+ Args:
537
+ proj (p.array, shape=[4, 4]): Intrinsics of camera.
538
+
539
+ Returns:
540
+ tuple[np.ndarray]: Split matrices C, R and T.
541
+ """
542
+
543
+ CR = proj[0:3, 0:3]
544
+ CT = proj[0:3, 3]
545
+ RinvCinv = np.linalg.inv(CR)
546
+ Rinv, Cinv = np.linalg.qr(RinvCinv)
547
+ C = np.linalg.inv(Cinv)
548
+ R = np.linalg.inv(Rinv)
549
+ T = Cinv @ CT
550
+ return C, R, T
551
+
552
+
553
+ def remove_outside_points(points, rect, Trv2c, P2, image_shape):
554
+ """Remove points which are outside of image.
555
+
556
+ Note:
557
+ This function is for KITTI only.
558
+
559
+ Args:
560
+ points (np.ndarray, shape=[N, 3+dims]): Total points.
561
+ rect (np.ndarray, shape=[4, 4]): Matrix to project points in
562
+ specific camera coordinate (e.g. CAM2) to CAM0.
563
+ Trv2c (np.ndarray, shape=[4, 4]): Matrix to project points in
564
+ camera coordinate to lidar coordinate.
565
+ P2 (p.array, shape=[4, 4]): Intrinsics of Camera2.
566
+ image_shape (list[int]): Shape of image.
567
+
568
+ Returns:
569
+ np.ndarray, shape=[N, 3+dims]: Filtered points.
570
+ """
571
+ # 5x faster than remove_outside_points_v1(2ms vs 10ms)
572
+ C, R, T = projection_matrix_to_CRT_kitti(P2)
573
+ image_bbox = [0, 0, image_shape[1], image_shape[0]]
574
+ frustum = get_frustum(image_bbox, C)
575
+ frustum -= T
576
+ frustum = np.linalg.inv(R) @ frustum.T
577
+ frustum = camera_to_lidar(frustum.T, rect, Trv2c)
578
+ frustum_surfaces = corner_to_surfaces_3d_jit(frustum[np.newaxis, ...])
579
+ indices = points_in_convex_polygon_3d_jit(points[:, :3], frustum_surfaces)
580
+ points = points[indices.reshape([-1])]
581
+ return points
582
+
583
+
584
+ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
585
+ """Get frustum corners in camera coordinates.
586
+
587
+ Args:
588
+ bbox_image (list[int]): box in image coordinates.
589
+ C (np.ndarray): Intrinsics.
590
+ near_clip (float, optional): Nearest distance of frustum.
591
+ Defaults to 0.001.
592
+ far_clip (float, optional): Farthest distance of frustum.
593
+ Defaults to 100.
594
+
595
+ Returns:
596
+ np.ndarray, shape=[8, 3]: coordinates of frustum corners.
597
+ """
598
+ fku = C[0, 0]
599
+ fkv = -C[1, 1]
600
+ u0v0 = C[0:2, 2]
601
+ z_points = np.array(
602
+ [near_clip] * 4 + [far_clip] * 4, dtype=C.dtype)[:, np.newaxis]
603
+ b = bbox_image
604
+ box_corners = np.array(
605
+ [[b[0], b[1]], [b[0], b[3]], [b[2], b[3]], [b[2], b[1]]],
606
+ dtype=C.dtype)
607
+ near_box_corners = (box_corners - u0v0) / np.array(
608
+ [fku / near_clip, -fkv / near_clip], dtype=C.dtype)
609
+ far_box_corners = (box_corners - u0v0) / np.array(
610
+ [fku / far_clip, -fkv / far_clip], dtype=C.dtype)
611
+ ret_xy = np.concatenate([near_box_corners, far_box_corners],
612
+ axis=0) # [8, 2]
613
+ ret_xyz = np.concatenate([ret_xy, z_points], axis=1)
614
+ return ret_xyz
615
+
616
+
617
+ def surface_equ_3d(polygon_surfaces):
618
+ """
619
+
620
+ Args:
621
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
622
+ [num_polygon, max_num_surfaces, max_num_points_of_surface, 3].
623
+ All surface normal vectors must point inward.
624
+ Max_num_points_of_surface must be at least 3.
625
+
626
+ Returns:
627
+ tuple: normal vector and its direction.
628
+ """
629
+ # return [a, b, c], d in ax+by+cz+d=0
630
+ # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
631
+ surface_vec = polygon_surfaces[:, :, :2, :] - \
632
+ polygon_surfaces[:, :, 1:3, :]
633
+ # normal_vec: [..., 3]
634
+ normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
635
+ # print(normal_vec.shape, points[..., 0, :].shape)
636
+ # d = -np.inner(normal_vec, points[..., 0, :])
637
+ d = np.einsum('aij, aij->ai', normal_vec, polygon_surfaces[:, :, 0, :])
638
+ return normal_vec, -d
639
+
640
+
641
+ @numba.njit
642
+ def _points_in_convex_polygon_3d_jit(points, polygon_surfaces, normal_vec, d,
643
+ num_surfaces):
644
+ """
645
+ Args:
646
+ points (np.ndarray): Input points with shape of (num_points, 3).
647
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
648
+ (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
649
+ All surface normal vectors must point inward.
650
+ Max_num_points_of_surface must be at least 3.
651
+ normal_vec (np.ndarray): Normal vector of polygon_surfaces.
652
+ d (int): Directions of normal vector.
653
+ num_surfaces (np.ndarray): Number of surfaces a polygon contains
654
+ shape of (num_polygon).
655
+
656
+ Returns:
657
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
658
+ """
659
+ max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
660
+ num_points = points.shape[0]
661
+ num_polygons = polygon_surfaces.shape[0]
662
+ ret = np.ones((num_points, num_polygons), dtype=np.bool_)
663
+ sign = 0.0
664
+ for i in range(num_points):
665
+ for j in range(num_polygons):
666
+ for k in range(max_num_surfaces):
667
+ if k > num_surfaces[j]:
668
+ break
669
+ sign = (
670
+ points[i, 0] * normal_vec[j, k, 0] +
671
+ points[i, 1] * normal_vec[j, k, 1] +
672
+ points[i, 2] * normal_vec[j, k, 2] + d[j, k])
673
+ if sign >= 0:
674
+ ret[i, j] = False
675
+ break
676
+ return ret
677
+
678
+
679
+ def points_in_convex_polygon_3d_jit(points,
680
+ polygon_surfaces,
681
+ num_surfaces=None):
682
+ """Check points is in 3d convex polygons.
683
+
684
+ Args:
685
+ points (np.ndarray): Input points with shape of (num_points, 3).
686
+ polygon_surfaces (np.ndarray): Polygon surfaces with shape of
687
+ (num_polygon, max_num_surfaces, max_num_points_of_surface, 3).
688
+ All surface normal vectors must point inward.
689
+ Max_num_points_of_surface must be at least 3.
690
+ num_surfaces (np.ndarray, optional): Number of surfaces a polygon
691
+ contains shape of (num_polygon). Defaults to None.
692
+
693
+ Returns:
694
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
695
+ """
696
+ max_num_surfaces, max_num_points_of_surface = polygon_surfaces.shape[1:3]
697
+ # num_points = points.shape[0]
698
+ num_polygons = polygon_surfaces.shape[0]
699
+ if num_surfaces is None:
700
+ num_surfaces = np.full((num_polygons, ), 9999999, dtype=np.int64)
701
+ normal_vec, d = surface_equ_3d(polygon_surfaces[:, :, :3, :])
702
+ # normal_vec: [num_polygon, max_num_surfaces, 3]
703
+ # d: [num_polygon, max_num_surfaces]
704
+ return _points_in_convex_polygon_3d_jit(points, polygon_surfaces,
705
+ normal_vec, d, num_surfaces)
706
+
707
+
708
+ @numba.njit
709
+ def points_in_convex_polygon_jit(points, polygon, clockwise=False):
710
+ """Check points is in 2d convex polygons. True when point in polygon.
711
+
712
+ Args:
713
+ points (np.ndarray): Input points with the shape of [num_points, 2].
714
+ polygon (np.ndarray): Input polygon with the shape of
715
+ [num_polygon, num_points_of_polygon, 2].
716
+ clockwise (bool, optional): Whether the polygon vertices are ordered
717
+ clockwise. Defaults to False.
718
+
719
+ Returns:
720
+ np.ndarray: Result matrix with the shape of [num_points, num_polygon].
721
+ """
722
+ # first convert polygon to directed lines
723
+ num_points_of_polygon = polygon.shape[1]
724
+ num_points = points.shape[0]
725
+ num_polygons = polygon.shape[0]
726
+ # vec for all the polygons
727
+ if clockwise:
728
+ vec1 = polygon - polygon[:,
729
+ np.array([num_points_of_polygon - 1] + list(
730
+ range(num_points_of_polygon - 1))), :]
731
+ else:
732
+ vec1 = polygon[:,
733
+ np.array([num_points_of_polygon - 1] +
734
+ list(range(num_points_of_polygon -
735
+ 1))), :] - polygon
736
+ ret = np.zeros((num_points, num_polygons), dtype=np.bool_)
737
+ success = True
738
+ cross = 0.0
739
+ for i in range(num_points):
740
+ for j in range(num_polygons):
741
+ success = True
742
+ for k in range(num_points_of_polygon):
743
+ vec = vec1[j, k]
744
+ cross = vec[1] * (polygon[j, k, 0] - points[i, 0])
745
+ cross -= vec[0] * (polygon[j, k, 1] - points[i, 1])
746
+ if cross >= 0:
747
+ success = False
748
+ break
749
+ ret[i, j] = success
750
+ return ret
751
+
752
+
753
+ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
754
+ """Convert kitti center boxes to corners.
755
+
756
+ 7 -------- 4
757
+ /| /|
758
+ 6 -------- 5 .
759
+ | | | |
760
+ . 3 -------- 0
761
+ |/ |/
762
+ 2 -------- 1
763
+
764
+ Note:
765
+ This function is for LiDAR boxes only.
766
+
767
+ Args:
768
+ boxes3d (np.ndarray): Boxes with shape of (N, 7)
769
+ [x, y, z, x_size, y_size, z_size, ry] in LiDAR coords,
770
+ see the definition of ry in KITTI dataset.
771
+ bottom_center (bool, optional): Whether z is on the bottom center
772
+ of object. Defaults to True.
773
+
774
+ Returns:
775
+ np.ndarray: Box corners with the shape of [N, 8, 3].
776
+ """
777
+ boxes_num = boxes3d.shape[0]
778
+ x_size, y_size, z_size = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
779
+ x_corners = np.array([
780
+ x_size / 2., -x_size / 2., -x_size / 2., x_size / 2., x_size / 2.,
781
+ -x_size / 2., -x_size / 2., x_size / 2.
782
+ ],
783
+ dtype=np.float32).T
784
+ y_corners = np.array([
785
+ -y_size / 2., -y_size / 2., y_size / 2., y_size / 2., -y_size / 2.,
786
+ -y_size / 2., y_size / 2., y_size / 2.
787
+ ],
788
+ dtype=np.float32).T
789
+ if bottom_center:
790
+ z_corners = np.zeros((boxes_num, 8), dtype=np.float32)
791
+ z_corners[:, 4:8] = z_size.reshape(boxes_num, 1).repeat(
792
+ 4, axis=1) # (N, 8)
793
+ else:
794
+ z_corners = np.array([
795
+ -z_size / 2., -z_size / 2., -z_size / 2., -z_size / 2.,
796
+ z_size / 2., z_size / 2., z_size / 2., z_size / 2.
797
+ ],
798
+ dtype=np.float32).T
799
+
800
+ ry = boxes3d[:, 6]
801
+ zeros, ones = np.zeros(
802
+ ry.size, dtype=np.float32), np.ones(
803
+ ry.size, dtype=np.float32)
804
+ rot_list = np.array([[np.cos(ry), np.sin(ry), zeros],
805
+ [-np.sin(ry), np.cos(ry), zeros],
806
+ [zeros, zeros, ones]]) # (3, 3, N)
807
+ R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3)
808
+
809
+ temp_corners = np.concatenate((x_corners.reshape(
810
+ -1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),
811
+ axis=2) # (N, 8, 3)
812
+ rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3)
813
+ x_corners = rotated_corners[:, :, 0]
814
+ y_corners = rotated_corners[:, :, 1]
815
+ z_corners = rotated_corners[:, :, 2]
816
+
817
+ x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
818
+
819
+ x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
820
+ y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
821
+ z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
822
+
823
+ corners = np.concatenate(
824
+ (x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),
825
+ axis=2)
826
+
827
+ return corners.astype(np.float32)
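For reference, a self-contained toy example of the `corners_nd` construction for a single axis-aligned 2D box (dimensions made up), showing the corner ordering it produces:

```python
# Toy example of the corners_nd recipe for one 2D box of size 4 x 2,
# centred at the origin (origin=0.5).
import numpy as np

dims = np.array([[4.0, 2.0]])
corners_norm = np.stack(np.unravel_index(np.arange(4), [2, 2]), axis=1).astype(dims.dtype)
corners_norm = corners_norm[[0, 1, 3, 2]] - 0.5        # clockwise, from the minimum corner
corners = dims.reshape(-1, 1, 2) * corners_norm.reshape(1, 4, 2)
print(corners[0])
# [[-2. -1.]
#  [-2.  1.]
#  [ 2.  1.]
#  [ 2. -1.]]
```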
det_map/det/dal/mmdet3d/core/bbox/coders/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmdet.core.bbox import build_bbox_coder
3
+ from .transfusion_bbox_coder import TransFusionBBoxCoder
4
+
5
+ __all__ = [
6
+ 'build_bbox_coder','TransFusionBBoxCoder'
7
+ ]
det_map/det/dal/mmdet3d/core/bbox/coders/transfusion_bbox_coder.py ADDED
@@ -0,0 +1,124 @@
1
+ import torch
2
+
3
+ from mmdet.core.bbox import BaseBBoxCoder
4
+ from mmdet.core.bbox.builder import BBOX_CODERS
5
+
6
+
7
+ @BBOX_CODERS.register_module()
8
+ class TransFusionBBoxCoder(BaseBBoxCoder):
9
+ def __init__(self,
10
+ pc_range,
11
+ out_size_factor,
12
+ voxel_size,
13
+ post_center_range=None,
14
+ score_threshold=None,
15
+ code_size=8,
16
+ ):
17
+ self.pc_range = pc_range
18
+ self.out_size_factor = out_size_factor
19
+ self.voxel_size = voxel_size
20
+ self.post_center_range = post_center_range
21
+ self.score_threshold = score_threshold
22
+ self.code_size = code_size
23
+
24
+ def encode(self, dst_boxes):
25
+ targets = torch.zeros([dst_boxes.shape[0], self.code_size]).to(dst_boxes.device)
26
+ targets[:, 0] = (dst_boxes[:, 0] - self.pc_range[0]) / (self.out_size_factor * self.voxel_size[0])
27
+ targets[:, 1] = (dst_boxes[:, 1] - self.pc_range[1]) / (self.out_size_factor * self.voxel_size[1])
28
+ # targets[:, 2] = (dst_boxes[:, 2] - self.post_center_range[2]) / (self.post_center_range[5] - self.post_center_range[2])
29
+ targets[:, 3] = dst_boxes[:, 3].log()
30
+ targets[:, 4] = dst_boxes[:, 4].log()
31
+ targets[:, 5] = dst_boxes[:, 5].log()
32
+ targets[:, 2] = dst_boxes[:, 2] + dst_boxes[:, 5] * 0.5 # bottom center to gravity center
33
+ targets[:, 6] = torch.sin(dst_boxes[:, 6])
34
+ targets[:, 7] = torch.cos(dst_boxes[:, 6])
35
+ if self.code_size == 10:
36
+ targets[:, 8:10] = dst_boxes[:, 7:]
37
+ return targets
38
+
39
+ def decode(self, heatmap, rot, dim, center, height, vel, filter=False):
40
+ """Decode bboxes.
41
+ Args:
42
+ heat (torch.Tensor): Heatmap with the shape of [B, num_cls, num_proposals].
43
+ rot (torch.Tensor): Rotation with the shape of
44
+ [B, 1, num_proposals].
45
+ dim (torch.Tensor): Dim of the boxes with the shape of
46
+ [B, 3, num_proposals].
47
+ center (torch.Tensor): bev center of the boxes with the shape of
48
+ [B, 2, num_proposals]. (in feature map metric)
49
+ hieght (torch.Tensor): height of the boxes with the shape of
50
+ [B, 2, num_proposals]. (in real world metric)
51
+ vel (torch.Tensor): Velocity with the shape of [B, 2, num_proposals].
52
+ filter: if False, return all box without checking score and center_range
53
+ Returns:
54
+ list[dict]: Decoded boxes.
55
+ """
56
+ # class label
57
+ final_preds = heatmap.max(1, keepdims=False).indices
58
+ final_scores = heatmap.max(1, keepdims=False).values
59
+
60
+ # change size to real world metric
61
+ center[:, 0, :] = center[:, 0, :] * self.out_size_factor * self.voxel_size[0] + self.pc_range[0]
62
+ center[:, 1, :] = center[:, 1, :] * self.out_size_factor * self.voxel_size[1] + self.pc_range[1]
63
+ # center[:, 2, :] = center[:, 2, :] * (self.post_center_range[5] - self.post_center_range[2]) + self.post_center_range[2]
64
+ dim[:, 0, :] = dim[:, 0, :].exp()
65
+ dim[:, 1, :] = dim[:, 1, :].exp()
66
+ dim[:, 2, :] = dim[:, 2, :].exp()
67
+ height = height - dim[:, 2:3, :] * 0.5 # gravity center to bottom center
68
+ rots, rotc = rot[:, 0:1, :], rot[:, 1:2, :]
69
+ rot = torch.atan2(rots, rotc)
70
+
71
+ if vel is None:
72
+ final_box_preds = torch.cat([center, height, dim, rot], dim=1).permute(0, 2, 1)
73
+ else:
74
+ final_box_preds = torch.cat([center, height, dim, rot, vel], dim=1).permute(0, 2, 1)
75
+
76
+ predictions_dicts = []
77
+ for i in range(heatmap.shape[0]):
78
+ boxes3d = final_box_preds[i]
79
+ scores = final_scores[i]
80
+ labels = final_preds[i]
81
+ predictions_dict = {
82
+ 'bboxes': boxes3d,
83
+ 'scores': scores,
84
+ 'labels': labels
85
+ }
86
+ predictions_dicts.append(predictions_dict)
87
+
88
+ if filter is False:
89
+ return predictions_dicts
90
+
91
+ # use score threshold
92
+ if self.score_threshold is not None:
93
+ thresh_mask = final_scores > self.score_threshold
94
+
95
+ if self.post_center_range is not None:
96
+ self.post_center_range = torch.tensor(
97
+ self.post_center_range, device=heatmap.device)
98
+ mask = (final_box_preds[..., :3] >=
99
+ self.post_center_range[:3]).all(2)
100
+ mask &= (final_box_preds[..., :3] <=
101
+ self.post_center_range[3:]).all(2)
102
+
103
+ predictions_dicts = []
104
+ for i in range(heatmap.shape[0]):
105
+ cmask = mask[i, :]
106
+ if self.score_threshold:
107
+ cmask &= thresh_mask[i]
108
+
109
+ boxes3d = final_box_preds[i, cmask]
110
+ scores = final_scores[i, cmask]
111
+ labels = final_preds[i, cmask]
112
+ predictions_dict = {
113
+ 'bboxes': boxes3d,
114
+ 'scores': scores,
115
+ 'labels': labels
116
+ }
117
+
118
+ predictions_dicts.append(predictions_dict)
119
+ else:
120
+ raise NotImplementedError(
121
+ 'Need to reorganize output as a batch, only '
122
+ 'support post_center_range is not None for now!')
123
+
124
+ return predictions_dicts
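
A hedged usage sketch for the coder above. The pc_range, voxel_size and post_center_range values are illustrative assumptions, not this repo's training configuration, and mmdet must be installed for the registry import:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.coders import TransFusionBBoxCoder

    coder = TransFusionBBoxCoder(
        pc_range=[-54.0, -54.0, -5.0, 54.0, 54.0, 3.0],   # illustrative values only
        out_size_factor=8,
        voxel_size=[0.075, 0.075, 0.2],
        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
        code_size=10,
    )
    # (x, y, z, dx, dy, dz, yaw, vx, vy) ground-truth box -> regression targets
    gt = torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3, 1.0, 0.0]])
    targets = coder.encode(gt)
    print(targets.shape)  # (1, 10): grid x/y, gravity z, log sizes, sin/cos yaw, velocity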
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/__init__.py ADDED
@@ -0,0 +1,10 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D,
+ axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d)
+
+ __all__ = [
+ 'BboxOverlaps3D', 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D',
+ 'axis_aligned_bbox_overlaps_3d'
+ ]
det_map/det/dal/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py ADDED
@@ -0,0 +1,232 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ from mmdet.core.bbox import bbox_overlaps
4
+ from mmdet.core.bbox.iou_calculators.builder import IOU_CALCULATORS
5
+
6
+ from ..structures import get_box_type
7
+
8
+
9
+ @IOU_CALCULATORS.register_module()
10
+ class BboxOverlaps3D(object):
11
+ """3D IoU Calculator.
12
+
13
+ Args:
14
+ coordinate (str): The coordinate system, valid options are
15
+ 'camera', 'lidar', and 'depth'.
16
+ """
17
+
18
+ def __init__(self, coordinate):
19
+ assert coordinate in ['camera', 'lidar', 'depth']
20
+ self.coordinate = coordinate
21
+
22
+ def __call__(self, bboxes1, bboxes2, mode='iou'):
23
+ """Calculate 3D IoU using cuda implementation.
24
+
25
+ Note:
26
+ This function calculates the IoU of 3D boxes based on their volumes.
27
+ IoU calculator :class:`BboxOverlaps3D` uses this function to
28
+ calculate the actual 3D IoUs of boxes.
29
+
30
+ Args:
31
+ bboxes1 (torch.Tensor): with shape (N, 7+C),
32
+ (x, y, z, x_size, y_size, z_size, ry, v*).
33
+ bboxes2 (torch.Tensor): with shape (M, 7+C),
34
+ (x, y, z, x_size, y_size, z_size, ry, v*).
35
+ mode (str): "iou" (intersection over union) or
36
+ iof (intersection over foreground).
37
+
38
+ Return:
39
+ torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
40
+ with shape (M, N) (aligned mode is not supported currently).
41
+ """
42
+ return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate)
43
+
44
+ def __repr__(self):
45
+ """str: return a string that describes the module"""
46
+ repr_str = self.__class__.__name__
47
+ repr_str += f'(coordinate={self.coordinate})'
48
+ return repr_str
49
+
50
+
51
+ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
52
+ """Calculate 3D IoU using cuda implementation.
53
+
54
+ Note:
55
+ This function calculates the IoU of 3D boxes based on their volumes.
56
+ IoU calculator :class:`BboxOverlaps3D` uses this function to
57
+ calculate the actual IoUs of boxes.
58
+
59
+ Args:
60
+ bboxes1 (torch.Tensor): with shape (N, 7+C),
61
+ (x, y, z, x_size, y_size, z_size, ry, v*).
62
+ bboxes2 (torch.Tensor): with shape (M, 7+C),
63
+ (x, y, z, x_size, y_size, z_size, ry, v*).
64
+ mode (str): "iou" (intersection over union) or
65
+ iof (intersection over foreground).
66
+ coordinate (str): 'camera' or 'lidar' coordinate system.
67
+
68
+ Return:
69
+ torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2
70
+ with shape (M, N) (aligned mode is not supported currently).
71
+ """
72
+ assert bboxes1.size(-1) == bboxes2.size(-1) >= 7
73
+
74
+ box_type, _ = get_box_type(coordinate)
75
+
76
+ bboxes1 = box_type(bboxes1, box_dim=bboxes1.shape[-1])
77
+ bboxes2 = box_type(bboxes2, box_dim=bboxes2.shape[-1])
78
+
79
+ return bboxes1.overlaps(bboxes1, bboxes2, mode=mode)
80
+
81
+
82
+ @IOU_CALCULATORS.register_module()
83
+ class AxisAlignedBboxOverlaps3D(object):
84
+ """Axis-aligned 3D Overlaps (IoU) Calculator."""
85
+
86
+ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
87
+ """Calculate IoU between 2D bboxes.
88
+
89
+ Args:
90
+ bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
91
+ format or empty.
92
+ bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
93
+ format or empty.
94
+ B indicates the batch dim, in shape (B1, B2, ..., Bn).
95
+ If ``is_aligned`` is ``True``, then m and n must be equal.
96
+ mode (str): "iou" (intersection over union) or "giou" (generalized
97
+ intersection over union).
98
+ is_aligned (bool, optional): If True, then m and n must be equal.
99
+ Defaults to False.
100
+ Returns:
101
+ Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
102
+ """
103
+ assert bboxes1.size(-1) == bboxes2.size(-1) == 6
104
+ return axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2, mode,
105
+ is_aligned)
106
+
107
+ def __repr__(self):
108
+ """str: a string describing the module"""
109
+ repr_str = self.__class__.__name__ + '()'
110
+ return repr_str
111
+
112
+
113
+ def axis_aligned_bbox_overlaps_3d(bboxes1,
114
+ bboxes2,
115
+ mode='iou',
116
+ is_aligned=False,
117
+ eps=1e-6):
118
+ """Calculate overlap between two set of axis aligned 3D bboxes. If
119
+ ``is_aligned`` is ``False``, then calculate the overlaps between each bbox
120
+ of bboxes1 and bboxes2, otherwise the overlaps between each aligned pair of
121
+ bboxes1 and bboxes2.
122
+
123
+ Args:
124
+ bboxes1 (Tensor): shape (B, m, 6) in <x1, y1, z1, x2, y2, z2>
125
+ format or empty.
126
+ bboxes2 (Tensor): shape (B, n, 6) in <x1, y1, z1, x2, y2, z2>
127
+ format or empty.
128
+ B indicates the batch dim, in shape (B1, B2, ..., Bn).
129
+ If ``is_aligned`` is ``True``, then m and n must be equal.
130
+ mode (str): "iou" (intersection over union) or "giou" (generalized
131
+ intersection over union).
132
+ is_aligned (bool, optional): If True, then m and n must be equal.
133
+ Defaults to False.
134
+ eps (float, optional): A value added to the denominator for numerical
135
+ stability. Defaults to 1e-6.
136
+
137
+ Returns:
138
+ Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
139
+
140
+ Example:
141
+ >>> bboxes1 = torch.FloatTensor([
142
+ >>> [0, 0, 0, 10, 10, 10],
143
+ >>> [10, 10, 10, 20, 20, 20],
144
+ >>> [32, 32, 32, 38, 40, 42],
145
+ >>> ])
146
+ >>> bboxes2 = torch.FloatTensor([
147
+ >>> [0, 0, 0, 10, 20, 20],
148
+ >>> [0, 10, 10, 10, 19, 20],
149
+ >>> [10, 10, 10, 20, 20, 20],
150
+ >>> ])
151
+ >>> overlaps = axis_aligned_bbox_overlaps_3d(bboxes1, bboxes2)
152
+ >>> assert overlaps.shape == (3, 3)
153
+ >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)
154
+ >>> assert overlaps.shape == (3, )
155
+ Example:
156
+ >>> empty = torch.empty(0, 6)
157
+ >>> nonempty = torch.FloatTensor([[0, 0, 0, 10, 9, 10]])
158
+ >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
159
+ >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
160
+ >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
161
+ """
162
+
163
+ assert mode in ['iou', 'giou'], f'Unsupported mode {mode}'
164
+ # Either the boxes are empty or the length of the boxes' last dimension is 6
165
+ assert (bboxes1.size(-1) == 6 or bboxes1.size(0) == 0)
166
+ assert (bboxes2.size(-1) == 6 or bboxes2.size(0) == 0)
167
+
168
+ # Batch dim must be the same
169
+ # Batch dim: (B1, B2, ... Bn)
170
+ assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
171
+ batch_shape = bboxes1.shape[:-2]
172
+
173
+ rows = bboxes1.size(-2)
174
+ cols = bboxes2.size(-2)
175
+ if is_aligned:
176
+ assert rows == cols
177
+
178
+ if rows * cols == 0:
179
+ if is_aligned:
180
+ return bboxes1.new(batch_shape + (rows,))
181
+ else:
182
+ return bboxes1.new(batch_shape + (rows, cols))
183
+
184
+ area1 = (bboxes1[..., 3] -
185
+ bboxes1[..., 0]) * (bboxes1[..., 4] - bboxes1[..., 1]) * (
186
+ bboxes1[..., 5] - bboxes1[..., 2])
187
+ area2 = (bboxes2[..., 3] -
188
+ bboxes2[..., 0]) * (bboxes2[..., 4] - bboxes2[..., 1]) * (
189
+ bboxes2[..., 5] - bboxes2[..., 2])
190
+
191
+ if is_aligned:
192
+ lt = torch.max(bboxes1[..., :3], bboxes2[..., :3]) # [B, rows, 3]
193
+ rb = torch.min(bboxes1[..., 3:], bboxes2[..., 3:]) # [B, rows, 3]
194
+
195
+ wh = (rb - lt).clamp(min=0) # [B, rows, 2]
196
+ overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]
197
+
198
+ if mode in ['iou', 'giou']:
199
+ union = area1 + area2 - overlap
200
+ else:
201
+ union = area1
202
+ if mode == 'giou':
203
+ enclosed_lt = torch.min(bboxes1[..., :3], bboxes2[..., :3])
204
+ enclosed_rb = torch.max(bboxes1[..., 3:], bboxes2[..., 3:])
205
+ else:
206
+ lt = torch.max(bboxes1[..., :, None, :3],
207
+ bboxes2[..., None, :, :3]) # [B, rows, cols, 3]
208
+ rb = torch.min(bboxes1[..., :, None, 3:],
209
+ bboxes2[..., None, :, 3:]) # [B, rows, cols, 3]
210
+
211
+ wh = (rb - lt).clamp(min=0) # [B, rows, cols, 3]
212
+ overlap = wh[..., 0] * wh[..., 1] * wh[..., 2]
213
+
214
+ if mode in ['iou', 'giou']:
215
+ union = area1[..., None] + area2[..., None, :] - overlap
216
+ if mode == 'giou':
217
+ enclosed_lt = torch.min(bboxes1[..., :, None, :3],
218
+ bboxes2[..., None, :, :3])
219
+ enclosed_rb = torch.max(bboxes1[..., :, None, 3:],
220
+ bboxes2[..., None, :, 3:])
221
+
222
+ eps = union.new_tensor([eps])
223
+ union = torch.max(union, eps)
224
+ ious = overlap / union
225
+ if mode in ['iou']:
226
+ return ious
227
+ # calculate gious
228
+ enclose_wh = (enclosed_rb - enclosed_lt).clamp(min=0)
229
+ enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] * enclose_wh[..., 2]
230
+ enclose_area = torch.max(enclose_area, eps)
231
+ gious = ious - (enclose_area - union) / enclose_area
232
+ return gious
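
A small sanity-check sketch for the axis-aligned overlap helper above, with hand-chosen values: two 10x10x10 cubes shifted by 5 on each axis intersect in a 5x5x5 cube, so the IoU is 125 / 1875:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.iou_calculators import axis_aligned_bbox_overlaps_3d

    a = torch.tensor([[0.0, 0.0, 0.0, 10.0, 10.0, 10.0]])
    b = torch.tensor([[5.0, 5.0, 5.0, 15.0, 15.0, 15.0]])
    iou = axis_aligned_bbox_overlaps_3d(a, b)
    print(iou)  # tensor([[0.0667]]) == 125 / (1000 + 1000 - 125)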
det_map/det/dal/mmdet3d/core/bbox/structures/__init__.py ADDED
@@ -0,0 +1,18 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .base_box3d import BaseInstance3DBoxes
+ from .box_3d_mode import Box3DMode
+ from .cam_box3d import CameraInstance3DBoxes
+ from .coord_3d_mode import Coord3DMode
+ from .depth_box3d import DepthInstance3DBoxes
+ from .lidar_box3d import LiDARInstance3DBoxes
+ from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
+ mono_cam_box2vis, points_cam2img, points_img2cam,
+ rotation_3d_in_axis, xywhr2xyxyr)
+
+ __all__ = [
+ 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
+ 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
+ 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
+ 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',
+ 'get_proj_mat_by_coord_type'
+ ]
det_map/det/dal/mmdet3d/core/bbox/structures/base_box3d.py ADDED
@@ -0,0 +1,578 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import warnings
3
+ from abc import abstractmethod
4
+
5
+ import numpy as np
6
+ import torch
7
+ from mmcv.ops import box_iou_rotated, points_in_boxes_all, points_in_boxes_part
8
+
9
+ from .utils import limit_period
10
+
11
+
12
+ class BaseInstance3DBoxes(object):
13
+ """Base class for 3D Boxes.
14
+
15
+ Note:
16
+ The box is bottom centered, i.e. the relative position of origin in
17
+ the box is (0.5, 0.5, 0).
18
+
19
+ Args:
20
+ tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix.
21
+ box_dim (int): Number of the dimension of a box.
22
+ Each row is (x, y, z, x_size, y_size, z_size, yaw).
23
+ Defaults to 7.
24
+ with_yaw (bool): Whether the box is with yaw rotation.
25
+ If False, the value of yaw will be set to 0 as minmax boxes.
26
+ Defaults to True.
27
+ origin (tuple[float], optional): Relative position of the box origin.
28
+ Defaults to (0.5, 0.5, 0). This will guide the box be converted to
29
+ (0.5, 0.5, 0) mode.
30
+
31
+ Attributes:
32
+ tensor (torch.Tensor): Float matrix of N x box_dim.
33
+ box_dim (int): Integer indicating the dimension of a box.
34
+ Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
35
+ with_yaw (bool): If False, the value of yaw will be set to 0 as
+ axis-aligned (minmax) boxes.
37
+ """
38
+
39
+ def __init__(self, tensor, box_dim=7, with_yaw=True, origin=(0.5, 0.5, 0)):
40
+ if isinstance(tensor, torch.Tensor):
41
+ device = tensor.device
42
+ else:
43
+ device = torch.device('cpu')
44
+ tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
45
+ if tensor.numel() == 0:
46
+ # Use reshape, so we don't end up creating a new tensor that
47
+ # does not depend on the inputs (and consequently confuses jit)
48
+ tensor = tensor.reshape((0, box_dim)).to(
49
+ dtype=torch.float32, device=device)
50
+ assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
51
+
52
+ if tensor.shape[-1] == 6:
53
+ # If the dimension of boxes is 6, we expand box_dim by padding
54
+ # 0 as a fake yaw and set with_yaw to False.
55
+ assert box_dim == 6
56
+ fake_rot = tensor.new_zeros(tensor.shape[0], 1)
57
+ tensor = torch.cat((tensor, fake_rot), dim=-1)
58
+ self.box_dim = box_dim + 1
59
+ self.with_yaw = False
60
+ else:
61
+ self.box_dim = box_dim
62
+ self.with_yaw = with_yaw
63
+ self.tensor = tensor.clone()
64
+
65
+ if origin != (0.5, 0.5, 0):
66
+ dst = self.tensor.new_tensor((0.5, 0.5, 0))
67
+ src = self.tensor.new_tensor(origin)
68
+ self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
69
+
70
+ @property
71
+ def volume(self):
72
+ """torch.Tensor: A vector with volume of each box."""
73
+ return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
74
+
75
+ @property
76
+ def dims(self):
77
+ """torch.Tensor: Size dimensions of each box in shape (N, 3)."""
78
+ return self.tensor[:, 3:6]
79
+
80
+ @property
81
+ def yaw(self):
82
+ """torch.Tensor: A vector with yaw of each box in shape (N, )."""
83
+ return self.tensor[:, 6]
84
+
85
+ @property
86
+ def height(self):
87
+ """torch.Tensor: A vector with height of each box in shape (N, )."""
88
+ return self.tensor[:, 5]
89
+
90
+ @property
91
+ def top_height(self):
92
+ """torch.Tensor:
93
+ A vector with the top height of each box in shape (N, )."""
94
+ return self.bottom_height + self.height
95
+
96
+ @property
97
+ def bottom_height(self):
98
+ """torch.Tensor:
99
+ A vector with bottom's height of each box in shape (N, )."""
100
+ return self.tensor[:, 2]
101
+
102
+ @property
103
+ def center(self):
104
+ """Calculate the center of all the boxes.
105
+
106
+ Note:
107
+ In MMDetection3D's convention, the bottom center is
108
+ usually taken as the default center.
109
+
110
+ The relative position of the centers in different kinds of
111
+ boxes are different, e.g., the relative center of a boxes is
112
+ (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar.
113
+ It is recommended to use ``bottom_center`` or ``gravity_center``
114
+ for clearer usage.
115
+
116
+ Returns:
117
+ torch.Tensor: A tensor with center of each box in shape (N, 3).
118
+ """
119
+ return self.bottom_center
120
+
121
+ @property
122
+ def bottom_center(self):
123
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
124
+ return self.tensor[:, :3]
125
+
126
+ @property
127
+ def gravity_center(self):
128
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
129
+ pass
130
+
131
+ @property
132
+ def corners(self):
133
+ """torch.Tensor:
134
+ a tensor with 8 corners of each box in shape (N, 8, 3)."""
135
+ pass
136
+
137
+ @property
138
+ def bev(self):
139
+ """torch.Tensor: 2D BEV box of each box with rotation
140
+ in XYWHR format, in shape (N, 5)."""
141
+ return self.tensor[:, [0, 1, 3, 4, 6]]
142
+
143
+ @property
144
+ def nearest_bev(self):
145
+ """torch.Tensor: A tensor of 2D BEV box of each box
146
+ without rotation."""
147
+ # Obtain BEV boxes with rotation in XYWHR format
148
+ bev_rotated_boxes = self.bev
149
+ # convert the rotation to a valid range
150
+ rotations = bev_rotated_boxes[:, -1]
151
+ normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi))
152
+
153
+ # find the center of boxes
154
+ conditions = (normed_rotations > np.pi / 4)[..., None]
155
+ bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:,
156
+ [0, 1, 3, 2]],
157
+ bev_rotated_boxes[:, :4])
158
+
159
+ centers = bboxes_xywh[:, :2]
160
+ dims = bboxes_xywh[:, 2:]
161
+ bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1)
162
+ return bev_boxes
163
+
164
+ def in_range_bev(self, box_range):
165
+ """Check whether the boxes are in the given range.
166
+
167
+ Args:
168
+ box_range (list | torch.Tensor): the range of box
169
+ (x_min, y_min, x_max, y_max)
170
+
171
+ Note:
172
+ The original implementation of SECOND checks whether boxes in
173
+ a range by checking whether the points are in a convex
174
+ polygon, we reduce the burden for simpler cases.
175
+
176
+ Returns:
177
+ torch.Tensor: Whether each box is inside the reference range.
178
+ """
179
+ in_range_flags = ((self.bev[:, 0] > box_range[0])
180
+ & (self.bev[:, 1] > box_range[1])
181
+ & (self.bev[:, 0] < box_range[2])
182
+ & (self.bev[:, 1] < box_range[3]))
183
+ return in_range_flags
184
+
185
+ @abstractmethod
186
+ def rotate(self, angle, points=None):
187
+ """Rotate boxes with points (optional) with the given angle or rotation
188
+ matrix.
189
+
190
+ Args:
191
+ angle (float | torch.Tensor | np.ndarray):
192
+ Rotation angle or rotation matrix.
193
+ points (torch.Tensor | numpy.ndarray |
194
+ :obj:`BasePoints`, optional):
195
+ Points to rotate. Defaults to None.
196
+ """
197
+ pass
198
+
199
+ @abstractmethod
200
+ def flip(self, bev_direction='horizontal'):
201
+ """Flip the boxes in BEV along given BEV direction.
202
+
203
+ Args:
204
+ bev_direction (str, optional): Direction by which to flip.
205
+ Can be chosen from 'horizontal' and 'vertical'.
206
+ Defaults to 'horizontal'.
207
+ """
208
+ pass
209
+
210
+ def translate(self, trans_vector):
211
+ """Translate boxes with the given translation vector.
212
+
213
+ Args:
214
+ trans_vector (torch.Tensor): Translation vector of size (1, 3).
215
+ """
216
+ if not isinstance(trans_vector, torch.Tensor):
217
+ trans_vector = self.tensor.new_tensor(trans_vector)
218
+ self.tensor[:, :3] += trans_vector
219
+
220
+ def in_range_3d(self, box_range):
221
+ """Check whether the boxes are in the given range.
222
+
223
+ Args:
224
+ box_range (list | torch.Tensor): The range of box
225
+ (x_min, y_min, z_min, x_max, y_max, z_max)
226
+
227
+ Note:
228
+ In the original implementation of SECOND, checking whether
229
+ a box in the range checks whether the points are in a convex
230
+ polygon, we try to reduce the burden for simpler cases.
231
+
232
+ Returns:
233
+ torch.Tensor: A binary vector indicating whether each box is
234
+ inside the reference range.
235
+ """
236
+ in_range_flags = ((self.tensor[:, 0] > box_range[0])
237
+ & (self.tensor[:, 1] > box_range[1])
238
+ & (self.tensor[:, 2] > box_range[2])
239
+ & (self.tensor[:, 0] < box_range[3])
240
+ & (self.tensor[:, 1] < box_range[4])
241
+ & (self.tensor[:, 2] < box_range[5]))
242
+ return in_range_flags
243
+
244
+ @abstractmethod
245
+ def convert_to(self, dst, rt_mat=None):
246
+ """Convert self to ``dst`` mode.
247
+
248
+ Args:
249
+ dst (:obj:`Box3DMode`): The target Box mode.
250
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
251
+ translation matrix between different coordinates.
252
+ Defaults to None.
253
+ The conversion from `src` coordinates to `dst` coordinates
254
+ usually comes along the change of sensors, e.g., from camera
255
+ to LiDAR. This requires a transformation matrix.
256
+
257
+ Returns:
258
+ :obj:`BaseInstance3DBoxes`: The converted box of the same type
259
+ in the `dst` mode.
260
+ """
261
+ pass
262
+
263
+ def scale(self, scale_factor):
264
+ """Scale the box with horizontal and vertical scaling factors.
265
+
266
+ Args:
267
+ scale_factor (float): Scale factor to scale the boxes.
268
+ """
269
+ self.tensor[:, :6] *= scale_factor
270
+ self.tensor[:, 7:] *= scale_factor # velocity
271
+
272
+ def limit_yaw(self, offset=0.5, period=np.pi):
273
+ """Limit the yaw to a given period and offset.
274
+
275
+ Args:
276
+ offset (float, optional): The offset of the yaw. Defaults to 0.5.
277
+ period (float, optional): The expected period. Defaults to np.pi.
278
+ """
279
+ self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period)
280
+
281
+ def nonempty(self, threshold=0.0):
282
+ """Find boxes that are non-empty.
283
+
284
+ A box is considered empty if any of its
+ sides is no larger than the threshold.
286
+
287
+ Args:
288
+ threshold (float, optional): The threshold of minimal sizes.
289
+ Defaults to 0.0.
290
+
291
+ Returns:
292
+ torch.Tensor: A binary vector which represents whether each
293
+ box is empty (False) or non-empty (True).
294
+ """
295
+ box = self.tensor
296
+ size_x = box[..., 3]
297
+ size_y = box[..., 4]
298
+ size_z = box[..., 5]
299
+ keep = ((size_x > threshold)
300
+ & (size_y > threshold) & (size_z > threshold))
301
+ return keep
302
+
303
+ def __getitem__(self, item):
304
+ """
305
+ Note:
306
+ The following usages are allowed:
307
+ 1. `new_boxes = boxes[3]`:
308
+ return a `Boxes` that contains only one box.
309
+ 2. `new_boxes = boxes[2:10]`:
310
+ return a slice of boxes.
311
+ 3. `new_boxes = boxes[vector]`:
312
+ where vector is a torch.BoolTensor with `length = len(boxes)`.
313
+ Nonzero elements in the vector will be selected.
314
+ Note that the returned Boxes might share storage with this Boxes,
315
+ subject to Pytorch's indexing semantics.
316
+
317
+ Returns:
318
+ :obj:`BaseInstance3DBoxes`: A new object of
319
+ :class:`BaseInstance3DBoxes` after indexing.
320
+ """
321
+ original_type = type(self)
322
+ if isinstance(item, int):
323
+ return original_type(
324
+ self.tensor[item].view(1, -1),
325
+ box_dim=self.box_dim,
326
+ with_yaw=self.with_yaw)
327
+ b = self.tensor[item]
328
+ assert b.dim() == 2, \
329
+ f'Indexing on Boxes with {item} failed to return a matrix!'
330
+ return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
331
+
332
+ def __len__(self):
333
+ """int: Number of boxes in the current object."""
334
+ return self.tensor.shape[0]
335
+
336
+ def __repr__(self):
337
+ """str: Return a strings that describes the object."""
338
+ return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
339
+
340
+ @classmethod
341
+ def cat(cls, boxes_list):
342
+ """Concatenate a list of Boxes into a single Boxes.
343
+
344
+ Args:
345
+ boxes_list (list[:obj:`BaseInstance3DBoxes`]): List of boxes.
346
+
347
+ Returns:
348
+ :obj:`BaseInstance3DBoxes`: The concatenated Boxes.
349
+ """
350
+ assert isinstance(boxes_list, (list, tuple))
351
+ if len(boxes_list) == 0:
352
+ return cls(torch.empty(0))
353
+ assert all(isinstance(box, cls) for box in boxes_list)
354
+
355
+ # use torch.cat (v.s. layers.cat)
356
+ # so the returned boxes never share storage with input
357
+ cat_boxes = cls(
358
+ torch.cat([b.tensor for b in boxes_list], dim=0),
359
+ box_dim=boxes_list[0].tensor.shape[1],
360
+ with_yaw=boxes_list[0].with_yaw)
361
+ return cat_boxes
362
+
363
+ def to(self, device):
364
+ """Convert current boxes to a specific device.
365
+
366
+ Args:
367
+ device (str | :obj:`torch.device`): The name of the device.
368
+
369
+ Returns:
370
+ :obj:`BaseInstance3DBoxes`: A new boxes object on the
371
+ specific device.
372
+ """
373
+ original_type = type(self)
374
+ return original_type(
375
+ self.tensor.to(device),
376
+ box_dim=self.box_dim,
377
+ with_yaw=self.with_yaw)
378
+
379
+ def clone(self):
380
+ """Clone the Boxes.
381
+
382
+ Returns:
383
+ :obj:`BaseInstance3DBoxes`: Box object with the same properties
384
+ as self.
385
+ """
386
+ original_type = type(self)
387
+ return original_type(
388
+ self.tensor.clone(), box_dim=self.box_dim, with_yaw=self.with_yaw)
389
+
390
+ @property
391
+ def device(self):
392
+ """str: The device of the boxes are on."""
393
+ return self.tensor.device
394
+
395
+ def __iter__(self):
396
+ """Yield a box as a Tensor of shape (4,) at a time.
397
+
398
+ Returns:
399
+ torch.Tensor: A box of shape (4,).
400
+ """
401
+ yield from self.tensor
402
+
403
+ @classmethod
404
+ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
405
+ """Calculate height overlaps of two boxes.
406
+
407
+ Note:
408
+ This function calculates the height overlaps between boxes1 and
409
+ boxes2, boxes1 and boxes2 should be in the same type.
410
+
411
+ Args:
412
+ boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
413
+ boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
414
+ mode (str, optional): Mode of IoU calculation. Defaults to 'iou'.
415
+
416
+ Returns:
417
+ torch.Tensor: Calculated iou of boxes.
418
+ """
419
+ assert isinstance(boxes1, BaseInstance3DBoxes)
420
+ assert isinstance(boxes2, BaseInstance3DBoxes)
421
+ assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
422
+ f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
423
+
424
+ boxes1_top_height = boxes1.top_height.view(-1, 1)
425
+ boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
426
+ boxes2_top_height = boxes2.top_height.view(1, -1)
427
+ boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
428
+
429
+ heighest_of_bottom = torch.max(boxes1_bottom_height,
430
+ boxes2_bottom_height)
431
+ lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)
432
+ overlaps_h = torch.clamp(lowest_of_top - heighest_of_bottom, min=0)
433
+ return overlaps_h
434
+
435
+ @classmethod
436
+ def overlaps(cls, boxes1, boxes2, mode='iou'):
437
+ """Calculate 3D overlaps of two boxes.
438
+
439
+ Note:
440
+ This function calculates the overlaps between ``boxes1`` and
441
+ ``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
442
+
443
+ Args:
444
+ boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
445
+ boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
446
+ mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
447
+
448
+ Returns:
449
+ torch.Tensor: Calculated 3D overlaps of the boxes.
450
+ """
451
+ assert isinstance(boxes1, BaseInstance3DBoxes)
452
+ assert isinstance(boxes2, BaseInstance3DBoxes)
453
+ assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
454
+ f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
455
+
456
+ assert mode in ['iou', 'iof']
457
+
458
+ rows = len(boxes1)
459
+ cols = len(boxes2)
460
+ if rows * cols == 0:
461
+ return boxes1.tensor.new(rows, cols)
462
+
463
+ # height overlap
464
+ overlaps_h = cls.height_overlaps(boxes1, boxes2)
465
+
466
+ # bev overlap
467
+ iou2d = box_iou_rotated(boxes1.bev, boxes2.bev)
468
+ areas1 = (boxes1.bev[:, 2] * boxes1.bev[:, 3]).unsqueeze(1).expand(
469
+ rows, cols)
470
+ areas2 = (boxes2.bev[:, 2] * boxes2.bev[:, 3]).unsqueeze(0).expand(
471
+ rows, cols)
472
+ overlaps_bev = iou2d * (areas1 + areas2) / (1 + iou2d)
473
+
474
+ # 3d overlaps
475
+ overlaps_3d = overlaps_bev.to(boxes1.device) * overlaps_h
476
+
477
+ volume1 = boxes1.volume.view(-1, 1)
478
+ volume2 = boxes2.volume.view(1, -1)
479
+
480
+ if mode == 'iou':
481
+ # the clamp func is used to avoid division of 0
482
+ iou3d = overlaps_3d / torch.clamp(
483
+ volume1 + volume2 - overlaps_3d, min=1e-8)
484
+ else:
485
+ iou3d = overlaps_3d / torch.clamp(volume1, min=1e-8)
486
+
487
+ return iou3d
488
+
489
+ def new_box(self, data):
490
+ """Create a new box object with data.
491
+
492
+ The new box and its tensor has the similar properties
493
+ as self and self.tensor, respectively.
494
+
495
+ Args:
496
+ data (torch.Tensor | numpy.array | list): Data to be copied.
497
+
498
+ Returns:
499
+ :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``,
500
+ the object's other properties are similar to ``self``.
501
+ """
502
+ new_tensor = self.tensor.new_tensor(data) \
503
+ if not isinstance(data, torch.Tensor) else data.to(self.device)
504
+ original_type = type(self)
505
+ return original_type(
506
+ new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw)
507
+
508
+ def points_in_boxes_part(self, points, boxes_override=None):
509
+ """Find the box in which each point is.
510
+
511
+ Args:
512
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
513
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
514
+ boxes_override (torch.Tensor, optional): Boxes to override
515
+ `self.tensor`. Defaults to None.
516
+
517
+ Returns:
518
+ torch.Tensor: The index of the first box that each point
519
+ is in, in shape (M, ). Default value is -1
520
+ (if the point is not enclosed by any box).
521
+
522
+ Note:
523
+ If a point is enclosed by multiple boxes, the index of the
524
+ first box will be returned.
525
+ """
526
+ if boxes_override is not None:
527
+ boxes = boxes_override
528
+ else:
529
+ boxes = self.tensor
530
+ if points.dim() == 2:
531
+ points = points.unsqueeze(0)
532
+ box_idx = points_in_boxes_part(points,
533
+ boxes.unsqueeze(0).to(
534
+ points.device)).squeeze(0)
535
+ return box_idx
536
+
537
+ def points_in_boxes_all(self, points, boxes_override=None):
538
+ """Find all boxes in which each point is.
539
+
540
+ Args:
541
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
542
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
543
+ boxes_override (torch.Tensor, optional): Boxes to override
544
+ `self.tensor`. Defaults to None.
545
+
546
+ Returns:
547
+ torch.Tensor: A tensor indicating whether a point is in a box,
548
+ in shape (M, T). T is the number of boxes. Denote this
549
+ tensor as A, if the m^th point is in the t^th box, then
550
+ `A[m, t] == 1`, otherwise `A[m, t] == 0`.
551
+ """
552
+ if boxes_override is not None:
553
+ boxes = boxes_override
554
+ else:
555
+ boxes = self.tensor
556
+
557
+ points_clone = points.clone()[..., :3]
558
+ if points_clone.dim() == 2:
559
+ points_clone = points_clone.unsqueeze(0)
560
+ else:
561
+ assert points_clone.dim() == 3 and points_clone.shape[0] == 1
562
+
563
+ boxes = boxes.to(points_clone.device).unsqueeze(0)
564
+ box_idxs_of_pts = points_in_boxes_all(points_clone, boxes)
565
+
566
+ return box_idxs_of_pts.squeeze(0)
567
+
568
+ def points_in_boxes(self, points, boxes_override=None):
569
+ warnings.warn('DeprecationWarning: points_in_boxes is a '
570
+ 'deprecated method, please consider using '
571
+ 'points_in_boxes_part.')
572
+ return self.points_in_boxes_part(points, boxes_override)
573
+
574
+ def points_in_boxes_batch(self, points, boxes_override=None):
575
+ warnings.warn('DeprecationWarning: points_in_boxes_batch is a '
576
+ 'deprecated method, please consider using '
577
+ 'points_in_boxes_all.')
578
+ return self.points_in_boxes_all(points, boxes_override)
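
An illustrative sketch of the box API defined above, exercised through the LiDAR subclass re-exported by this package. It assumes mmcv with its box ops is installed; the box values are made up:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import LiDARInstance3DBoxes

    boxes = LiDARInstance3DBoxes(
        torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3]]))
    print(boxes.gravity_center)                       # bottom center lifted by half the height
    print(boxes.bev)                                  # (x, y, x_size, y_size, yaw)
    print(boxes.in_range_bev([-50, -50, 50, 50]))     # tensor([True])
    print(LiDARInstance3DBoxes.overlaps(boxes, boxes))  # tensor([[1.]]) via rotated BEV IoU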
det_map/det/dal/mmdet3d/core/bbox/structures/box_3d_mode.py ADDED
@@ -0,0 +1,197 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from enum import IntEnum, unique
3
+
4
+ import numpy as np
5
+ import torch
6
+
7
+ from .base_box3d import BaseInstance3DBoxes
8
+ from .cam_box3d import CameraInstance3DBoxes
9
+ from .depth_box3d import DepthInstance3DBoxes
10
+ from .lidar_box3d import LiDARInstance3DBoxes
11
+ from .utils import limit_period
12
+
13
+
14
+ @unique
15
+ class Box3DMode(IntEnum):
16
+ r"""Enum of different ways to represent a box.
17
+
18
+ Coordinates in LiDAR:
19
+
20
+ .. code-block:: none
21
+
22
+ up z
23
+ ^ x front
24
+ | /
25
+ | /
26
+ left y <------ 0
27
+
28
+ The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
29
+ and the yaw is around the z axis, thus the rotation axis=2.
30
+
31
+ Coordinates in camera:
32
+
33
+ .. code-block:: none
34
+
35
+ z front
36
+ /
37
+ /
38
+ 0 ------> x right
39
+ |
40
+ |
41
+ v
42
+ down y
43
+
44
+ The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
45
+ and the yaw is around the y axis, thus the rotation axis=1.
46
+
47
+ Coordinates in Depth mode:
48
+
49
+ .. code-block:: none
50
+
51
+ up z
52
+ ^ y front
53
+ | /
54
+ | /
55
+ 0 ------> x right
56
+
57
+ The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
58
+ and the yaw is around the z axis, thus the rotation axis=2.
59
+ """
60
+
61
+ LIDAR = 0
62
+ CAM = 1
63
+ DEPTH = 2
64
+
65
+ @staticmethod
66
+ def convert(box, src, dst, rt_mat=None, with_yaw=True):
67
+ """Convert boxes from `src` mode to `dst` mode.
68
+
69
+ Args:
70
+ box (tuple | list | np.ndarray |
71
+ torch.Tensor | :obj:`BaseInstance3DBoxes`):
72
+ Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
73
+ src (:obj:`Box3DMode`): The src Box mode.
74
+ dst (:obj:`Box3DMode`): The target Box mode.
75
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
76
+ translation matrix between different coordinates.
77
+ Defaults to None.
78
+ The conversion from `src` coordinates to `dst` coordinates
79
+ usually comes along the change of sensors, e.g., from camera
80
+ to LiDAR. This requires a transformation matrix.
81
+ with_yaw (bool, optional): If `box` is an instance of
82
+ :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
83
+ Defaults to True.
84
+
85
+ Returns:
86
+ (tuple | list | np.ndarray | torch.Tensor |
87
+ :obj:`BaseInstance3DBoxes`):
88
+ The converted box of the same type.
89
+ """
90
+ if src == dst:
91
+ return box
92
+
93
+ is_numpy = isinstance(box, np.ndarray)
94
+ is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
95
+ single_box = isinstance(box, (list, tuple))
96
+ if single_box:
97
+ assert len(box) >= 7, (
98
+ 'Box3DMode.convert takes either a k-tuple/list or '
99
+ 'an Nxk array/tensor, where k >= 7')
100
+ arr = torch.tensor(box)[None, :]
101
+ else:
102
+ # avoid modifying the input box
103
+ if is_numpy:
104
+ arr = torch.from_numpy(np.asarray(box)).clone()
105
+ elif is_Instance3DBoxes:
106
+ arr = box.tensor.clone()
107
+ else:
108
+ arr = box.clone()
109
+
110
+ if is_Instance3DBoxes:
111
+ with_yaw = box.with_yaw
112
+
113
+ # convert box from `src` mode to `dst` mode.
114
+ x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
115
+ if with_yaw:
116
+ yaw = arr[..., 6:7]
117
+ if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
118
+ if rt_mat is None:
119
+ rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
120
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
121
+ if with_yaw:
122
+ yaw = -yaw - np.pi / 2
123
+ yaw = limit_period(yaw, period=np.pi * 2)
124
+ elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
125
+ if rt_mat is None:
126
+ rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
127
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
128
+ if with_yaw:
129
+ yaw = -yaw - np.pi / 2
130
+ yaw = limit_period(yaw, period=np.pi * 2)
131
+ elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
132
+ if rt_mat is None:
133
+ rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
134
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
135
+ if with_yaw:
136
+ yaw = -yaw
137
+ elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
138
+ if rt_mat is None:
139
+ rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
140
+ xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
141
+ if with_yaw:
142
+ yaw = -yaw
143
+ elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
144
+ if rt_mat is None:
145
+ rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
146
+ xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
147
+ if with_yaw:
148
+ yaw = yaw + np.pi / 2
149
+ yaw = limit_period(yaw, period=np.pi * 2)
150
+ elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
151
+ if rt_mat is None:
152
+ rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
153
+ xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
154
+ if with_yaw:
155
+ yaw = yaw - np.pi / 2
156
+ yaw = limit_period(yaw, period=np.pi * 2)
157
+ else:
158
+ raise NotImplementedError(
159
+ f'Conversion from Box3DMode {src} to {dst} '
160
+ 'is not supported yet')
161
+
162
+ if not isinstance(rt_mat, torch.Tensor):
163
+ rt_mat = arr.new_tensor(rt_mat)
164
+ if rt_mat.size(1) == 4:
165
+ extended_xyz = torch.cat(
166
+ [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
167
+ xyz = extended_xyz @ rt_mat.t()
168
+ else:
169
+ xyz = arr[..., :3] @ rt_mat.t()
170
+
171
+ if with_yaw:
172
+ remains = arr[..., 7:]
173
+ arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
174
+ else:
175
+ remains = arr[..., 6:]
176
+ arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1)
177
+
178
+ # convert arr to the original type
179
+ original_type = type(box)
180
+ if single_box:
181
+ return original_type(arr.flatten().tolist())
182
+ if is_numpy:
183
+ return arr.numpy()
184
+ elif is_Instance3DBoxes:
185
+ if dst == Box3DMode.CAM:
186
+ target_type = CameraInstance3DBoxes
187
+ elif dst == Box3DMode.LIDAR:
188
+ target_type = LiDARInstance3DBoxes
189
+ elif dst == Box3DMode.DEPTH:
190
+ target_type = DepthInstance3DBoxes
191
+ else:
192
+ raise NotImplementedError(
193
+ f'Conversion to {dst} through {original_type}'
194
+ ' is not supported yet')
195
+ return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)
196
+ else:
197
+ return arr
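
A short sketch of the mode conversion above. Leaving rt_mat as None falls back to the hard-coded LiDAR-to-camera axis permutation, which is only a reasonable assumption when no real sensor extrinsics are involved:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import Box3DMode

    lidar_boxes = torch.tensor([[10.0, 2.0, -1.5, 4.0, 1.8, 1.6, 0.3]])
    cam_boxes = Box3DMode.convert(lidar_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
    # x/y/z are permuted, sizes reordered to (x_size, z_size, y_size),
    # and yaw is remapped to the camera convention (rotation about y).
    print(cam_boxes.shape)  # torch.Size([1, 7])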
det_map/det/dal/mmdet3d/core/bbox/structures/cam_box3d.py ADDED
@@ -0,0 +1,354 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import numpy as np
3
+ import torch
4
+
5
+ from ...points import BasePoints
6
+ from .base_box3d import BaseInstance3DBoxes
7
+ from .utils import rotation_3d_in_axis, yaw2local
8
+
9
+
10
+ class CameraInstance3DBoxes(BaseInstance3DBoxes):
11
+ """3D boxes of instances in CAM coordinates.
12
+
13
+ Coordinates in camera:
14
+
15
+ .. code-block:: none
16
+
17
+ z front (yaw=-0.5*pi)
18
+ /
19
+ /
20
+ 0 ------> x right (yaw=0)
21
+ |
22
+ |
23
+ v
24
+ down y
25
+
26
+ The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
27
+ and the yaw is around the y axis, thus the rotation axis=1.
28
+ The yaw is 0 at the positive direction of x axis, and decreases from
29
+ the positive direction of x to the positive direction of z.
30
+
31
+ Attributes:
32
+ tensor (torch.Tensor): Float matrix in shape (N, box_dim).
33
+ box_dim (int): Integer indicating the dimension of a box
34
+ Each row is (x, y, z, x_size, y_size, z_size, yaw, ...).
35
+ with_yaw (bool): If True, the value of yaw will be set to 0 as
36
+ axis-aligned boxes tightly enclosing the original boxes.
37
+ """
38
+ YAW_AXIS = 1
39
+
40
+ def __init__(self,
41
+ tensor,
42
+ box_dim=7,
43
+ with_yaw=True,
44
+ origin=(0.5, 1.0, 0.5)):
45
+ if isinstance(tensor, torch.Tensor):
46
+ device = tensor.device
47
+ else:
48
+ device = torch.device('cpu')
49
+ tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
50
+ if tensor.numel() == 0:
51
+ # Use reshape, so we don't end up creating a new tensor that
52
+ # does not depend on the inputs (and consequently confuses jit)
53
+ tensor = tensor.reshape((0, box_dim)).to(
54
+ dtype=torch.float32, device=device)
55
+ assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
56
+
57
+ if tensor.shape[-1] == 6:
58
+ # If the dimension of boxes is 6, we expand box_dim by padding
59
+ # 0 as a fake yaw and set with_yaw to False.
60
+ assert box_dim == 6
61
+ fake_rot = tensor.new_zeros(tensor.shape[0], 1)
62
+ tensor = torch.cat((tensor, fake_rot), dim=-1)
63
+ self.box_dim = box_dim + 1
64
+ self.with_yaw = False
65
+ else:
66
+ self.box_dim = box_dim
67
+ self.with_yaw = with_yaw
68
+ self.tensor = tensor.clone()
69
+
70
+ if origin != (0.5, 1.0, 0.5):
71
+ dst = self.tensor.new_tensor((0.5, 1.0, 0.5))
72
+ src = self.tensor.new_tensor(origin)
73
+ self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
74
+
75
+ @property
76
+ def height(self):
77
+ """torch.Tensor: A vector with height of each box in shape (N, )."""
78
+ return self.tensor[:, 4]
79
+
80
+ @property
81
+ def top_height(self):
82
+ """torch.Tensor:
83
+ A vector with the top height of each box in shape (N, )."""
84
+ # the positive direction is down rather than up
85
+ return self.bottom_height - self.height
86
+
87
+ @property
88
+ def bottom_height(self):
89
+ """torch.Tensor:
90
+ A vector with bottom's height of each box in shape (N, )."""
91
+ return self.tensor[:, 1]
92
+
93
+ @property
94
+ def local_yaw(self):
95
+ """torch.Tensor:
96
+ A vector with local yaw of each box in shape (N, ).
97
+ local_yaw equals to alpha in kitti, which is commonly
98
+ used in monocular 3D object detection task, so only
99
+ :obj:`CameraInstance3DBoxes` has the property.
100
+ """
101
+ yaw = self.yaw
102
+ loc = self.gravity_center
103
+ local_yaw = yaw2local(yaw, loc)
104
+
105
+ return local_yaw
106
+
107
+ @property
108
+ def gravity_center(self):
109
+ """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
110
+ bottom_center = self.bottom_center
111
+ gravity_center = torch.zeros_like(bottom_center)
112
+ gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
113
+ gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
114
+ return gravity_center
115
+
116
+ @property
117
+ def corners(self):
118
+ """torch.Tensor: Coordinates of corners of all the boxes in
119
+ shape (N, 8, 3).
120
+
121
+ Convert the boxes to in clockwise order, in the form of
122
+ (x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)
123
+
124
+ .. code-block:: none
125
+
126
+ front z
127
+ /
128
+ /
129
+ (x0, y0, z1) + ----------- + (x1, y0, z1)
130
+ /| / |
131
+ / | / |
132
+ (x0, y0, z0) + ----------- + + (x1, y1, z1)
133
+ | / . | /
134
+ | / origin | /
135
+ (x0, y1, z0) + ----------- + -------> x right
136
+ | (x1, y1, z0)
137
+ |
138
+ v
139
+ down y
140
+ """
141
+ if self.tensor.numel() == 0:
142
+ return torch.empty([0, 8, 3], device=self.tensor.device)
143
+
144
+ dims = self.dims
145
+ corners_norm = torch.from_numpy(
146
+ np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)).to(
147
+ device=dims.device, dtype=dims.dtype)
148
+
149
+ corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
150
+ # use relative origin [0.5, 1, 0.5]
151
+ corners_norm = corners_norm - dims.new_tensor([0.5, 1, 0.5])
152
+ corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
153
+
154
+ corners = rotation_3d_in_axis(
155
+ corners, self.tensor[:, 6], axis=self.YAW_AXIS)
156
+ corners += self.tensor[:, :3].view(-1, 1, 3)
157
+ return corners
158
+
159
+ @property
160
+ def bev(self):
161
+ """torch.Tensor: 2D BEV box of each box with rotation
162
+ in XYWHR format, in shape (N, 5)."""
163
+ bev = self.tensor[:, [0, 2, 3, 5, 6]].clone()
164
+ # positive direction of the gravity axis
165
+ # in cam coord system points to the earth
166
+ # so the bev yaw angle needs to be reversed
167
+ bev[:, -1] = -bev[:, -1]
168
+ return bev
169
+
170
+ def rotate(self, angle, points=None):
171
+ """Rotate boxes with points (optional) with the given angle or rotation
172
+ matrix.
173
+
174
+ Args:
175
+ angle (float | torch.Tensor | np.ndarray):
176
+ Rotation angle or rotation matrix.
177
+ points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
178
+ Points to rotate. Defaults to None.
179
+
180
+ Returns:
181
+ tuple or None: When ``points`` is None, the function returns
182
+ None, otherwise it returns the rotated points and the
183
+ rotation matrix ``rot_mat_T``.
184
+ """
185
+ if not isinstance(angle, torch.Tensor):
186
+ angle = self.tensor.new_tensor(angle)
187
+
188
+ assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
189
+ f'invalid rotation angle shape {angle.shape}'
190
+
191
+ if angle.numel() == 1:
192
+ self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(
193
+ self.tensor[:, 0:3],
194
+ angle,
195
+ axis=self.YAW_AXIS,
196
+ return_mat=True)
197
+ else:
198
+ rot_mat_T = angle
199
+ rot_sin = rot_mat_T[2, 0]
200
+ rot_cos = rot_mat_T[0, 0]
201
+ angle = np.arctan2(rot_sin, rot_cos)
202
+ self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T
203
+
204
+ self.tensor[:, 6] += angle
205
+
206
+ if points is not None:
207
+ if isinstance(points, torch.Tensor):
208
+ points[:, :3] = points[:, :3] @ rot_mat_T
209
+ elif isinstance(points, np.ndarray):
210
+ rot_mat_T = rot_mat_T.cpu().numpy()
211
+ points[:, :3] = np.dot(points[:, :3], rot_mat_T)
212
+ elif isinstance(points, BasePoints):
213
+ points.rotate(rot_mat_T)
214
+ else:
215
+ raise ValueError
216
+ return points, rot_mat_T
217
+
218
+ def flip(self, bev_direction='horizontal', points=None):
219
+ """Flip the boxes in BEV along given BEV direction.
220
+
221
+ In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
222
+
223
+ Args:
224
+ bev_direction (str): Flip direction (horizontal or vertical).
225
+ points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional):
226
+ Points to flip. Defaults to None.
227
+
228
+ Returns:
229
+ torch.Tensor, numpy.ndarray or None: Flipped points.
230
+ """
231
+ assert bev_direction in ('horizontal', 'vertical')
232
+ if bev_direction == 'horizontal':
233
+ self.tensor[:, 0::7] = -self.tensor[:, 0::7]
234
+ if self.with_yaw:
235
+ self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
236
+ elif bev_direction == 'vertical':
237
+ self.tensor[:, 2::7] = -self.tensor[:, 2::7]
238
+ if self.with_yaw:
239
+ self.tensor[:, 6] = -self.tensor[:, 6]
240
+
241
+ if points is not None:
242
+ assert isinstance(points, (torch.Tensor, np.ndarray, BasePoints))
243
+ if isinstance(points, (torch.Tensor, np.ndarray)):
244
+ if bev_direction == 'horizontal':
245
+ points[:, 0] = -points[:, 0]
246
+ elif bev_direction == 'vertical':
247
+ points[:, 2] = -points[:, 2]
248
+ elif isinstance(points, BasePoints):
249
+ points.flip(bev_direction)
250
+ return points
251
+
252
+ @classmethod
253
+ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
254
+ """Calculate height overlaps of two boxes.
255
+
256
+ This function calculates the height overlaps between ``boxes1`` and
257
+ ``boxes2``, where ``boxes1`` and ``boxes2`` should be in the same type.
258
+
259
+ Args:
260
+ boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.
261
+ boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.
262
+ mode (str, optional): Mode of iou calculation. Defaults to 'iou'.
263
+
264
+ Returns:
265
+ torch.Tensor: Calculated iou of boxes' heights.
266
+ """
267
+ assert isinstance(boxes1, CameraInstance3DBoxes)
268
+ assert isinstance(boxes2, CameraInstance3DBoxes)
269
+
270
+ boxes1_top_height = boxes1.top_height.view(-1, 1)
271
+ boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
272
+ boxes2_top_height = boxes2.top_height.view(1, -1)
273
+ boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
274
+
275
+ # positive direction of the gravity axis
276
+ # in cam coord system points to the earth
277
+ heighest_of_bottom = torch.min(boxes1_bottom_height,
278
+ boxes2_bottom_height)
279
+ lowest_of_top = torch.max(boxes1_top_height, boxes2_top_height)
280
+ overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
281
+ return overlaps_h
282
+
283
+ def convert_to(self, dst, rt_mat=None):
284
+ """Convert self to ``dst`` mode.
285
+
286
+ Args:
287
+ dst (:obj:`Box3DMode`): The target Box mode.
288
+ rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
289
+ translation matrix between different coordinates.
290
+ Defaults to None.
291
+ The conversion from ``src`` coordinates to ``dst`` coordinates
292
+ usually comes along the change of sensors, e.g., from camera
293
+ to LiDAR. This requires a transformation matrix.
294
+
295
+ Returns:
296
+ :obj:`BaseInstance3DBoxes`:
297
+ The converted box of the same type in the ``dst`` mode.
298
+ """
299
+ from .box_3d_mode import Box3DMode
300
+ return Box3DMode.convert(
301
+ box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
302
+
303
+ def points_in_boxes_part(self, points, boxes_override=None):
304
+ """Find the box in which each point is.
305
+
306
+ Args:
307
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
308
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
309
+ boxes_override (torch.Tensor, optional): Boxes to override
310
+ `self.tensor `. Defaults to None.
311
+
312
+ Returns:
313
+ torch.Tensor: The index of the box in which
314
+ each point is, in shape (M, ). Default value is -1
315
+ (if the point is not enclosed by any box).
316
+ """
317
+ from .coord_3d_mode import Coord3DMode
318
+
319
+ points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
320
+ Coord3DMode.LIDAR)
321
+ if boxes_override is not None:
322
+ boxes_lidar = boxes_override
323
+ else:
324
+ boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
325
+ Coord3DMode.LIDAR)
326
+
327
+ box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar)
328
+ return box_idx
329
+
330
+ def points_in_boxes_all(self, points, boxes_override=None):
331
+ """Find all boxes in which each point is.
332
+
333
+ Args:
334
+ points (torch.Tensor): Points in shape (1, M, 3) or (M, 3),
335
+ 3 dimensions are (x, y, z) in LiDAR or depth coordinate.
336
+ boxes_override (torch.Tensor, optional): Boxes to override
337
+ `self.tensor `. Defaults to None.
338
+
339
+ Returns:
340
+ torch.Tensor: The index of all boxes in which each point is,
341
+ in shape (B, M, T).
342
+ """
343
+ from .coord_3d_mode import Coord3DMode
344
+
345
+ points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM,
346
+ Coord3DMode.LIDAR)
347
+ if boxes_override is not None:
348
+ boxes_lidar = boxes_override
349
+ else:
350
+ boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM,
351
+ Coord3DMode.LIDAR)
352
+
353
+ box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar)
354
+ return box_idx
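
A closing sketch for the camera-frame boxes above. The box values are illustrative, and the corner/BEV helpers rely on the utils module shipped in this package:

    import torch
    from det_map.det.dal.mmdet3d.core.bbox.structures import CameraInstance3DBoxes

    cam_boxes = CameraInstance3DBoxes(
        torch.tensor([[2.0, 1.6, 20.0, 1.8, 1.6, 4.0, -1.57]]))
    print(cam_boxes.gravity_center)  # y shifted by half the height (gravity axis points down)
    print(cam_boxes.corners.shape)   # torch.Size([1, 8, 3])
    print(cam_boxes.bev)             # (x, z, x_size, z_size, -yaw) on the ground plane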