Upload 7 files
- app.py +32 -1
- evaluate.py +7 -12
- leaderboard.csv +12 -0
- mmscan_utils/box_metric.py +277 -0
- mmscan_utils/box_utils.py +1079 -0
- test_annotations_mmscan.json +0 -0
- vg_evaluator.py +361 -0
app.py
CHANGED
@@ -4,8 +4,39 @@ import pandas as pd
 import os
 
 LEADERBOARD_CSV = "leaderboard.csv"
+import pandas as pd
+import os
+from datetime import datetime, date
+
+SUBMIT_RECORD = "submissions.csv"
+MAX_SUBMIT_PER_DAY = 2
+
+def check_submission_limit(username):
+    if not os.path.exists(SUBMIT_RECORD):
+        return True  # nobody has submitted yet
+
+    df = pd.read_csv(SUBMIT_RECORD)
+    today = date.today()
+
+    user_today_subs = df[
+        (df["username"] == username) &
+        (pd.to_datetime(df["timestamp"]).dt.date == today)
+    ]
+
+    return len(user_today_subs) < MAX_SUBMIT_PER_DAY
+
+def record_submission(username):
+    now = datetime.now().isoformat()
+    if os.path.exists(SUBMIT_RECORD):
+        df = pd.read_csv(SUBMIT_RECORD)
+    else:
+        df = pd.DataFrame(columns=["username", "timestamp"])
+    df.loc[len(df)] = {"username": username, "timestamp": now}
+    df.to_csv(SUBMIT_RECORD, index=False)
 
 def evaluate_and_update(pred_file, username):
+    if not check_submission_limit(username):
+        return "⛔ Submission limit exceeded for today.", pd.read_csv(LEADERBOARD_CSV)
     score = run_evaluation(pred_file.name)
     if os.path.exists(LEADERBOARD_CSV):
         df = pd.read_csv(LEADERBOARD_CSV)
@@ -19,7 +50,7 @@ def evaluate_and_update(pred_file, username):
 with gr.Blocks() as demo:
     gr.Markdown("# 🧊 3D IoU Challenge")
     name = gr.Textbox(label="Username")
-    upload = gr.File(label="Upload your prediction (.
+    upload = gr.File(label="Upload your prediction (.json)")
     score_text = gr.Textbox(label="Evaluation score")
     leaderboard = gr.Dataframe(headers=["Name", "Score"], interactive=False)
 
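Taken together, check_submission_limit and record_submission enforce a two-per-day quota keyed on the calendar date of each timestamp in submissions.csv. A minimal smoke test of that behavior, assuming the two functions and MAX_SUBMIT_PER_DAY are available in the current session (importing app.py directly would also build the Gradio UI), run from the Space's working directory so submissions.csv lands next to app.py:

user = "demo_user"
for _ in range(MAX_SUBMIT_PER_DAY):
    assert check_submission_limit(user)   # still under the quota
    record_submission(user)               # appends a timestamped row
assert not check_submission_limit(user)   # next attempt today is blocked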
evaluate.py
CHANGED
@@ -1,16 +1,11 @@
-from datasets import load_dataset
 import numpy as np
-import torch
-from pytorch3d.ops import box3d_overlap
+import json
+from vg_evaluator import evaluation_for_challenge
 
 def run_evaluation(pred_path):
-
-
+    pred_ = json.load(open(pred_path))
+    gt_ = json.load(open('test_annotations_mmscan.json'))
+    results = evaluation_for_challenge(gt_, pred_)
+
 
-
-    gt_boxes = torch.tensor(dataset[0]["boxes"]).float().unsqueeze(0)
-
-    iou_matrix, _ = box3d_overlap(pred_boxes, gt_boxes)
-    iou = iou_matrix.diagonal(dim1=1, dim2=2).mean()
-
-    return float(iou)
+    return results['gTop-[email protected]']
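The rewritten run_evaluation no longer computes IoU inline; it defers to vg_evaluator.evaluation_for_challenge and returns a single score from the result dict, whose keys have the form gTop-{k}@{iou_threshold} as produced by mmscan_utils.box_metric below. A hedged sketch of calling it standalone, assuming a predictions.json file in whatever schema evaluation_for_challenge expects:

# hypothetical standalone run, outside the Gradio app
from evaluate import run_evaluation

score = run_evaluation("predictions.json")  # path to an uploaded file
print(f"leaderboard score: {score:.4f}")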
leaderboard.csv
ADDED
@@ -0,0 +1,12 @@
name,score
aaa,0.1947003033781529
eee,0.0
sss,0.0
sss,0.0
sss,0.0
sss,0.0
sss,0.0
sss,0.0
sss,0.0
bbb,0.0
aaa,0.0
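leaderboard.csv stores one row per scored submission, so the same name can repeat (as aaa and sss do above). A small sketch of collapsing it to each user's best score for display; this is an assumption, since the app.py hunks shown don't reveal how the Dataframe is actually sorted:

import pandas as pd

df = pd.read_csv("leaderboard.csv")
best = df.groupby("name", as_index=False)["score"].max()  # best score per user
print(best.sort_values("score", ascending=False).to_string(index=False))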
mmscan_utils/box_metric.py
ADDED
@@ -0,0 +1,277 @@
from typing import Dict, Tuple, Union

import numpy as np
import torch
from scipy.optimize import linear_sum_assignment


def average_precision(recalls: np.ndarray,
                      precisions: np.ndarray,
                      mode: str = 'area') -> np.ndarray:
    """Calculate average precision (for single or multiple scales).

    Args:
        recalls (np.ndarray): Recalls with shape of (num_scales, num_dets)
            or (num_dets, ).
        precisions (np.ndarray): Precisions with shape of
            (num_scales, num_dets) or (num_dets, ).
        mode (str): 'area' or '11points'. 'area' means calculating the area
            under the precision-recall curve, '11points' means calculating
            the average precision of recalls at [0, 0.1, ..., 1].
            Defaults to 'area'.

    Returns:
        np.ndarray: Calculated average precision.
    """
    if recalls.ndim == 1:
        recalls = recalls[np.newaxis, :]
        precisions = precisions[np.newaxis, :]

    assert recalls.shape == precisions.shape
    assert recalls.ndim == 2

    num_scales = recalls.shape[0]
    ap = np.zeros(num_scales, dtype=np.float32)

    if mode == 'area':
        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
        ones = np.ones((num_scales, 1), dtype=recalls.dtype)
        mrec = np.hstack((zeros, recalls, ones))
        mpre = np.hstack((zeros, precisions, zeros))
        for i in range(mpre.shape[1] - 1, 0, -1):
            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
        for i in range(num_scales):
            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
            ap[i] = np.sum(
                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])

    elif mode == '11points':
        for i in range(num_scales):
            for thr in np.arange(0, 1 + 1e-3, 0.1):
                precs = precisions[i, recalls[i, :] >= thr]
                prec = precs.max() if precs.size > 0 else 0
                ap[i] += prec
        ap /= 11
    else:
        raise ValueError(
            'Unrecognized mode, only "area" and "11points" are supported')
    return ap


def get_f1_scores(iou_matrix: Union[np.ndarray, torch.tensor],
                  iou_threshold) -> float:
    """Refer to the algorithm in Multi3DRefer to compute the F1 score.

    Args:
        iou_matrix (ndarray/tensor):
            The iou matrix of the predictions and ground truths with
            shape (num_preds, num_gts).
        iou_threshold (float): 0.25/0.5.

    Returns:
        float: The F1 score.
    """
    iou_thr_tp = 0
    pred_bboxes_count, gt_bboxes_count = iou_matrix.shape

    square_matrix_len = max(gt_bboxes_count, pred_bboxes_count)
    iou_matrix_fill = np.zeros(shape=(square_matrix_len, square_matrix_len),
                               dtype=np.float32)
    iou_matrix_fill[:pred_bboxes_count, :gt_bboxes_count] = iou_matrix

    # apply matching algorithm
    row_idx, col_idx = linear_sum_assignment(iou_matrix_fill * -1)

    # iterate matched pairs, check ious
    for i in range(pred_bboxes_count):
        # index the padded square matrix: col_idx may point at a zero-IoU
        # dummy column when there are more predictions than ground truths
        iou = iou_matrix_fill[row_idx[i], col_idx[i]]
        # calculate true positives
        if iou >= iou_threshold:
            iou_thr_tp += 1

    # calculate precision, recall and f1-score for the current scene
    f1_score = 2 * iou_thr_tp / (pred_bboxes_count + gt_bboxes_count)

    return f1_score


def __get_fp_tp_array__(iou_array: Union[np.ndarray, torch.tensor],
                        iou_threshold: float) \
        -> Tuple[np.ndarray, np.ndarray]:
    """Compute the false-positive and true-positive array for each prediction.

    Args:
        iou_array (ndarray/tensor):
            The iou matrix of the predictions and ground truths
            (shape num_preds, num_gts).
        iou_threshold (float): 0.25/0.5.

    Returns:
        np.ndarray, np.ndarray: (len(preds), ),
            the false-positive and true-positive array for each prediction.
    """
    gt_matched_records = np.zeros((len(iou_array[0])), dtype=bool)
    tp_thr = np.zeros((len(iou_array)))
    fp_thr = np.zeros((len(iou_array)))

    for d, _ in enumerate(range(len(iou_array))):
        iou_max = -np.inf
        cur_iou = iou_array[d]
        num_gts = cur_iou.shape[0]

        if num_gts > 0:
            for j in range(num_gts):
                iou = cur_iou[j]
                if iou > iou_max:
                    iou_max = iou
                    jmax = j

            if iou_max >= iou_threshold:
                if not gt_matched_records[jmax]:
                    gt_matched_records[jmax] = True
                    tp_thr[d] = 1.0
                else:
                    fp_thr[d] = 1.0
            else:
                fp_thr[d] = 1.0
        else:
            fp_thr[d] = 1.0

    return fp_thr, tp_thr


def subset_get_average_precision(subset_results: dict,
                                 iou_thr: float)\
        -> Tuple[np.ndarray, np.ndarray]:
    """Return the average precision and max recall for a given iou array,
    "subset" version where the num_gt of each sample may differ.

    Args:
        subset_results (dict):
            The results, consisting of scores, sample_indices, ious.
            sample_indices means which sample the prediction belongs to.
        iou_thr (float): 0.25/0.5.

    Returns:
        Tuple[np.ndarray, np.ndarray]: The average precision and max recall.
    """
    confidences = subset_results['scores']
    sample_indices = subset_results['sample_indices']
    ious = subset_results['ious']
    gt_matched_records = {}
    total_gt_boxes = 0
    for i, sample_idx in enumerate(sample_indices):
        if sample_idx not in gt_matched_records:
            gt_matched_records[sample_idx] = np.zeros((len(ious[i]), ),
                                                      dtype=bool)
            total_gt_boxes += ious[i].shape[0]

    confidences = np.array(confidences)
    sorted_inds = np.argsort(-confidences)
    sample_indices = [sample_indices[i] for i in sorted_inds]
    ious = [ious[i] for i in sorted_inds]

    tp_thr = np.zeros(len(sample_indices))
    fp_thr = np.zeros(len(sample_indices))

    for d, sample_idx in enumerate(sample_indices):
        iou_max = -np.inf
        cur_iou = ious[d]
        num_gts = cur_iou.shape[0]
        if num_gts > 0:
            for j in range(num_gts):
                iou = cur_iou[j]
                if iou > iou_max:
                    iou_max = iou
                    jmax = j

            if iou_max >= iou_thr:
                if not gt_matched_records[sample_idx][jmax]:
                    gt_matched_records[sample_idx][jmax] = True
                    tp_thr[d] = 1.0
                else:
                    fp_thr[d] = 1.0
            else:
                fp_thr[d] = 1.0
        else:
            fp_thr[d] = 1.0

    fp = np.cumsum(fp_thr)
    tp = np.cumsum(tp_thr)
    recall = tp / float(total_gt_boxes)
    precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

    return average_precision(recall, precision), np.max(recall)


def get_average_precision(iou_array: np.ndarray, iou_threshold: float) \
        -> Tuple[np.ndarray, np.ndarray]:
    """Return the average precision and max recall for a given iou array.

    Args:
        iou_array (ndarray/tensor):
            The iou matrix of the predictions and ground truths
            (shape len(preds) * len(gts)).
        iou_threshold (float): 0.25/0.5.

    Returns:
        Tuple[np.ndarray, np.ndarray]: The average precision and max recall.
    """
    fp, tp = __get_fp_tp_array__(iou_array, iou_threshold)
    fp_cum = np.cumsum(fp)
    tp_cum = np.cumsum(tp)
    recall = tp_cum / float(iou_array.shape[1])
    precision = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)

    return average_precision(recall, precision), np.max(recall)


def get_general_topk_scores(iou_array: Union[np.ndarray, torch.tensor],
                            iou_threshold: float,
                            mode: str = 'sigma') -> Dict[str, float]:
    """Compute the multi-topk metric; we provide two modes.

    Args:
        iou_array (ndarray/tensor):
            The iou matrix of the predictions and ground truths
            (shape len(preds) * len(gts)).
        iou_threshold (float): 0.25/0.5.
        mode (str): 'sigma'/'simple'.
            "simple": 1/N * Hit(min(N*k, len(pred)))
            "sigma": 1/N * Sigma [Hit(min(n*k, len(pred))) >= n], n = 1~N
            Hit(M) returns the number of ground truths hit by
            the first M predictions.
            N = the number of ground truths.
            Defaults to 'sigma'.

    Returns:
        Dict[str, float]: The score of the multi-topk metric.
    """

    assert mode in ['sigma', 'simple']
    topk_scores = []
    gt_matched_records = np.zeros(len(iou_array[0]))
    num_gt = len(gt_matched_records)
    for d, _ in enumerate(range(len(iou_array))):
        iou_max = -np.inf
        cur_iou = iou_array[d]

        for j in range(len(iou_array[d])):
            iou = cur_iou[j]
            if iou > iou_max:
                iou_max = iou
                j_max = j
        if iou_max >= iou_threshold:
            gt_matched_records[j_max] = True
        topk_scores.append(gt_matched_records.copy())

    topk_results = {}
    for topk in [1, 3, 5, 10]:
        if mode == 'sigma':
            scores = [
                int(
                    np.sum(topk_scores[min(n * topk, len(topk_scores)) -
                                       1]) >= n) for n in range(1, num_gt + 1)
            ]
            result = np.sum(scores) / num_gt
        else:
            query_index = min(num_gt * topk, len(topk_scores)) - 1
            result = np.sum(topk_scores[query_index]) / num_gt
        topk_results[f'gTop-{topk}@{iou_threshold}'] = result
    return topk_results
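The functions above only need an IoU matrix, so they can be exercised with a synthetic one. A quick check, assuming mmscan_utils is on the import path: three confidence-sorted predictions (rows) against two ground truths (columns), evaluated at the 0.25 threshold:

import numpy as np
from mmscan_utils.box_metric import (get_average_precision,
                                     get_general_topk_scores)

# rows: predictions in descending confidence; columns: ground truths
iou = np.array([[0.60, 0.10],
                [0.05, 0.20],
                [0.55, 0.00]])

ap, max_recall = get_average_precision(iou, 0.25)  # greedy FP/TP sweep
print(ap, max_recall)                              # [0.5] 0.5: only gt 0 is hit

topk = get_general_topk_scores(iou, 0.25)          # 'sigma' mode by default
print(topk['gTop-1@0.25'])                         # 0.5, same key format as above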
mmscan_utils/box_utils.py
ADDED
@@ -0,0 +1,1079 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import abstractmethod
|
2 |
+
from typing import List,Iterator, Optional, Sequence, Tuple, Union
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
|
7 |
+
try:
|
8 |
+
from pytorch3d.ops import box3d_overlap
|
9 |
+
from pytorch3d.transforms import (euler_angles_to_matrix,
|
10 |
+
matrix_to_euler_angles)
|
11 |
+
except ImportError:
|
12 |
+
box3d_overlap = None
|
13 |
+
euler_angles_to_matrix = None
|
14 |
+
matrix_to_euler_angles = None
|
15 |
+
from torch import Tensor
|
16 |
+
|
17 |
+
|
18 |
+
class BaseInstance3DBoxes:
|
19 |
+
"""Base class for 3D Boxes.
|
20 |
+
|
21 |
+
Note:
|
22 |
+
The box is bottom centered, i.e. the relative position of origin in the
|
23 |
+
box is (0.5, 0.5, 0).
|
24 |
+
|
25 |
+
Args:
|
26 |
+
tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
|
27 |
+
data with shape (N, box_dim).
|
28 |
+
box_dim (int): Number of the dimension of a box. Each row is
|
29 |
+
(x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
|
30 |
+
with_yaw (bool): Whether the box is with yaw rotation. If False, the
|
31 |
+
value of yaw will be set to 0 as minmax boxes. Defaults to True.
|
32 |
+
origin (Tuple[float]): Relative position of the box origin.
|
33 |
+
Defaults to (0.5, 0.5, 0). This will guide the box be converted to
|
34 |
+
(0.5, 0.5, 0) mode.
|
35 |
+
|
36 |
+
Attributes:
|
37 |
+
tensor (Tensor): Float matrix with shape (N, box_dim).
|
38 |
+
box_dim (int): Integer indicating the dimension of a box. Each row is
|
39 |
+
(x, y, z, x_size, y_size, z_size, yaw, ...).
|
40 |
+
with_yaw (bool): If True, the value of yaw will be set to 0 as minmax
|
41 |
+
boxes.
|
42 |
+
"""
|
43 |
+
|
44 |
+
YAW_AXIS: int = 0
|
45 |
+
|
46 |
+
def __init__(
|
47 |
+
self,
|
48 |
+
tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
|
49 |
+
box_dim: int = 7,
|
50 |
+
with_yaw: bool = True,
|
51 |
+
origin: Tuple[float, float, float] = (0.5, 0.5, 0)
|
52 |
+
) -> None:
|
53 |
+
if isinstance(tensor, Tensor):
|
54 |
+
device = tensor.device
|
55 |
+
else:
|
56 |
+
device = torch.device('cpu')
|
57 |
+
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
|
58 |
+
if tensor.numel() == 0:
|
59 |
+
# Use reshape, so we don't end up creating a new tensor that does
|
60 |
+
# not depend on the inputs (and consequently confuses jit)
|
61 |
+
tensor = tensor.reshape((-1, box_dim))
|
62 |
+
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
|
63 |
+
('The box dimension must be 2 and the length of the last '
|
64 |
+
f'dimension must be {box_dim}, but got boxes with shape '
|
65 |
+
f'{tensor.shape}.')
|
66 |
+
|
67 |
+
if tensor.shape[-1] == 6:
|
68 |
+
# If the dimension of boxes is 6, we expand box_dim by padding 0 as
|
69 |
+
# a fake yaw and set with_yaw to False
|
70 |
+
assert box_dim == 6
|
71 |
+
fake_rot = tensor.new_zeros(tensor.shape[0], 1)
|
72 |
+
tensor = torch.cat((tensor, fake_rot), dim=-1)
|
73 |
+
self.box_dim = box_dim + 1
|
74 |
+
self.with_yaw = False
|
75 |
+
else:
|
76 |
+
self.box_dim = box_dim
|
77 |
+
self.with_yaw = with_yaw
|
78 |
+
self.tensor = tensor.clone()
|
79 |
+
|
80 |
+
if origin != (0.5, 0.5, 0):
|
81 |
+
dst = self.tensor.new_tensor((0.5, 0.5, 0))
|
82 |
+
src = self.tensor.new_tensor(origin)
|
83 |
+
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
|
84 |
+
|
85 |
+
@property
|
86 |
+
def shape(self) -> torch.Size:
|
87 |
+
"""torch.Size: Shape of boxes."""
|
88 |
+
return self.tensor.shape
|
89 |
+
|
90 |
+
@property
|
91 |
+
def volume(self) -> Tensor:
|
92 |
+
"""Tensor: A vector with volume of each box in shape (N, )."""
|
93 |
+
return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
|
94 |
+
|
95 |
+
@property
|
96 |
+
def dims(self) -> Tensor:
|
97 |
+
"""Tensor: Size dimensions of each box in shape (N, 3)."""
|
98 |
+
return self.tensor[:, 3:6]
|
99 |
+
|
100 |
+
@property
|
101 |
+
def yaw(self) -> Tensor:
|
102 |
+
"""Tensor: A vector with yaw of each box in shape (N, )."""
|
103 |
+
return self.tensor[:, 6]
|
104 |
+
|
105 |
+
@property
|
106 |
+
def height(self) -> Tensor:
|
107 |
+
"""Tensor: A vector with height of each box in shape (N, )."""
|
108 |
+
return self.tensor[:, 5]
|
109 |
+
|
110 |
+
@property
|
111 |
+
def top_height(self) -> Tensor:
|
112 |
+
"""Tensor: A vector with top height of each box in shape (N, )."""
|
113 |
+
return self.bottom_height + self.height
|
114 |
+
|
115 |
+
@property
|
116 |
+
def bottom_height(self) -> Tensor:
|
117 |
+
"""Tensor: A vector with bottom height of each box in shape (N, )."""
|
118 |
+
return self.tensor[:, 2]
|
119 |
+
|
120 |
+
@property
|
121 |
+
def center(self) -> Tensor:
|
122 |
+
"""Calculate the center of all the boxes.
|
123 |
+
|
124 |
+
Note:
|
125 |
+
In MMDetection3D's convention, the bottom center is usually taken
|
126 |
+
as the default center.
|
127 |
+
|
128 |
+
The relative position of the centers in different kinds of boxes
|
129 |
+
are different, e.g., the relative center of a boxes is
|
130 |
+
(0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar. It is
|
131 |
+
recommended to use ``bottom_center`` or ``gravity_center`` for
|
132 |
+
clearer usage.
|
133 |
+
|
134 |
+
Returns:
|
135 |
+
Tensor: A tensor with center of each box in shape (N, 3).
|
136 |
+
"""
|
137 |
+
return self.bottom_center
|
138 |
+
|
139 |
+
@property
|
140 |
+
def bottom_center(self) -> Tensor:
|
141 |
+
"""Tensor: A tensor with center of each box in shape (N, 3)."""
|
142 |
+
return self.tensor[:, :3]
|
143 |
+
|
144 |
+
@property
|
145 |
+
def gravity_center(self) -> Tensor:
|
146 |
+
"""Tensor: A tensor with center of each box in shape (N, 3)."""
|
147 |
+
bottom_center = self.bottom_center
|
148 |
+
gravity_center = torch.zeros_like(bottom_center)
|
149 |
+
gravity_center[:, :2] = bottom_center[:, :2]
|
150 |
+
gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
|
151 |
+
return gravity_center
|
152 |
+
|
153 |
+
@property
|
154 |
+
def corners(self) -> Tensor:
|
155 |
+
"""Tensor: A tensor with 8 corners of each box in shape (N, 8, 3)."""
|
156 |
+
pass
|
157 |
+
|
158 |
+
@property
|
159 |
+
def bev(self) -> Tensor:
|
160 |
+
"""Tensor: 2D BEV box of each box with rotation in XYWHR format, in
|
161 |
+
shape (N, 5)."""
|
162 |
+
return self.tensor[:, [0, 1, 3, 4, 6]]
|
163 |
+
|
164 |
+
def in_range_bev(
|
165 |
+
self, box_range: Union[Tensor, np.ndarray,
|
166 |
+
Sequence[float]]) -> Tensor:
|
167 |
+
"""Check whether the boxes are in the given range.
|
168 |
+
|
169 |
+
Args:
|
170 |
+
box_range (Tensor or np.ndarray or Sequence[float]): The range of
|
171 |
+
box in order of (x_min, y_min, x_max, y_max).
|
172 |
+
|
173 |
+
Note:
|
174 |
+
The original implementation of SECOND checks whether boxes in a
|
175 |
+
range by checking whether the points are in a convex polygon, we
|
176 |
+
reduce the burden for simpler cases.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
Tensor: A binary vector indicating whether each box is inside the
|
180 |
+
reference range.
|
181 |
+
"""
|
182 |
+
in_range_flags = ((self.bev[:, 0] > box_range[0])
|
183 |
+
& (self.bev[:, 1] > box_range[1])
|
184 |
+
& (self.bev[:, 0] < box_range[2])
|
185 |
+
& (self.bev[:, 1] < box_range[3]))
|
186 |
+
return in_range_flags
|
187 |
+
|
188 |
+
@abstractmethod
|
189 |
+
def rotate(
|
190 |
+
self,
|
191 |
+
angle: Union[Tensor, np.ndarray, float],
|
192 |
+
points: Optional[Union[Tensor, np.ndarray]] = None
|
193 |
+
) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
|
194 |
+
Tuple[Tensor], None]:
|
195 |
+
"""Rotate boxes with points (optional) with the given angle or rotation
|
196 |
+
matrix.
|
197 |
+
|
198 |
+
Args:
|
199 |
+
angle (Tensor or np.ndarray or float): Rotation angle or rotation
|
200 |
+
matrix.
|
201 |
+
points (Tensor or np.ndarray or :obj:``, optional):
|
202 |
+
Points to rotate. Defaults to None.
|
203 |
+
|
204 |
+
Returns:
|
205 |
+
tuple or None: When ``points`` is None, the function returns None,
|
206 |
+
otherwise it returns the rotated points and the rotation matrix
|
207 |
+
``rot_mat_T``.
|
208 |
+
"""
|
209 |
+
pass
|
210 |
+
|
211 |
+
@abstractmethod
|
212 |
+
def flip(
|
213 |
+
self,
|
214 |
+
bev_direction: str = 'horizontal',
|
215 |
+
points: Optional[Union[Tensor, np.ndarray, ]] = None
|
216 |
+
) -> Union[Tensor, np.ndarray, None]:
|
217 |
+
"""Flip the boxes in BEV along given BEV direction.
|
218 |
+
|
219 |
+
Args:
|
220 |
+
bev_direction (str): Direction by which to flip. Can be chosen from
|
221 |
+
'horizontal' and 'vertical'. Defaults to 'horizontal'.
|
222 |
+
points (Tensor or np.ndarray or :obj:``, optional):
|
223 |
+
Points to flip. Defaults to None.
|
224 |
+
|
225 |
+
Returns:
|
226 |
+
Tensor or np.ndarray or :obj:`` or None: When ``points``
|
227 |
+
is None, the function returns None, otherwise it returns the
|
228 |
+
flipped points.
|
229 |
+
"""
|
230 |
+
pass
|
231 |
+
|
232 |
+
def translate(self, trans_vector: Union[Tensor, np.ndarray]) -> None:
|
233 |
+
"""Translate boxes with the given translation vector.
|
234 |
+
|
235 |
+
Args:
|
236 |
+
trans_vector (Tensor or np.ndarray): Translation vector of size
|
237 |
+
1x3.
|
238 |
+
"""
|
239 |
+
if not isinstance(trans_vector, Tensor):
|
240 |
+
trans_vector = self.tensor.new_tensor(trans_vector)
|
241 |
+
self.tensor[:, :3] += trans_vector
|
242 |
+
|
243 |
+
def in_range_3d(
|
244 |
+
self, box_range: Union[Tensor, np.ndarray,
|
245 |
+
Sequence[float]]) -> Tensor:
|
246 |
+
"""Check whether the boxes are in the given range.
|
247 |
+
|
248 |
+
Args:
|
249 |
+
box_range (Tensor or np.ndarray or Sequence[float]): The range of
|
250 |
+
box (x_min, y_min, z_min, x_max, y_max, z_max).
|
251 |
+
|
252 |
+
Note:
|
253 |
+
In the original implementation of SECOND, checking whether a box in
|
254 |
+
the range checks whether the points are in a convex polygon, we try
|
255 |
+
to reduce the burden for simpler cases.
|
256 |
+
|
257 |
+
Returns:
|
258 |
+
Tensor: A binary vector indicating whether each point is inside the
|
259 |
+
reference range.
|
260 |
+
"""
|
261 |
+
in_range_flags = ((self.tensor[:, 0] > box_range[0])
|
262 |
+
& (self.tensor[:, 1] > box_range[1])
|
263 |
+
& (self.tensor[:, 2] > box_range[2])
|
264 |
+
& (self.tensor[:, 0] < box_range[3])
|
265 |
+
& (self.tensor[:, 1] < box_range[4])
|
266 |
+
& (self.tensor[:, 2] < box_range[5]))
|
267 |
+
return in_range_flags
|
268 |
+
|
269 |
+
@abstractmethod
|
270 |
+
def convert_to(self,
|
271 |
+
dst: int,
|
272 |
+
rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
|
273 |
+
correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
|
274 |
+
"""Convert self to ``dst`` mode.
|
275 |
+
|
276 |
+
Args:
|
277 |
+
dst (int): The target Box mode.
|
278 |
+
rt_mat (Tensor or np.ndarray, optional): The rotation and
|
279 |
+
translation matrix between different coordinates.
|
280 |
+
Defaults to None. The conversion from ``src`` coordinates to
|
281 |
+
``dst`` coordinates usually comes along the change of sensors,
|
282 |
+
e.g., from camera to LiDAR. This requires a transformation
|
283 |
+
matrix.
|
284 |
+
correct_yaw (bool): Whether to convert the yaw angle to the target
|
285 |
+
coordinate. Defaults to False.
|
286 |
+
|
287 |
+
Returns:
|
288 |
+
:obj:`BaseInstance3DBoxes`: The converted box of the same type in
|
289 |
+
the ``dst`` mode.
|
290 |
+
"""
|
291 |
+
pass
|
292 |
+
|
293 |
+
def scale(self, scale_factor: float) -> None:
|
294 |
+
"""Scale the box with horizontal and vertical scaling factors.
|
295 |
+
|
296 |
+
Args:
|
297 |
+
scale_factors (float): Scale factors to scale the boxes.
|
298 |
+
"""
|
299 |
+
self.tensor[:, :6] *= scale_factor
|
300 |
+
self.tensor[:, 7:] *= scale_factor # velocity
|
301 |
+
|
302 |
+
def nonempty(self, threshold: float = 0.0) -> Tensor:
|
303 |
+
"""Find boxes that are non-empty.
|
304 |
+
|
305 |
+
A box is considered empty if either of its side is no larger than
|
306 |
+
threshold.
|
307 |
+
|
308 |
+
Args:
|
309 |
+
threshold (float): The threshold of minimal sizes. Defaults to 0.0.
|
310 |
+
|
311 |
+
Returns:
|
312 |
+
Tensor: A binary vector which represents whether each box is empty
|
313 |
+
(False) or non-empty (True).
|
314 |
+
"""
|
315 |
+
box = self.tensor
|
316 |
+
size_x = box[..., 3]
|
317 |
+
size_y = box[..., 4]
|
318 |
+
size_z = box[..., 5]
|
319 |
+
keep = ((size_x > threshold)
|
320 |
+
& (size_y > threshold) & (size_z > threshold))
|
321 |
+
return keep
|
322 |
+
|
323 |
+
def __getitem__(
|
324 |
+
self, item: Union[int, slice, np.ndarray,
|
325 |
+
Tensor]) -> 'BaseInstance3DBoxes':
|
326 |
+
"""
|
327 |
+
Args:
|
328 |
+
item (int or slice or np.ndarray or Tensor): Index of boxes.
|
329 |
+
|
330 |
+
Note:
|
331 |
+
The following usage are allowed:
|
332 |
+
|
333 |
+
1. `new_boxes = boxes[3]`: Return a `Boxes` that contains only one
|
334 |
+
box.
|
335 |
+
2. `new_boxes = boxes[2:10]`: Return a slice of boxes.
|
336 |
+
3. `new_boxes = boxes[vector]`: Where vector is a
|
337 |
+
torch.BoolTensor with `length = len(boxes)`. Nonzero elements in
|
338 |
+
the vector will be selected.
|
339 |
+
|
340 |
+
Note that the returned Boxes might share storage with this Boxes,
|
341 |
+
subject to PyTorch's indexing semantics.
|
342 |
+
|
343 |
+
Returns:
|
344 |
+
:obj:`BaseInstance3DBoxes`: A new object of
|
345 |
+
:class:`BaseInstance3DBoxes` after indexing.
|
346 |
+
"""
|
347 |
+
original_type = type(self)
|
348 |
+
if isinstance(item, int):
|
349 |
+
return original_type(self.tensor[item].view(1, -1),
|
350 |
+
box_dim=self.box_dim,
|
351 |
+
with_yaw=self.with_yaw)
|
352 |
+
b = self.tensor[item]
|
353 |
+
assert b.dim() == 2, \
|
354 |
+
f'Indexing on Boxes with {item} failed to return a matrix!'
|
355 |
+
return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
|
356 |
+
|
357 |
+
def __len__(self) -> int:
|
358 |
+
"""int: Number of boxes in the current object."""
|
359 |
+
return self.tensor.shape[0]
|
360 |
+
|
361 |
+
def __repr__(self) -> str:
|
362 |
+
"""str: Return a string that describes the object."""
|
363 |
+
return self.__class__.__name__ + '(\n ' + str(self.tensor) + ')'
|
364 |
+
|
365 |
+
@classmethod
|
366 |
+
def cat(cls, boxes_list: Sequence['BaseInstance3DBoxes']
|
367 |
+
) -> 'BaseInstance3DBoxes':
|
368 |
+
"""Concatenate a list of Boxes into a single Boxes.
|
369 |
+
|
370 |
+
Args:
|
371 |
+
boxes_list (Sequence[:obj:`BaseInstance3DBoxes`]): List of boxes.
|
372 |
+
|
373 |
+
Returns:
|
374 |
+
:obj:`BaseInstance3DBoxes`: The concatenated boxes.
|
375 |
+
"""
|
376 |
+
assert isinstance(boxes_list, (list, tuple))
|
377 |
+
if len(boxes_list) == 0:
|
378 |
+
return cls(torch.empty(0))
|
379 |
+
assert all(isinstance(box, cls) for box in boxes_list)
|
380 |
+
|
381 |
+
# use torch.cat (v.s. layers.cat)
|
382 |
+
# so the returned boxes never share storage with input
|
383 |
+
cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0),
|
384 |
+
box_dim=boxes_list[0].box_dim,
|
385 |
+
with_yaw=boxes_list[0].with_yaw)
|
386 |
+
return cat_boxes
|
387 |
+
|
388 |
+
def numpy(self) -> np.ndarray:
|
389 |
+
"""Reload ``numpy`` from self.tensor."""
|
390 |
+
return self.tensor.numpy()
|
391 |
+
|
392 |
+
def to(self, device: Union[str, torch.device], *args,
|
393 |
+
**kwargs) -> 'BaseInstance3DBoxes':
|
394 |
+
"""Convert current boxes to a specific device.
|
395 |
+
|
396 |
+
Args:
|
397 |
+
device (str or :obj:`torch.device`): The name of the device.
|
398 |
+
|
399 |
+
Returns:
|
400 |
+
:obj:`BaseInstance3DBoxes`: A new boxes object on the specific
|
401 |
+
device.
|
402 |
+
"""
|
403 |
+
original_type = type(self)
|
404 |
+
return original_type(self.tensor.to(device, *args, **kwargs),
|
405 |
+
box_dim=self.box_dim,
|
406 |
+
with_yaw=self.with_yaw)
|
407 |
+
|
408 |
+
def cpu(self) -> 'BaseInstance3DBoxes':
|
409 |
+
"""Convert current boxes to cpu device.
|
410 |
+
|
411 |
+
Returns:
|
412 |
+
:obj:`BaseInstance3DBoxes`: A new boxes object on the cpu device.
|
413 |
+
"""
|
414 |
+
original_type = type(self)
|
415 |
+
return original_type(self.tensor.cpu(),
|
416 |
+
box_dim=self.box_dim,
|
417 |
+
with_yaw=self.with_yaw)
|
418 |
+
|
419 |
+
def cuda(self, *args, **kwargs) -> 'BaseInstance3DBoxes':
|
420 |
+
"""Convert current boxes to cuda device.
|
421 |
+
|
422 |
+
Returns:
|
423 |
+
:obj:`BaseInstance3DBoxes`: A new boxes object on the cuda device.
|
424 |
+
"""
|
425 |
+
original_type = type(self)
|
426 |
+
return original_type(self.tensor.cuda(*args, **kwargs),
|
427 |
+
box_dim=self.box_dim,
|
428 |
+
with_yaw=self.with_yaw)
|
429 |
+
|
430 |
+
def clone(self) -> 'BaseInstance3DBoxes':
|
431 |
+
"""Clone the boxes.
|
432 |
+
|
433 |
+
Returns:
|
434 |
+
:obj:`BaseInstance3DBoxes`: Box object with the same properties as
|
435 |
+
self.
|
436 |
+
"""
|
437 |
+
original_type = type(self)
|
438 |
+
return original_type(self.tensor.clone(),
|
439 |
+
box_dim=self.box_dim,
|
440 |
+
with_yaw=self.with_yaw)
|
441 |
+
|
442 |
+
def detach(self) -> 'BaseInstance3DBoxes':
|
443 |
+
"""Detach the boxes.
|
444 |
+
|
445 |
+
Returns:
|
446 |
+
:obj:`BaseInstance3DBoxes`: Box object with the same properties as
|
447 |
+
self.
|
448 |
+
"""
|
449 |
+
original_type = type(self)
|
450 |
+
return original_type(self.tensor.detach(),
|
451 |
+
box_dim=self.box_dim,
|
452 |
+
with_yaw=self.with_yaw)
|
453 |
+
|
454 |
+
@property
|
455 |
+
def device(self) -> torch.device:
|
456 |
+
"""torch.device: The device of the boxes are on."""
|
457 |
+
return self.tensor.device
|
458 |
+
|
459 |
+
def __iter__(self) -> Iterator[Tensor]:
|
460 |
+
"""Yield a box as a Tensor at a time.
|
461 |
+
|
462 |
+
Returns:
|
463 |
+
Iterator[Tensor]: A box of shape (box_dim, ).
|
464 |
+
"""
|
465 |
+
yield from self.tensor
|
466 |
+
|
467 |
+
@classmethod
|
468 |
+
def height_overlaps(cls, boxes1: 'BaseInstance3DBoxes',
|
469 |
+
boxes2: 'BaseInstance3DBoxes') -> Tensor:
|
470 |
+
"""Calculate height overlaps of two boxes.
|
471 |
+
|
472 |
+
Note:
|
473 |
+
This function calculates the height overlaps between ``boxes1`` and
|
474 |
+
``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
|
475 |
+
|
476 |
+
Args:
|
477 |
+
boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
|
478 |
+
boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
|
479 |
+
|
480 |
+
Returns:
|
481 |
+
Tensor: Calculated height overlap of the boxes.
|
482 |
+
"""
|
483 |
+
assert isinstance(boxes1, BaseInstance3DBoxes)
|
484 |
+
assert isinstance(boxes2, BaseInstance3DBoxes)
|
485 |
+
assert type(boxes1) == type(boxes2), \
|
486 |
+
'"boxes1" and "boxes2" should be in the same type, ' \
|
487 |
+
f'but got {type(boxes1)} and {type(boxes2)}.'
|
488 |
+
|
489 |
+
boxes1_top_height = boxes1.top_height.view(-1, 1)
|
490 |
+
boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
|
491 |
+
boxes2_top_height = boxes2.top_height.view(1, -1)
|
492 |
+
boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
|
493 |
+
|
494 |
+
heighest_of_bottom = torch.max(boxes1_bottom_height,
|
495 |
+
boxes2_bottom_height)
|
496 |
+
lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)
|
497 |
+
overlaps_h = torch.clamp(lowest_of_top - heighest_of_bottom, min=0)
|
498 |
+
return overlaps_h
|
499 |
+
|
500 |
+
def new_box(
|
501 |
+
self, data: Union[Tensor, np.ndarray, Sequence[Sequence[float]]]
|
502 |
+
) -> 'BaseInstance3DBoxes':
|
503 |
+
"""Create a new box object with data.
|
504 |
+
|
505 |
+
The new box and its tensor has the similar properties as self and
|
506 |
+
self.tensor, respectively.
|
507 |
+
|
508 |
+
Args:
|
509 |
+
data (Tensor or np.ndarray or Sequence[Sequence[float]]): Data to
|
510 |
+
be copied.
|
511 |
+
|
512 |
+
Returns:
|
513 |
+
:obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, the
|
514 |
+
object's other properties are similar to ``self``.
|
515 |
+
"""
|
516 |
+
new_tensor = self.tensor.new_tensor(data) \
|
517 |
+
if not isinstance(data, Tensor) else data.to(self.device)
|
518 |
+
original_type = type(self)
|
519 |
+
return original_type(new_tensor,
|
520 |
+
box_dim=self.box_dim,
|
521 |
+
with_yaw=self.with_yaw)
|
522 |
+
|
523 |
+
|
524 |
+
class EulerInstance3DBoxes(BaseInstance3DBoxes):
|
525 |
+
"""3D boxes with 1-D orientation represented by three Euler angles.
|
526 |
+
|
527 |
+
See https://en.wikipedia.org/wiki/Euler_angles for
|
528 |
+
regarding the definition of Euler angles.
|
529 |
+
|
530 |
+
Attributes:
|
531 |
+
tensor (torch.Tensor): Float matrix of N x box_dim.
|
532 |
+
box_dim (int): Integer indicates the dimension of a box
|
533 |
+
Each row is (x, y, z, x_size, y_size, z_size, alpha, beta, gamma).
|
534 |
+
"""
|
535 |
+
|
536 |
+
def __init__(self, tensor, box_dim=9, origin=(0.5, 0.5, 0.5)):
|
537 |
+
if isinstance(tensor, torch.Tensor):
|
538 |
+
device = tensor.device
|
539 |
+
else:
|
540 |
+
device = torch.device('cpu')
|
541 |
+
tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
|
542 |
+
if tensor.numel() == 0:
|
543 |
+
# Use reshape, so we don't end up creating a new tensor that
|
544 |
+
# does not depend on the inputs (and consequently confuses jit)
|
545 |
+
tensor = tensor.reshape((0, box_dim)).to(dtype=torch.float32,
|
546 |
+
device=device)
|
547 |
+
assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
|
548 |
+
|
549 |
+
if tensor.shape[-1] == 6:
|
550 |
+
# If the dimension of boxes is 6, we expand box_dim by padding
|
551 |
+
# (0, 0, 0) as a fake euler angle.
|
552 |
+
assert box_dim == 6
|
553 |
+
fake_rot = tensor.new_zeros(tensor.shape[0], 3)
|
554 |
+
tensor = torch.cat((tensor, fake_rot), dim=-1)
|
555 |
+
self.box_dim = box_dim + 3
|
556 |
+
elif tensor.shape[-1] == 7:
|
557 |
+
assert box_dim == 7
|
558 |
+
fake_euler = tensor.new_zeros(tensor.shape[0], 2)
|
559 |
+
tensor = torch.cat((tensor, fake_euler), dim=-1)
|
560 |
+
self.box_dim = box_dim + 2
|
561 |
+
else:
|
562 |
+
assert tensor.shape[-1] == 9
|
563 |
+
self.box_dim = box_dim
|
564 |
+
self.tensor = tensor.clone()
|
565 |
+
|
566 |
+
self.origin = origin
|
567 |
+
if origin != (0.5, 0.5, 0.5):
|
568 |
+
dst = self.tensor.new_tensor((0.5, 0.5, 0.5))
|
569 |
+
src = self.tensor.new_tensor(origin)
|
570 |
+
self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
|
571 |
+
|
572 |
+
def get_corners(self, tensor1):
|
573 |
+
"""torch.Tensor: Coordinates of corners of all the boxes
|
574 |
+
in shape (N, 8, 3).
|
575 |
+
|
576 |
+
Convert the boxes to corners in clockwise order, in form of
|
577 |
+
``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
|
578 |
+
|
579 |
+
.. code-block:: none
|
580 |
+
|
581 |
+
up z
|
582 |
+
front y ^
|
583 |
+
/ |
|
584 |
+
/ |
|
585 |
+
(x0, y1, z1) + ----------- + (x1, y1, z1)
|
586 |
+
/| / |
|
587 |
+
/ | / |
|
588 |
+
(x0, y0, z1) + ----------- + + (x1, y1, z0)
|
589 |
+
| / . | /
|
590 |
+
| / origin | /
|
591 |
+
(x0, y0, z0) + ----------- + --------> right x
|
592 |
+
(x1, y0, z0)
|
593 |
+
"""
|
594 |
+
if tensor1.numel() == 0:
|
595 |
+
return torch.empty([0, 8, 3], device=tensor1.device)
|
596 |
+
|
597 |
+
dims = tensor1[:, 3:6]
|
598 |
+
corners_norm = torch.from_numpy(
|
599 |
+
np.stack(np.unravel_index(np.arange(8), [2] * 3),
|
600 |
+
axis=1)).to(device=dims.device, dtype=dims.dtype)
|
601 |
+
|
602 |
+
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
|
603 |
+
# use relative origin
|
604 |
+
assert self.origin == (0.5, 0.5, 0.5), \
|
605 |
+
'self.origin != (0.5, 0.5, 0.5) needs to be checked!'
|
606 |
+
corners_norm = corners_norm - dims.new_tensor(self.origin)
|
607 |
+
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
|
608 |
+
|
609 |
+
# rotate
|
610 |
+
corners = rotation_3d_in_euler(corners, tensor1[:, 6:])
|
611 |
+
|
612 |
+
corners += tensor1[:, :3].view(-1, 1, 3)
|
613 |
+
return corners
|
614 |
+
|
615 |
+
@classmethod
|
616 |
+
def overlaps(cls, boxes1, boxes2, mode='iou', eps=1e-4):
|
617 |
+
"""Calculate 3D overlaps of two boxes.
|
618 |
+
|
619 |
+
Note:
|
620 |
+
This function calculates the overlaps between ``boxes1`` and
|
621 |
+
``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.
|
622 |
+
|
623 |
+
Args:
|
624 |
+
boxes1 (:obj:`EulerInstance3DBoxes`): Boxes 1 contain N boxes.
|
625 |
+
boxes2 (:obj:`EulerInstance3DBoxes`): Boxes 2 contain M boxes.
|
626 |
+
mode (str): Mode of iou calculation. Defaults to 'iou'.
|
627 |
+
eps (bool): Epsilon. Defaults to 1e-4.
|
628 |
+
|
629 |
+
Returns:
|
630 |
+
torch.Tensor: Calculated 3D overlaps of the boxes.
|
631 |
+
"""
|
632 |
+
assert isinstance(boxes1, EulerInstance3DBoxes)
|
633 |
+
assert isinstance(boxes2, EulerInstance3DBoxes)
|
634 |
+
assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should' \
|
635 |
+
f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
|
636 |
+
|
637 |
+
assert mode in ['iou']
|
638 |
+
|
639 |
+
rows = len(boxes1)
|
640 |
+
cols = len(boxes2)
|
641 |
+
if rows * cols == 0:
|
642 |
+
return boxes1.tensor.new(rows, cols)
|
643 |
+
|
644 |
+
corners1 = boxes1.corners
|
645 |
+
corners2 = boxes2.corners
|
646 |
+
_, iou3d = box3d_overlap(corners1, corners2, eps=eps)
|
647 |
+
return iou3d
|
648 |
+
|
649 |
+
@property
|
650 |
+
def gravity_center(self):
|
651 |
+
"""torch.Tensor: A tensor with center of each box in shape (N, 3)."""
|
652 |
+
return self.tensor[:, :3]
|
653 |
+
|
654 |
+
@property
|
655 |
+
def corners(self):
|
656 |
+
"""torch.Tensor: Coordinates of corners of all the boxes
|
657 |
+
in shape (N, 8, 3).
|
658 |
+
|
659 |
+
Convert the boxes to corners in clockwise order, in form of
|
660 |
+
``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
|
661 |
+
|
662 |
+
.. code-block:: none
|
663 |
+
|
664 |
+
up z
|
665 |
+
front y ^
|
666 |
+
/ |
|
667 |
+
/ |
|
668 |
+
(x0, y1, z1) + ----------- + (x1, y1, z1)
|
669 |
+
/| / |
|
670 |
+
/ | / |
|
671 |
+
(x0, y0, z1) + ----------- + + (x1, y1, z0)
|
672 |
+
| / . | /
|
673 |
+
| / origin | /
|
674 |
+
(x0, y0, z0) + ----------- + --------> right x
|
675 |
+
(x1, y0, z0)
|
676 |
+
"""
|
677 |
+
if self.tensor.numel() == 0:
|
678 |
+
return torch.empty([0, 8, 3], device=self.tensor.device)
|
679 |
+
|
680 |
+
dims = self.dims
|
681 |
+
corners_norm = torch.from_numpy(
|
682 |
+
np.stack(np.unravel_index(np.arange(8), [2] * 3),
|
683 |
+
axis=1)).to(device=dims.device, dtype=dims.dtype)
|
684 |
+
|
685 |
+
corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
|
686 |
+
# use relative origin
|
687 |
+
assert self.origin == (0.5, 0.5, 0.5), \
|
688 |
+
'self.origin != (0.5, 0.5, 0.5) needs to be checked!'
|
689 |
+
corners_norm = corners_norm - dims.new_tensor(self.origin)
|
690 |
+
corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
|
691 |
+
|
692 |
+
# rotate
|
693 |
+
corners = rotation_3d_in_euler(corners, self.tensor[:, 6:])
|
694 |
+
|
695 |
+
corners += self.tensor[:, :3].view(-1, 1, 3)
|
696 |
+
return corners
|
697 |
+
|
698 |
+
def transform(self, matrix):
|
699 |
+
if self.tensor.shape[0] == 0:
|
700 |
+
return
|
701 |
+
if not isinstance(matrix, torch.Tensor):
|
702 |
+
matrix = self.tensor.new_tensor(matrix)
|
703 |
+
points = self.tensor[:, :3]
|
704 |
+
constant = points.new_ones(points.shape[0], 1)
|
705 |
+
points_extend = torch.concat([points, constant], dim=-1)
|
706 |
+
points_trans = torch.matmul(points_extend, matrix.transpose(-2,
|
707 |
+
-1))[:, :3]
|
708 |
+
|
709 |
+
size = self.tensor[:, 3:6]
|
710 |
+
|
711 |
+
# angle_delta = matrix_to_euler_angles(matrix[:3,:3], 'ZXY')
|
712 |
+
# angle = self.tensor[:,6:] + angle_delta
|
713 |
+
ori_matrix = euler_angles_to_matrix(self.tensor[:, 6:], 'ZXY')
|
714 |
+
rot_matrix = matrix[:3, :3].expand_as(ori_matrix)
|
715 |
+
final = torch.bmm(rot_matrix, ori_matrix)
|
716 |
+
angle = matrix_to_euler_angles(final, 'ZXY')
|
717 |
+
|
718 |
+
self.tensor = torch.cat([points_trans, size, angle], dim=-1)
|
719 |
+
|
720 |
+
def scale(self, scale_factor: float) -> None:
|
721 |
+
"""Scale the box with horizontal and vertical scaling factors.
|
722 |
+
|
723 |
+
Args:
|
724 |
+
scale_factors (float): Scale factors to scale the boxes.
|
725 |
+
"""
|
726 |
+
self.tensor[:, :6] *= scale_factor
|
727 |
+
|
728 |
+
def rotate(self, angle, points=None):
|
729 |
+
"""Rotate boxes with points (optional) with the given angle or rotation
|
730 |
+
matrix.
|
731 |
+
|
732 |
+
Args:
|
733 |
+
angle (float | torch.Tensor | np.ndarray):
|
734 |
+
Rotation angle or rotation matrix.
|
735 |
+
points (torch.Tensor | np.ndarray | :obj:``, optional):
|
736 |
+
Points to rotate. Defaults to None.
|
737 |
+
|
738 |
+
Returns:
|
739 |
+
tuple or None: When ``points`` is None, the function returns
|
740 |
+
None, otherwise it returns the rotated points and the
|
741 |
+
rotation matrix ``rot_mat_T``.
|
742 |
+
"""
|
743 |
+
if not isinstance(angle, torch.Tensor):
|
744 |
+
angle = self.tensor.new_tensor(angle)
|
745 |
+
|
746 |
+
if angle.numel() == 1: # only given yaw angle for rotation
|
747 |
+
angle = self.tensor.new_tensor([angle, 0., 0.])
|
748 |
+
rot_matrix = euler_angles_to_matrix(angle, 'ZXY')
|
749 |
+
elif angle.numel() == 3:
|
750 |
+
rot_matrix = euler_angles_to_matrix(angle, 'ZXY')
|
751 |
+
elif angle.shape == torch.Size([3, 3]):
|
752 |
+
rot_matrix = angle
|
753 |
+
else:
|
754 |
+
raise NotImplementedError
|
755 |
+
|
756 |
+
rot_mat_T = rot_matrix.T
|
757 |
+
transform_matrix = torch.eye(4)
|
758 |
+
transform_matrix[:3, :3] = rot_matrix
|
759 |
+
self.transform(transform_matrix)
|
760 |
+
|
761 |
+
if points is not None:
|
762 |
+
if isinstance(points, torch.Tensor):
|
763 |
+
points[:, :3] = points[:, :3] @ rot_mat_T
|
764 |
+
elif isinstance(points, np.ndarray):
|
765 |
+
rot_mat_T = rot_mat_T.cpu().numpy()
|
766 |
+
points[:, :3] = np.dot(points[:, :3], rot_mat_T)
|
767 |
+
elif isinstance(points, ):
|
768 |
+
points.rotate(rot_mat_T)
|
769 |
+
else:
|
770 |
+
raise ValueError
|
771 |
+
return points, rot_mat_T
|
772 |
+
else:
|
773 |
+
return rot_mat_T
|
774 |
+
|
775 |
+
def flip(self, direction='X'):
|
776 |
+
"""Flip the boxes along the corresponding axis.
|
777 |
+
|
778 |
+
Args:
|
779 |
+
direction (str, optional): Flip axis. Defaults to 'X'.
|
780 |
+
"""
|
781 |
+
assert direction in ['X', 'Y', 'Z']
|
782 |
+
if direction == 'X':
|
783 |
+
self.tensor[:, 0] = -self.tensor[:, 0]
|
784 |
+
self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
|
785 |
+
self.tensor[:, 8] = -self.tensor[:, 8]
|
786 |
+
elif direction == 'Y':
|
787 |
+
self.tensor[:, 1] = -self.tensor[:, 1]
|
788 |
+
self.tensor[:, 6] = -self.tensor[:, 6]
|
789 |
+
self.tensor[:, 7] = -self.tensor[:, 7] + np.pi
|
790 |
+
elif direction == 'Z':
|
791 |
+
self.tensor[:, 2] = -self.tensor[:, 2]
|
792 |
+
self.tensor[:, 7] = -self.tensor[:, 7]
|
793 |
+
self.tensor[:, 8] = -self.tensor[:, 8] + np.pi
|
794 |
+
|
795 |
+
|
796 |
+
def rotation_3d_in_euler(points, angles, return_mat=False, clockwise=False):
|
797 |
+
"""Rotate points by angles according to axis.
|
798 |
+
|
799 |
+
Args:
|
800 |
+
points (np.ndarray | torch.Tensor | list | tuple ):
|
801 |
+
Points of shape (N, M, 3).
|
802 |
+
angles (np.ndarray | torch.Tensor | list | tuple):
|
803 |
+
Vector of angles in shape (N, 3)
|
804 |
+
return_mat: Whether or not return the rotation matrix (transposed).
|
805 |
+
Defaults to False.
|
806 |
+
clockwise: Whether the rotation is clockwise. Defaults to False.
|
807 |
+
|
808 |
+
Raises:
|
809 |
+
ValueError: when the axis is not in range [0, 1, 2], it will
|
810 |
+
raise value error.
|
811 |
+
|
812 |
+
Returns:
|
813 |
+
(torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3).
|
814 |
+
"""
|
815 |
+
batch_free = len(points.shape) == 2
|
816 |
+
if batch_free:
|
817 |
+
points = points[None]
|
818 |
+
|
819 |
+
if len(angles.shape) == 1:
|
820 |
+
angles = angles.expand(points.shape[:1] + (3, ))
|
821 |
+
# angles = torch.full(points.shape[:1], angles)
|
822 |
+
|
823 |
+
assert len(points.shape) == 3 and len(angles.shape) == 2 \
|
824 |
+
and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \
|
825 |
+
f'angles: {points.shape}, {angles.shape}'
|
826 |
+
|
827 |
+
assert points.shape[-1] in [2, 3], \
|
828 |
+
f'Points size should be 2 or 3 instead of {points.shape[-1]}'
|
829 |
+
|
830 |
+
rot_mat_T = euler_angles_to_matrix(angles, 'ZXY') # N, 3,3
|
831 |
+
|
832 |
+
rot_mat_T = rot_mat_T.transpose(-2, -1)
|
833 |
+
|
834 |
+
if clockwise:
|
835 |
+
raise NotImplementedError('clockwise')
|
836 |
+
|
837 |
+
if points.shape[0] == 0:
|
838 |
+
points_new = points
|
839 |
+
else:
|
840 |
+
points_new = torch.bmm(points, rot_mat_T)
|
841 |
+
|
842 |
+
if batch_free:
|
843 |
+
points_new = points_new.squeeze(0)
|
844 |
+
|
845 |
+
if return_mat:
|
846 |
+
if batch_free:
|
847 |
+
rot_mat_T = rot_mat_T.squeeze(0)
|
848 |
+
return points_new, rot_mat_T
|
849 |
+
else:
|
850 |
+
return points_new
|
851 |
+
|
852 |
+
|
853 |
+
def _axis_angle_rotation(axis: str, angle: np.ndarray) -> np.ndarray:
|
854 |
+
"""Return the rotation matrices for one of the rotations about an axis of
|
855 |
+
which Euler angles describe, for each value of the angle given.
|
856 |
+
|
857 |
+
Args:
|
858 |
+
axis: Axis label "X" or "Y or "Z".
|
859 |
+
angle: any shape tensor of Euler angles in radians
|
860 |
+
|
861 |
+
Returns:
|
862 |
+
Rotation matrices as tensor of shape (..., 3, 3).
|
863 |
+
"""
|
864 |
+
|
865 |
+
cos = np.cos(angle)
|
866 |
+
sin = np.sin(angle)
|
867 |
+
one = np.ones_like(angle)
|
868 |
+
zero = np.zeros_like(angle)
|
869 |
+
|
870 |
+
if axis == 'X':
|
871 |
+
R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos)
|
872 |
+
elif axis == 'Y':
|
873 |
+
R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos)
|
874 |
+
elif axis == 'Z':
|
875 |
+
R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one)
|
876 |
+
else:
|
877 |
+
raise ValueError('letter must be either X, Y or Z.')
|
878 |
+
|
879 |
+
return np.stack(R_flat, -1).reshape(angle.shape + (3, 3))
|
880 |
+
|
881 |
+
|
882 |
+
|
883 |
+
|
884 |
+
|
885 |
+
def is_inside_box(points, center, size, rotation_mat):
|
886 |
+
"""Check if points are inside a 3D bounding box.
|
887 |
+
|
888 |
+
Args:
|
889 |
+
points: 3D points, numpy array of shape (n, 3).
|
890 |
+
center: center of the box, numpy array of shape (3, ).
|
891 |
+
size: size of the box, numpy array of shape (3, ).
|
892 |
+
rotation_mat: rotation matrix of the box,
|
893 |
+
numpy array of shape (3, 3).
|
894 |
+
|
895 |
+
Returns:
|
896 |
+
Boolean array of shape (n, )
|
897 |
+
indicating if each point is inside the box.
|
898 |
+
"""
|
899 |
+
assert points.shape[1] == 3, 'points should be of shape (n, 3)'
|
900 |
+
points = np.array(points) # n,3
|
901 |
+
center = np.array(center) # n, 3
|
902 |
+
size = np.array(size) # n, 3
|
903 |
+
rotation_mat = np.array(rotation_mat)
|
904 |
+
assert rotation_mat.shape == (
|
905 |
+
3,
|
906 |
+
3,
|
907 |
+
), f'R should be shape (3,3), but got {rotation_mat.shape}'
|
908 |
+
pcd_local = (points - center) @ rotation_mat # n, 3
|
909 |
+
pcd_local = pcd_local / size * 2.0 # scale to [-1, 1] # n, 3
|
910 |
+
pcd_local = abs(pcd_local)
|
911 |
+
return ((pcd_local[:, 0] <= 1)
|
912 |
+
& (pcd_local[:, 1] <= 1)
|
913 |
+
& (pcd_local[:, 2] <= 1))
|
914 |
+
|
915 |
+
|
916 |
+
def normalize_box(scene_pcd, embodied_scan_bbox):
|
917 |
+
"""Find the smallest 6 DoF box that covers these points which 9 DoF box
|
918 |
+
covers.
|
919 |
+
|
920 |
+
Args:
|
921 |
+
scene_pcd (Tensor / ndarray):
|
922 |
+
(..., 3)
|
923 |
+
embodied_scan_bbox (Tensor / ndarray):
|
924 |
+
(9,) 9 DoF box
|
925 |
+
|
926 |
+
Returns:
|
927 |
+
Tensor: Transformed 3D box of shape (N, 8, 3).
|
928 |
+
"""
|
929 |
+
|
930 |
+
bbox = np.array(embodied_scan_bbox)
|
931 |
+
orientation = euler_to_matrix_np(bbox[np.newaxis, 6:])[0]
|
932 |
+
position = np.array(bbox[:3])
|
933 |
+
size = np.array(bbox[3:6])
|
934 |
+
obj_mask = np.array(
|
935 |
+
is_inside_box(scene_pcd[:, :3], position, size, orientation),
|
936 |
+
dtype=bool,
|
937 |
+
)
|
938 |
+
obj_pc = scene_pcd[obj_mask]
|
939 |
+
|
940 |
+
# resume the same if there's None
|
941 |
+
if obj_pc.shape[0] < 1:
|
942 |
+
return embodied_scan_bbox[:6]
|
943 |
+
xmin = np.min(obj_pc[:, 0])
|
944 |
+
ymin = np.min(obj_pc[:, 1])
|
945 |
+
zmin = np.min(obj_pc[:, 2])
|
946 |
+
xmax = np.max(obj_pc[:, 0])
|
947 |
+
ymax = np.max(obj_pc[:, 1])
|
948 |
+
zmax = np.max(obj_pc[:, 2])
|
949 |
+
bbox = np.array([
|
950 |
+
(xmin + xmax) / 2,
|
951 |
+
(ymin + ymax) / 2,
|
952 |
+
(zmin + zmax) / 2,
|
953 |
+
xmax - xmin,
|
954 |
+
ymax - ymin,
|
955 |
+
zmax - zmin,
|
956 |
+
])
|
957 |
+
return bbox
|
958 |
+
|
959 |
+
|
960 |
+
def from_9dof_to_6dof(pcd_data, bbox_):
|
961 |
+
# that's a kind of loss of information, so we don't recommend
|
962 |
+
return normalize_box(pcd_data, bbox_)
|
963 |
+
|
964 |
+
|
965 |
+
+def bbox_to_corners(centers, sizes, rot_mat: torch.Tensor) -> torch.Tensor:
+    """Transform bbox parameters to the 8 corners.
+
+    Args:
+        centers (Tensor): Box centers of shape (N, 3) or (B, N, 3).
+        sizes (Tensor): Box sizes of shape (N, 3) or (B, N, 3).
+        rot_mat (Tensor): Rotation matrices of shape (N, 3, 3)
+            or (B, N, 3, 3).
+
+    Returns:
+        Tensor: Box corners of shape (N, 8, 3) or (B, N, 8, 3).
+    """
+    device = centers.device
+    use_batch = False
+    if len(centers.shape) == 3:
+        use_batch = True
+        batch_size, n_proposals = centers.shape[0], centers.shape[1]
+        centers = centers.reshape(-1, 3)
+        sizes = sizes.reshape(-1, 3)
+        rot_mat = rot_mat.reshape(-1, 3, 3)
+
+    n_box = centers.shape[0]
+    if use_batch:
+        assert n_box == batch_size * n_proposals
+    centers = centers.unsqueeze(1).repeat(1, 8, 1)  # shape (N, 8, 3)
+    half_sizes = sizes.unsqueeze(1).repeat(1, 8, 1) / 2  # shape (N, 8, 3)
+    eight_corners_x = (torch.tensor([1, 1, 1, 1, -1, -1, -1, -1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners_y = (torch.tensor([1, 1, -1, -1, 1, 1, -1, -1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners_z = (torch.tensor([1, -1, -1, 1, 1, -1, -1, 1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners = torch.stack(
+        (eight_corners_x, eight_corners_y, eight_corners_z),
+        dim=-1)  # shape (N, 8, 3)
+    eight_corners = eight_corners * half_sizes  # shape (N, 8, 3)
+    # rot_mat: (N, 3, 3), eight_corners: (N, 8, 3)
+    rotated_corners = torch.matmul(eight_corners,
+                                   rot_mat.transpose(1, 2))  # shape (N, 8, 3)
+    res = centers + rotated_corners
+    if use_batch:
+        res = res.reshape(batch_size, n_proposals, 8, 3)
+    return res
+
+
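A minimal check of bbox_to_corners (values are illustrative): a unit cube at the origin with identity rotation yields the eight (+/-0.5, +/-0.5, +/-0.5) corners.

    import torch

    centers = torch.zeros(1, 3)
    sizes = torch.ones(1, 3)
    rot = torch.eye(3).unsqueeze(0)                 # identity rotation, (1, 3, 3)
    corners = bbox_to_corners(centers, sizes, rot)  # shape (1, 8, 3)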
+def euler_iou3d_corners(boxes1, boxes2):
+    rows = boxes1.shape[0]
+    cols = boxes2.shape[0]
+    if rows * cols == 0:
+        return boxes1.new(rows, cols)
+
+    _, iou3d = box3d_overlap(boxes1, boxes2)
+    return iou3d
+
+
+def euler_iou3d_bbox(center1, size1, rot1, center2, size2, rot2):
+    """Calculate the 3D IoU between two groups of 9 DoF bounding boxes.
+
+    Args:
+        center1 (Tensor): (n, 3) centers (cx, cy, cz) of group 1.
+        size1 (Tensor): (n, 3) sizes (l, w, h) of group 1.
+        rot1 (Tensor): rotation matrices of group 1.
+        center2 (Tensor): (m, 3) centers (cx, cy, cz) of group 2.
+        size2 (Tensor): (m, 3) sizes (l, w, h) of group 2.
+        rot2 (Tensor): rotation matrices of group 2.
+
+    Returns:
+        numpy.ndarray: (n, m) the 3D IoU.
+    """
+    if torch.cuda.is_available():
+        center1 = center1.cuda()
+        size1 = size1.cuda()
+        rot1 = rot1.cuda()
+        center2 = center2.cuda()
+        size2 = size2.cuda()
+        rot2 = rot2.cuda()
+    corners1 = bbox_to_corners(center1, size1, rot1)
+    corners2 = bbox_to_corners(center2, size2, rot2)
+    result = euler_iou3d_corners(corners1, corners2)
+
+    if torch.cuda.is_available():
+        result = result.detach().cpu()
+    return result.numpy()
+
+
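A quick self-test sketch (toy boxes, assuming the box3d_overlap dependency imported at the top of this file is available): the IoU of a box with itself should come out as roughly 1.0.

    import torch

    center = torch.tensor([[0.0, 0.0, 0.0]])
    size = torch.tensor([[1.0, 1.0, 1.0]])
    rot = torch.eye(3).unsqueeze(0)
    iou = euler_iou3d_bbox(center, size, rot, center, size, rot)  # ~[[1.0]]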
+def index_box(boxes: List[torch.tensor],
+              indices: Union[List[torch.tensor], torch.tensor])\
+        -> Union[List[torch.tensor], torch.tensor]:
+    """Index a group of boxes, recursing into lists/tuples of tensors.
+
+    Args:
+        boxes (list/tuple, tensor): boxes in a group.
+        indices (list/tuple, tensor): indices to select.
+
+    Returns:
+        list / Tensor: the selected boxes.
+    """
+    if isinstance(boxes, (list, tuple)):
+        return [index_box(box, indices) for box in boxes]
+    else:
+        return boxes[indices]
+
+
+def to_9dof_box(box: List[torch.tensor]):
+    """Convert a group of bounding boxes represented in [center, size, rot]
+    format to 9 DoF format.
+
+    Args:
+        box (list/tuple, tensor): boxes in a group.
+
+    Returns:
+        Tensor: 9 DoF format. (num, 9)
+    """
+
+    return EulerInstance3DBoxes(box, origin=(0.5, 0.5, 0.5))
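A hedged sketch of how the wrapper above gets used (toy box, hypothetical call): wrapping a raw (N, 9) tensor yields an EulerInstance3DBoxes object whose overlaps() method backs the IoU computation in vg_evaluator.py below.

    import torch

    boxes = to_9dof_box(torch.tensor([[0., 0., 0., 1., 1., 1., 0., 0., 0.]]))
    # boxes.overlaps(boxes, boxes) would give the pairwise IoU matrix,
    # assuming EulerInstance3DBoxes follows the usage in vg_evaluator.py.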
test_annotations_mmscan.json
ADDED
The diff for this file is too large to render.
See raw diff
vg_evaluator.py
ADDED
@@ -0,0 +1,361 @@
+from typing import List, Tuple
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+from mmscan_utils.box_metric import (get_average_precision,
+                                     get_general_topk_scores,
+                                     subset_get_average_precision)
+from mmscan_utils.box_utils import index_box, to_9dof_box
+
+
+class VisualGroundingEvaluator:
+    """Evaluator for the MMScan Visual Grounding benchmark. The evaluation
+    metrics include "AP", "AP_C", "AR" and "gTop-k".
+
+    Attributes:
+        save_buffer(list[dict]): Buffer of the raw inputs.
+
+        records(list[dict]): Metric results for each sample.
+
+        category_records(dict): Metric results for each category
+            (average of all samples with the same category).
+    Args:
+        show_results(bool): Whether to print the evaluation results.
+            Defaults to True.
+    """
+
+    def __init__(self, show_results: bool = True) -> None:
+
+        self.show_results = show_results
+        self.eval_metric_type = ['AP', 'AR']
+        self.top_k_visible = [1, 3, 5]
+        self.call_for_category_mode = True
+
+        for top_k in [1, 3, 5, 10]:
+            self.eval_metric_type.append(f'gTop-{top_k}')
+
+        self.iou_thresholds = [0.25, 0.50]
+        self.eval_metric = []
+        for iou_thr in self.iou_thresholds:
+            for eval_type in self.eval_metric_type:
+                self.eval_metric.append(eval_type + '@' + str(iou_thr))
+
+        self.reset()
+
+    def reset(self) -> None:
+        """Reset the evaluator, clearing the buffer and records."""
+        self.save_buffer = []
+        self.records = []
+        self.category_records = {}
+
+    def update(self, raw_batch_input: List[dict]) -> None:
+        """Update a batch of results to the buffer.
+
+        Args:
+            raw_batch_input (list[dict]):
+                Batch of the raw original input.
+        """
+        self.__check_format__(raw_batch_input)
+        self.save_buffer.extend(raw_batch_input)
+
+    def start_evaluation(self) -> dict:
+        """Start the evaluation process.
+
+        It iterates over the saved buffer and evaluates each item.
+        Returns:
+            category_records(dict): Metric results per category.
+        """
+
+        category_collect = {}
+
+        for data_item in tqdm(self.save_buffer):
+
+            metric_for_single = {}
+
+            # (1) len(gt) == 0: skip the sample
+            if self.__is_zero__(data_item['gt_bboxes']):
+                continue
+
+            # (2) len(pred) == 0: the model predicted nothing,
+            #     so every metric for this sample is zero
+            if self.__is_zero__(data_item['pred_bboxes']):
+                for iou_thr in self.iou_thresholds:
+                    metric_for_single[f'AP@{iou_thr}'] = 0
+                    metric_for_single[f'AR@{iou_thr}'] = 0
+                    for topk in [1, 3, 5, 10]:
+                        metric_for_single[f'gTop-{topk}@{iou_thr}'] = 0
+
+                data_item['num_gts'] = len(data_item['gt_bboxes'])
+                data_item.update(metric_for_single)
+                self.records.append(data_item)
+                continue
+
+            iou_array, pred_score = self.__calculate_iou_array_(data_item)
+            if self.call_for_category_mode:
+                category = self.__category_mapping__(data_item['subclass'])
+                if category not in category_collect.keys():
+                    category_collect[category] = {
+                        'ious': [],
+                        'scores': [],
+                        'sample_indices': [],
+                        'cnt': 0,
+                    }
+
+                category_collect[category]['ious'].extend(iou_array)
+                category_collect[category]['scores'].extend(pred_score)
+                category_collect[category]['sample_indices'].extend(
+                    [data_item['index']] * len(iou_array))
+                category_collect[category]['cnt'] += 1
+
+            for iou_thr in self.iou_thresholds:
+                # AP/AR metrics
+                AP, AR = get_average_precision(iou_array, iou_thr)
+                metric_for_single[f'AP@{iou_thr}'] = AP
+                metric_for_single[f'AR@{iou_thr}'] = AR
+
+                # gTop-k metrics
+                metric_for_single.update(
+                    get_general_topk_scores(iou_array, iou_thr))
+
+            data_item['num_gts'] = iou_array.shape[1]
+            data_item.update(metric_for_single)
+            self.records.append(data_item)
+
+        self.collect_result()
+
+        if self.call_for_category_mode:
+            for iou_thr in self.iou_thresholds:
+                self.category_records['overall'][f'AP_C@{iou_thr}'] = 0
+                self.category_records['overall'][f'AR_C@{iou_thr}'] = 0
+
+                for category in category_collect:
+                    AP_C, AR_C = subset_get_average_precision(
+                        category_collect[category], iou_thr)
+                    self.category_records[category][f'AP_C@{iou_thr}'] = AP_C
+                    self.category_records[category][f'AR_C@{iou_thr}'] = AR_C
+                    self.category_records['overall'][f'AP_C@{iou_thr}'] += (
+                        AP_C * category_collect[category]['cnt'] /
+                        len(self.records))
+                    self.category_records['overall'][f'AR_C@{iou_thr}'] += (
+                        AR_C * category_collect[category]['cnt'] /
+                        len(self.records))
+
+        return self.category_records
+
+    def collect_result(self) -> dict:
+        """Collect the results from the evaluation process.
+
+        Stores them based on their subclass.
+        Returns:
+            category_results(dict): Average results per category.
+        """
+        category_results = {}
+        category_results['overall'] = {}
+
+        for metric_name in self.eval_metric:
+            category_results['overall'][metric_name] = []
+        category_results['overall']['num_gts'] = 0
+
+        for data_item in self.records:
+            category = self.__category_mapping__(data_item['subclass'])
+
+            if category not in category_results:
+                category_results[category] = {}
+                for metric_name in self.eval_metric:
+                    category_results[category][metric_name] = []
+                category_results[category]['num_gts'] = 0
+
+            for metric_name in self.eval_metric:
+                category_results[category][metric_name].append(
+                    data_item[metric_name])
+                category_results['overall'][metric_name].append(
+                    data_item[metric_name])
+
+            category_results['overall']['num_gts'] += data_item['num_gts']
+            category_results[category]['num_gts'] += data_item['num_gts']
+        for category in category_results:
+            for metric_name in self.eval_metric:
+                category_results[category][metric_name] = np.mean(
+                    category_results[category][metric_name])
+
+        self.category_records = category_results
+
+        return category_results
+
+    def print_result(self) -> list:
+        """Build the result table.
+
+        Returns:
+            table_data(list[list]): The metric result table rows.
+        """
+        assert len(self.category_records) > 0, 'No result yet.'
+        self.category_records = {
+            key: self.category_records[key]
+            for key in sorted(self.category_records.keys(), reverse=True)
+        }
+
+        header = ['Type']
+        header.extend(self.category_records.keys())
+        table_columns = [[] for _ in range(len(header))]
+
+        # metric rows
+        for iou_thr in self.iou_thresholds:
+            show_in_table = (['AP', 'AR'] +
+                             [f'gTop-{k}' for k in self.top_k_visible]
+                             if not self.call_for_category_mode else
+                             ['AP', 'AR', 'AP_C', 'AR_C'] +
+                             [f'gTop-{k}' for k in self.top_k_visible])
+
+            for metric_type in show_in_table:
+                table_columns[0].append(metric_type + ' ' + str(iou_thr))
+
+            for i, category in enumerate(self.category_records.keys()):
+                ap = self.category_records[category][f'AP@{iou_thr}']
+                ar = self.category_records[category][f'AR@{iou_thr}']
+                table_columns[i + 1].append(f'{float(ap):.4f}')
+                table_columns[i + 1].append(f'{float(ar):.4f}')
+
+                if self.call_for_category_mode:
+                    ap = self.category_records[category][f'AP_C@{iou_thr}']
+                    ar = self.category_records[category][f'AR_C@{iou_thr}']
+                    table_columns[i + 1].append(f'{float(ap):.4f}')
+                    table_columns[i + 1].append(f'{float(ar):.4f}')
+                for k in self.top_k_visible:
+                    top_k = self.category_records[category][
+                        f'gTop-{k}@{iou_thr}']
+                    table_columns[i + 1].append(f'{float(top_k):.4f}')
+
+        # number of ground truths per category
+        table_columns[0].append('Num GT')
+        for i, category in enumerate(self.category_records.keys()):
+            table_columns[i + 1].append(
+                f'{int(self.category_records[category]["num_gts"])}')
+
+        table_data = [header]
+        table_rows = list(zip(*table_columns))
+        table_data += table_rows
+        table_data = [list(row) for row in zip(*table_data)]
+
+        return table_data
+
+    def __category_mapping__(self, sub_class: str) -> str:
+        """Map the subclass name to the category name.
+
+        Args:
+            sub_class (str): The subclass name in the original samples.
+
+        Returns:
+            category (str): The category name.
+        """
+        sub_class = sub_class.lower()
+        sub_class = sub_class.replace('single', 'sngl')
+        sub_class = sub_class.replace('inter', 'int')
+        sub_class = sub_class.replace('unique', 'uniq')
+        sub_class = sub_class.replace('common', 'cmn')
+        sub_class = sub_class.replace('attribute', 'attr')
+        if 'sngl' in sub_class and ('attr' in sub_class or 'eq' in sub_class):
+            sub_class = 'vg_sngl_attr'
+        return sub_class
+
+    def __calculate_iou_array_(
+            self, data_item: dict) -> Tuple[np.ndarray, np.ndarray]:
+        """Calculate the information needed for evaluation.
+
+        Args:
+            data_item (dict): One sample in the original input format.
+
+        Returns:
+            np.ndarray, np.ndarray :
+                The iou array sorted by the confidence and the
+                confidence scores.
+        """
+
+        pred_bboxes = data_item['pred_bboxes']
+        gt_bboxes = data_item['gt_bboxes']
+        # Sort the bounding boxes based on their scores.
+        pred_scores = data_item['pred_scores']
+        top_idxs = torch.argsort(pred_scores, descending=True)
+        pred_scores = pred_scores[top_idxs]
+
+        pred_bboxes = to_9dof_box(index_box(pred_bboxes, top_idxs))
+        gt_bboxes = to_9dof_box(gt_bboxes)
+
+        iou_matrix = pred_bboxes.overlaps(pred_bboxes,
+                                          gt_bboxes)  # (num_query, num_gt)
+        # Convert to numpy for the downstream AP/gTop-k calculation.
+        pred_scores = pred_scores.cpu().numpy()
+        iou_array = iou_matrix.cpu().numpy()
+
+        return iou_array, pred_scores
+
+    def __is_zero__(self, box):
+        if isinstance(box, (list, tuple)):
+            return (len(box[0]) == 0)
+        return (len(box) == 0)
+
+    def __check_format__(self, raw_input: List[dict]) -> None:
+        """Check that the input conforms to the mmscan evaluation format and
+        transform the input box format.
+
+        Args:
+            raw_input (list[dict]): The input of the VG evaluator.
+        """
+        assert isinstance(
+            raw_input,
+            list), 'The input of VG evaluator should be a list of dict. '
+
+        for _index in tqdm(range(len(raw_input))):
+            if 'index' not in raw_input[_index]:
+                raw_input[_index]['index'] = len(self.save_buffer) + _index
+
+            if 'subclass' not in raw_input[_index]:
+                raw_input[_index]['subclass'] = 'non-class'
+
+            assert 'gt_bboxes' in raw_input[_index]
+            assert 'pred_bboxes' in raw_input[_index]
+            assert 'pred_scores' in raw_input[_index]
+
+            for mode in ['pred_bboxes', 'gt_bboxes']:
+                if (isinstance(raw_input[_index][mode], dict)
+                        and 'center' in raw_input[_index][mode]):
+                    raw_input[_index][mode] = [
+                        torch.tensor(raw_input[_index][mode]['center']),
+                        torch.tensor(raw_input[_index][mode]['size']).to(
+                            torch.float32),
+                        torch.tensor(raw_input[_index][mode]['rot']).to(
+                            torch.float32)
+                    ]
+
+
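A hedged sketch of driving the evaluator directly (one toy sample with made-up values; the subclass string is illustrative). Boxes may also arrive as {'center', 'size', 'rot'} dicts, which __check_format__ converts into [center, size, rot] tensor lists before evaluation.

    import numpy as np
    import torch

    sample = {
        'gt_bboxes': {'center': [[0., 0., 0.]], 'size': [[1., 1., 1.]],
                      'rot': [np.eye(3).tolist()]},
        'pred_bboxes': {'center': [[0., 0., 0.]], 'size': [[1., 1., 1.]],
                        'rot': [np.eye(3).tolist()]},
        'pred_scores': torch.tensor([0.8]),
        'subclass': 'VG_Single_Attribute_Unique',  # hypothetical subclass
    }
    evaluator = VisualGroundingEvaluator(show_results=False)
    evaluator.update([sample])
    overall = evaluator.start_evaluation()['overall']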
+def trun_box(box_list):
+    """Truncate box coordinates to two decimal places."""
+    trun_box_list = []
+    for box in box_list:
+        trun_box_list.append([round(x, 2) for x in box])
+    return trun_box_list
+
+
+def evaluation_for_challenge(gt_data, pred_data):
+    inputs = []
+    for sample_ID in gt_data:
+        batch_result = {}
+        if sample_ID not in pred_data:
+            # Missing predictions count as an empty prediction set.
+            batch_result["pred_scores"] = torch.zeros(0, )
+            batch_result["pred_bboxes"] = torch.zeros(0, 9)
+        else:
+            batch_result["pred_scores"] = torch.tensor(
+                pred_data[sample_ID]["score"])
+            batch_result["pred_bboxes"] = torch.tensor(
+                trun_box(pred_data[sample_ID]["pred_bboxes"]))
+
+        batch_result["gt_bboxes"] = torch.tensor(gt_data[sample_ID])
+        batch_result["subclass"] = sample_ID.split('__')[0]
+        inputs.append(batch_result)
+
+    vg_evaluator = VisualGroundingEvaluator()
+    vg_evaluator.update(inputs)
+    results = vg_evaluator.start_evaluation()
+    # vg_evaluator.print_result()
+    return results['overall']
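An end-to-end sketch with made-up sample IDs and boxes: the '<subclass>__<sample>' keying matches how evaluation_for_challenge derives the subclass, but actual MMScan IDs may differ. Ground-truth values are (num_gt, 9) box lists, and predictions carry 'pred_bboxes' plus per-box 'score'.

    gt = {'vg_sngl_attr__0001': [[0., 0., 0., 1., 1., 1., 0., 0., 0.]]}
    pred = {'vg_sngl_attr__0001': {
        'pred_bboxes': [[0., 0., 0., 1., 1., 1., 0., 0., 0.]],
        'score': [0.9],
    }}
    overall = evaluation_for_challenge(gt, pred)  # e.g. overall['AP@0.25']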