rbler committed (verified)
Commit 88ad01d · Parent(s): dc8d315

Upload 7 files

app.py CHANGED
@@ -4,8 +4,39 @@ import pandas as pd
 import os
 
 LEADERBOARD_CSV = "leaderboard.csv"
+import pandas as pd
+import os
+from datetime import datetime, date
+
+SUBMIT_RECORD = "submissions.csv"
+MAX_SUBMIT_PER_DAY = 2
+
+def check_submission_limit(username):
+    if not os.path.exists(SUBMIT_RECORD):
+        return True  # no one has submitted yet
+
+    df = pd.read_csv(SUBMIT_RECORD)
+    today = date.today()
+
+    user_today_subs = df[
+        (df["username"] == username) &
+        (pd.to_datetime(df["timestamp"]).dt.date == today)
+    ]
+
+    return len(user_today_subs) < MAX_SUBMIT_PER_DAY
+
+def record_submission(username):
+    now = datetime.now().isoformat()
+    if os.path.exists(SUBMIT_RECORD):
+        df = pd.read_csv(SUBMIT_RECORD)
+    else:
+        df = pd.DataFrame(columns=["username", "timestamp"])
+    df.loc[len(df)] = {"username": username, "timestamp": now}
+    df.to_csv(SUBMIT_RECORD, index=False)
 
 def evaluate_and_update(pred_file, username):
+    if not check_submission_limit(username):
+        return "⛔ Submission limit exceeded for today.", pd.read_csv(LEADERBOARD_CSV)
     score = run_evaluation(pred_file.name)
     if os.path.exists(LEADERBOARD_CSV):
         df = pd.read_csv(LEADERBOARD_CSV)
@@ -19,7 +50,7 @@ def evaluate_and_update(pred_file, username):
 with gr.Blocks() as demo:
     gr.Markdown("# 🧊 3D IoU Challenge")
     name = gr.Textbox(label="Username")
-    upload = gr.File(label="Upload your prediction (.npy)")
+    upload = gr.File(label="Upload your prediction (.json)")
     score_text = gr.Textbox(label="Evaluation score")
     leaderboard = gr.Dataframe(headers=["Name", "Score"], interactive=False)
 
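The rate-limit helpers above are self-contained, so they can be sanity-checked outside Gradio. A minimal sketch (assuming app.py is importable as a module named app; the two-per-day cap and the CSV names come from the diff):

    from app import check_submission_limit, record_submission

    record_submission("alice")
    record_submission("alice")
    assert check_submission_limit("alice") is False  # third submission today is rejected
    assert check_submission_limit("bob") is True     # other users are unaffected
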
evaluate.py CHANGED
@@ -1,16 +1,11 @@
-from datasets import load_dataset
 import numpy as np
-import torch
-from pytorch3d.ops import box3d_overlap
+import json
+from vg_evaluator import evaluation_for_challenge
 
 def run_evaluation(pred_path):
-    pred_boxes = np.load(pred_path)[:, :7]
-    pred_boxes = torch.tensor(pred_boxes).float().unsqueeze(0)
-
-    dataset = load_dataset("yourname/3d-iou-challenge-data", split="test")
-    gt_boxes = torch.tensor(dataset[0]["boxes"]).float().unsqueeze(0)
-
-    iou_matrix, _ = box3d_overlap(pred_boxes, gt_boxes)
-    iou = iou_matrix.diagonal(dim1=1, dim2=2).mean()
-
-    return float(iou)
+    pred_ = json.load(open(pred_path))
+    gt_ = json.load(open('test_annotations_mmscan.json'))
+    results = evaluation_for_challenge(gt_, pred_)
+
+    # NOTE: the metric key below was mangled by the page's email obfuscation;
+    # the evaluator's keys have the form 'gTop-{k}@{iou_thr}', e.g. 'gTop-1@0.25'.
+    return results['gTop-[email protected]']
 
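Because the headline key is garbled above, one way to confirm the exact name is to print the keys the evaluator returns. A sketch, assuming evaluation_for_challenge returns a flat metric dict (its definition sits further down in vg_evaluator.py than the rendered diff shows, but run_evaluation indexing it by metric name implies this shape):

    results = evaluation_for_challenge(gt_, pred_)
    print(sorted(results))
    # expected to contain e.g. 'AP@0.25', 'AR@0.5', 'gTop-1@0.25', ..., 'gTop-10@0.5'
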
leaderboard.csv ADDED
@@ -0,0 +1,12 @@
+name,score
+aaa,0.1947003033781529
+eee,0.0
+sss,0.0
+sss,0.0
+sss,0.0
+sss,0.0
+sss,0.0
+sss,0.0
+sss,0.0
+bbb,0.0
+aaa,0.0
mmscan_utils/box_metric.py ADDED
@@ -0,0 +1,277 @@
+from typing import Dict, Tuple, Union
+
+import numpy as np
+import torch
+from scipy.optimize import linear_sum_assignment
+
+
+def average_precision(recalls: np.ndarray,
+                      precisions: np.ndarray,
+                      mode: str = 'area') -> np.ndarray:
+    """Calculate average precision (for single or multiple scales).
+
+    Args:
+        recalls (np.ndarray): Recalls with shape of (num_scales, num_dets)
+            or (num_dets, ).
+        precisions (np.ndarray): Precisions with shape of
+            (num_scales, num_dets) or (num_dets, ).
+        mode (str): 'area' or '11points'. 'area' means calculating the area
+            under the precision-recall curve, '11points' means calculating
+            the average precision of recalls at [0, 0.1, ..., 1].
+            Defaults to 'area'.
+
+    Returns:
+        np.ndarray: Calculated average precision.
+    """
+    if recalls.ndim == 1:
+        recalls = recalls[np.newaxis, :]
+        precisions = precisions[np.newaxis, :]
+
+    assert recalls.shape == precisions.shape
+    assert recalls.ndim == 2
+
+    num_scales = recalls.shape[0]
+    ap = np.zeros(num_scales, dtype=np.float32)
+
+    if mode == 'area':
+        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
+        ones = np.ones((num_scales, 1), dtype=recalls.dtype)
+        mrec = np.hstack((zeros, recalls, ones))
+        mpre = np.hstack((zeros, precisions, zeros))
+        for i in range(mpre.shape[1] - 1, 0, -1):
+            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
+        for i in range(num_scales):
+            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
+            ap[i] = np.sum(
+                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
+
+    elif mode == '11points':
+        for i in range(num_scales):
+            for thr in np.arange(0, 1 + 1e-3, 0.1):
+                precs = precisions[i, recalls[i, :] >= thr]
+                prec = precs.max() if precs.size > 0 else 0
+                ap[i] += prec
+        ap /= 11
+    else:
+        raise ValueError(
+            'Unrecognized mode, only "area" and "11points" are supported')
+    return ap
+
+
+def get_f1_scores(iou_matrix: Union[np.ndarray, torch.Tensor],
+                  iou_threshold) -> float:
+    """Refer to the algorithm in Multi3DRefer to compute the F1 score.
+
+    Args:
+        iou_matrix (ndarray/tensor):
+            The iou matrix of the predictions and ground truths with
+            shape (num_preds, num_gts).
+        iou_threshold (float): 0.25/0.5
+
+    Returns:
+        float: the f1 score as the result
+    """
+    iou_thr_tp = 0
+    pred_bboxes_count, gt_bboxes_count = iou_matrix.shape
+
+    square_matrix_len = max(gt_bboxes_count, pred_bboxes_count)
+    iou_matrix_fill = np.zeros(shape=(square_matrix_len, square_matrix_len),
+                               dtype=np.float32)
+    iou_matrix_fill[:pred_bboxes_count, :gt_bboxes_count] = iou_matrix
+
+    # apply matching algorithm
+    row_idx, col_idx = linear_sum_assignment(iou_matrix_fill * -1)
+
+    # iterate matched pairs, check ious
+    for i in range(pred_bboxes_count):
+        # index the zero-padded square matrix: col_idx[i] may point at a
+        # padding column when there are more predictions than ground truths
+        iou = iou_matrix_fill[row_idx[i], col_idx[i]]
+        # calculate true positives
+        if iou >= iou_threshold:
+            iou_thr_tp += 1
+
+    # calculate precision, recall and f1-score for the current scene
+    f1_score = 2 * iou_thr_tp / (pred_bboxes_count + gt_bboxes_count)
+
+    return f1_score
+
+
+def __get_fp_tp_array__(iou_array: Union[np.ndarray, torch.Tensor],
+                        iou_threshold: float) \
+        -> Tuple[np.ndarray, np.ndarray]:
+    """Compute the false-positive and true-positive array for each prediction.
+
+    Args:
+        iou_array (ndarray/tensor):
+            the iou matrix of the predictions and ground truths
+            (shape num_preds, num_gts)
+        iou_threshold (float): 0.25/0.5
+
+    Returns:
+        np.ndarray, np.ndarray: (len(preds), ),
+            the false-positive and true-positive array for each prediction.
+    """
+    gt_matched_records = np.zeros((len(iou_array[0])), dtype=bool)
+    tp_thr = np.zeros((len(iou_array)))
+    fp_thr = np.zeros((len(iou_array)))
+
+    for d in range(len(iou_array)):
+        iou_max = -np.inf
+        cur_iou = iou_array[d]
+        num_gts = cur_iou.shape[0]
+
+        if num_gts > 0:
+            for j in range(num_gts):
+                iou = cur_iou[j]
+                if iou > iou_max:
+                    iou_max = iou
+                    jmax = j
+
+        if iou_max >= iou_threshold:
+            if not gt_matched_records[jmax]:
+                gt_matched_records[jmax] = True
+                tp_thr[d] = 1.0
+            else:
+                fp_thr[d] = 1.0
+        else:
+            fp_thr[d] = 1.0
+
+    return fp_thr, tp_thr
+
+
+def subset_get_average_precision(subset_results: dict,
+                                 iou_thr: float)\
+        -> Tuple[np.ndarray, np.ndarray]:
+    """Return the average precision and max recall for a given iou array,
+    a "subset" version in which the num_gt of each sample may differ.
+
+    Args:
+        subset_results (dict):
+            The results, consisting of scores, sample_indices, ious.
+            sample_indices means which sample the prediction belongs to.
+        iou_thr (float): 0.25/0.5
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: the average precision and max recall.
+    """
+    confidences = subset_results['scores']
+    sample_indices = subset_results['sample_indices']
+    ious = subset_results['ious']
+    gt_matched_records = {}
+    total_gt_boxes = 0
+    for i, sample_idx in enumerate(sample_indices):
+        if sample_idx not in gt_matched_records:
+            gt_matched_records[sample_idx] = np.zeros((len(ious[i]), ),
+                                                      dtype=bool)
+            total_gt_boxes += ious[i].shape[0]
+
+    confidences = np.array(confidences)
+    sorted_inds = np.argsort(-confidences)
+    sample_indices = [sample_indices[i] for i in sorted_inds]
+    ious = [ious[i] for i in sorted_inds]
+
+    tp_thr = np.zeros(len(sample_indices))
+    fp_thr = np.zeros(len(sample_indices))
+
+    for d, sample_idx in enumerate(sample_indices):
+        iou_max = -np.inf
+        cur_iou = ious[d]
+        num_gts = cur_iou.shape[0]
+        if num_gts > 0:
+            for j in range(num_gts):
+                iou = cur_iou[j]
+                if iou > iou_max:
+                    iou_max = iou
+                    jmax = j
+
+        if iou_max >= iou_thr:
+            if not gt_matched_records[sample_idx][jmax]:
+                gt_matched_records[sample_idx][jmax] = True
+                tp_thr[d] = 1.0
+            else:
+                fp_thr[d] = 1.0
+        else:
+            fp_thr[d] = 1.0
+
+    fp = np.cumsum(fp_thr)
+    tp = np.cumsum(tp_thr)
+    recall = tp / float(total_gt_boxes)
+    precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
+
+    return average_precision(recall, precision), np.max(recall)
+
+
+def get_average_precision(iou_array: np.ndarray, iou_threshold: float) \
+        -> Tuple[np.ndarray, np.ndarray]:
+    """Return the average precision and max recall for a given iou array.
+
+    Args:
+        iou_array (ndarray/tensor):
+            The iou matrix of the predictions and ground truths
+            (shape len(preds) * len(gts))
+        iou_threshold (float): 0.25/0.5
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: the average precision and max recall.
+    """
+    fp, tp = __get_fp_tp_array__(iou_array, iou_threshold)
+    fp_cum = np.cumsum(fp)
+    tp_cum = np.cumsum(tp)
+    recall = tp_cum / float(iou_array.shape[1])
+    precision = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)
+
+    return average_precision(recall, precision), np.max(recall)
+
+
+def get_general_topk_scores(iou_array: Union[np.ndarray, torch.Tensor],
+                            iou_threshold: float,
+                            mode: str = 'sigma') -> Dict[str, float]:
+    """Compute the multi-topk metric; we provide two modes.
+
+    Args:
+        iou_array (ndarray/tensor):
+            the iou matrix of the predictions and ground truths
+            (shape len(preds) * len(gts))
+        iou_threshold (float): 0.25/0.5
+        mode (str): 'sigma'/'simple'
+            "simple": 1/N * Hit(min(N*k, len(pred)))
+            "sigma": 1/N * Sigma [Hit(min(n*k, len(pred))) >= n], n = 1~N
+            Hit(M) returns the number of ground truths hit by
+            the first M predictions.
+            N = the number of ground truths.
+            Defaults to 'sigma'.
+
+    Returns:
+        Dict[str, float]: the score of the multi-topk metric.
+    """
+
+    assert mode in ['sigma', 'simple']
+    topk_scores = []
+    gt_matched_records = np.zeros(len(iou_array[0]))
+    num_gt = len(gt_matched_records)
+    for d in range(len(iou_array)):
+        iou_max = -np.inf
+        cur_iou = iou_array[d]
+
+        for j in range(len(iou_array[d])):
+            iou = cur_iou[j]
+            if iou > iou_max:
+                iou_max = iou
+                j_max = j
+        if iou_max >= iou_threshold:
+            gt_matched_records[j_max] = True
+        topk_scores.append(gt_matched_records.copy())
+
+    topk_results = {}
+    for topk in [1, 3, 5, 10]:
+        if mode == 'sigma':
+            scores = [
+                int(
+                    np.sum(topk_scores[min(n * topk, len(topk_scores)) -
+                                       1]) >= n) for n in range(1, num_gt + 1)
+            ]
+            result = np.sum(scores) / num_gt
+        else:
+            query_index = min(num_gt * topk, len(topk_scores)) - 1
+            result = np.sum(topk_scores[query_index]) / num_gt
+        topk_results[f'gTop-{topk}@{iou_threshold}'] = result
+    return topk_results
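
A toy run of these metrics on a hand-made IoU matrix (a sketch; the import path assumes this commit's repo layout):

    import numpy as np
    from mmscan_utils.box_metric import (get_average_precision, get_f1_scores,
                                         get_general_topk_scores)

    # 3 predictions (rows, sorted by confidence) vs 2 ground truths (columns).
    iou = np.array([[0.6, 0.1],
                    [0.2, 0.3],
                    [0.1, 0.55]])

    ap, max_recall = get_average_precision(iou, 0.25)  # ap = [1.0], recall = 1.0
    f1 = get_f1_scores(iou, 0.25)                      # 2 * 2 TP / (3 + 2) = 0.8
    topk = get_general_topk_scores(iou, 0.25)          # {'gTop-1@0.25': 1.0, ...}
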
mmscan_utils/box_utils.py ADDED
@@ -0,0 +1,1079 @@
+from abc import abstractmethod
+from typing import Iterator, List, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import torch
+
+try:
+    from pytorch3d.ops import box3d_overlap
+    from pytorch3d.transforms import (euler_angles_to_matrix,
+                                      matrix_to_euler_angles)
+except ImportError:
+    box3d_overlap = None
+    euler_angles_to_matrix = None
+    matrix_to_euler_angles = None
+from torch import Tensor
+
+
+class BaseInstance3DBoxes:
+    """Base class for 3D boxes.
+
+    Note:
+        The box is bottom centered, i.e. the relative position of origin in
+        the box is (0.5, 0.5, 0).
+
+    Args:
+        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
+            data with shape (N, box_dim).
+        box_dim (int): Number of the dimension of a box. Each row is
+            (x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
+        with_yaw (bool): Whether the box is with yaw rotation. If False, the
+            value of yaw will be set to 0 as minmax boxes. Defaults to True.
+        origin (Tuple[float]): Relative position of the box origin.
+            Defaults to (0.5, 0.5, 0). This will guide the box to be
+            converted to (0.5, 0.5, 0) mode.
+
+    Attributes:
+        tensor (Tensor): Float matrix with shape (N, box_dim).
+        box_dim (int): Integer indicating the dimension of a box. Each row is
+            (x, y, z, x_size, y_size, z_size, yaw, ...).
+        with_yaw (bool): If False, the value of yaw will be set to 0 as
+            minmax boxes.
+    """
+
+    YAW_AXIS: int = 0
+
+    def __init__(
+        self,
+        tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
+        box_dim: int = 7,
+        with_yaw: bool = True,
+        origin: Tuple[float, float, float] = (0.5, 0.5, 0)
+    ) -> None:
+        if isinstance(tensor, Tensor):
+            device = tensor.device
+        else:
+            device = torch.device('cpu')
+        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
+        if tensor.numel() == 0:
+            # Use reshape, so we don't end up creating a new tensor that does
+            # not depend on the inputs (and consequently confuses jit)
+            tensor = tensor.reshape((-1, box_dim))
+        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
+            ('The box dimension must be 2 and the length of the last '
+             f'dimension must be {box_dim}, but got boxes with shape '
+             f'{tensor.shape}.')
+
+        if tensor.shape[-1] == 6:
+            # If the dimension of boxes is 6, we expand box_dim by padding 0
+            # as a fake yaw and set with_yaw to False
+            assert box_dim == 6
+            fake_rot = tensor.new_zeros(tensor.shape[0], 1)
+            tensor = torch.cat((tensor, fake_rot), dim=-1)
+            self.box_dim = box_dim + 1
+            self.with_yaw = False
+        else:
+            self.box_dim = box_dim
+            self.with_yaw = with_yaw
+        self.tensor = tensor.clone()
+
+        if origin != (0.5, 0.5, 0):
+            dst = self.tensor.new_tensor((0.5, 0.5, 0))
+            src = self.tensor.new_tensor(origin)
+            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
+
+    @property
+    def shape(self) -> torch.Size:
+        """torch.Size: Shape of boxes."""
+        return self.tensor.shape
+
+    @property
+    def volume(self) -> Tensor:
+        """Tensor: A vector with volume of each box in shape (N, )."""
+        return self.tensor[:, 3] * self.tensor[:, 4] * self.tensor[:, 5]
+
+    @property
+    def dims(self) -> Tensor:
+        """Tensor: Size dimensions of each box in shape (N, 3)."""
+        return self.tensor[:, 3:6]
+
+    @property
+    def yaw(self) -> Tensor:
+        """Tensor: A vector with yaw of each box in shape (N, )."""
+        return self.tensor[:, 6]
+
+    @property
+    def height(self) -> Tensor:
+        """Tensor: A vector with height of each box in shape (N, )."""
+        return self.tensor[:, 5]
+
+    @property
+    def top_height(self) -> Tensor:
+        """Tensor: A vector with top height of each box in shape (N, )."""
+        return self.bottom_height + self.height
+
+    @property
+    def bottom_height(self) -> Tensor:
+        """Tensor: A vector with bottom height of each box in shape (N, )."""
+        return self.tensor[:, 2]
+
+    @property
+    def center(self) -> Tensor:
+        """Calculate the center of all the boxes.
+
+        Note:
+            In MMDetection3D's convention, the bottom center is usually taken
+            as the default center.
+
+            The relative position of the centers in different kinds of boxes
+            are different, e.g., the relative center of a box is
+            (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar. It is
+            recommended to use ``bottom_center`` or ``gravity_center`` for
+            clearer usage.
+
+        Returns:
+            Tensor: A tensor with center of each box in shape (N, 3).
+        """
+        return self.bottom_center
+
+    @property
+    def bottom_center(self) -> Tensor:
+        """Tensor: A tensor with center of each box in shape (N, 3)."""
+        return self.tensor[:, :3]
+
+    @property
+    def gravity_center(self) -> Tensor:
+        """Tensor: A tensor with center of each box in shape (N, 3)."""
+        bottom_center = self.bottom_center
+        gravity_center = torch.zeros_like(bottom_center)
+        gravity_center[:, :2] = bottom_center[:, :2]
+        gravity_center[:, 2] = bottom_center[:, 2] + self.tensor[:, 5] * 0.5
+        return gravity_center
+
+    @property
+    def corners(self) -> Tensor:
+        """Tensor: A tensor with 8 corners of each box in shape (N, 8, 3)."""
+        pass
+
+    @property
+    def bev(self) -> Tensor:
+        """Tensor: 2D BEV box of each box with rotation in XYWHR format, in
+        shape (N, 5)."""
+        return self.tensor[:, [0, 1, 3, 4, 6]]
+
+    def in_range_bev(
+            self, box_range: Union[Tensor, np.ndarray,
+                                   Sequence[float]]) -> Tensor:
+        """Check whether the boxes are in the given range.
+
+        Args:
+            box_range (Tensor or np.ndarray or Sequence[float]): The range of
+                box in order of (x_min, y_min, x_max, y_max).
+
+        Note:
+            The original implementation of SECOND checks whether boxes are in
+            a range by checking whether the points are in a convex polygon;
+            we reduce the burden for simpler cases.
+
+        Returns:
+            Tensor: A binary vector indicating whether each box is inside the
+            reference range.
+        """
+        in_range_flags = ((self.bev[:, 0] > box_range[0])
+                          & (self.bev[:, 1] > box_range[1])
+                          & (self.bev[:, 0] < box_range[2])
+                          & (self.bev[:, 1] < box_range[3]))
+        return in_range_flags
+
+    @abstractmethod
+    def rotate(
+        self,
+        angle: Union[Tensor, np.ndarray, float],
+        points: Optional[Union[Tensor, np.ndarray]] = None
+    ) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray],
+               Tuple[Tensor], None]:
+        """Rotate boxes with points (optional) with the given angle or
+        rotation matrix.
+
+        Args:
+            angle (Tensor or np.ndarray or float): Rotation angle or rotation
+                matrix.
+            points (Tensor or np.ndarray, optional):
+                Points to rotate. Defaults to None.
+
+        Returns:
+            tuple or None: When ``points`` is None, the function returns None,
+            otherwise it returns the rotated points and the rotation matrix
+            ``rot_mat_T``.
+        """
+        pass
+
+    @abstractmethod
+    def flip(
+        self,
+        bev_direction: str = 'horizontal',
+        points: Optional[Union[Tensor, np.ndarray]] = None
+    ) -> Union[Tensor, np.ndarray, None]:
+        """Flip the boxes in BEV along the given BEV direction.
+
+        Args:
+            bev_direction (str): Direction by which to flip. Can be chosen
+                from 'horizontal' and 'vertical'. Defaults to 'horizontal'.
+            points (Tensor or np.ndarray, optional):
+                Points to flip. Defaults to None.
+
+        Returns:
+            Tensor or np.ndarray or None: When ``points`` is None, the
+            function returns None, otherwise it returns the flipped points.
+        """
+        pass
+
+    def translate(self, trans_vector: Union[Tensor, np.ndarray]) -> None:
+        """Translate boxes with the given translation vector.
+
+        Args:
+            trans_vector (Tensor or np.ndarray): Translation vector of size
+                1x3.
+        """
+        if not isinstance(trans_vector, Tensor):
+            trans_vector = self.tensor.new_tensor(trans_vector)
+        self.tensor[:, :3] += trans_vector
+
+    def in_range_3d(
+            self, box_range: Union[Tensor, np.ndarray,
+                                   Sequence[float]]) -> Tensor:
+        """Check whether the boxes are in the given range.
+
+        Args:
+            box_range (Tensor or np.ndarray or Sequence[float]): The range of
+                box (x_min, y_min, z_min, x_max, y_max, z_max).
+
+        Note:
+            In the original implementation of SECOND, checking whether a box
+            is in the range checks whether the points are in a convex
+            polygon; we try to reduce the burden for simpler cases.
+
+        Returns:
+            Tensor: A binary vector indicating whether each point is inside
+            the reference range.
+        """
+        in_range_flags = ((self.tensor[:, 0] > box_range[0])
+                          & (self.tensor[:, 1] > box_range[1])
+                          & (self.tensor[:, 2] > box_range[2])
+                          & (self.tensor[:, 0] < box_range[3])
+                          & (self.tensor[:, 1] < box_range[4])
+                          & (self.tensor[:, 2] < box_range[5]))
+        return in_range_flags
+
+    @abstractmethod
+    def convert_to(self,
+                   dst: int,
+                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
+                   correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
+        """Convert self to ``dst`` mode.
+
+        Args:
+            dst (int): The target Box mode.
+            rt_mat (Tensor or np.ndarray, optional): The rotation and
+                translation matrix between different coordinates.
+                Defaults to None. The conversion from ``src`` coordinates to
+                ``dst`` coordinates usually comes along the change of
+                sensors, e.g., from camera to LiDAR. This requires a
+                transformation matrix.
+            correct_yaw (bool): Whether to convert the yaw angle to the
+                target coordinate. Defaults to False.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: The converted box of the same type in
+            the ``dst`` mode.
+        """
+        pass
+
+    def scale(self, scale_factor: float) -> None:
+        """Scale the box with horizontal and vertical scaling factors.
+
+        Args:
+            scale_factor (float): Scale factor to scale the boxes.
+        """
+        self.tensor[:, :6] *= scale_factor
+        self.tensor[:, 7:] *= scale_factor  # velocity
+
+    def nonempty(self, threshold: float = 0.0) -> Tensor:
+        """Find boxes that are non-empty.
+
+        A box is considered empty if either of its sides is no larger than
+        the threshold.
+
+        Args:
+            threshold (float): The threshold of minimal sizes. Defaults to
+                0.0.
+
+        Returns:
+            Tensor: A binary vector which represents whether each box is
+            empty (False) or non-empty (True).
+        """
+        box = self.tensor
+        size_x = box[..., 3]
+        size_y = box[..., 4]
+        size_z = box[..., 5]
+        keep = ((size_x > threshold)
+                & (size_y > threshold) & (size_z > threshold))
+        return keep
+
+    def __getitem__(
+            self, item: Union[int, slice, np.ndarray,
+                              Tensor]) -> 'BaseInstance3DBoxes':
+        """
+        Args:
+            item (int or slice or np.ndarray or Tensor): Index of boxes.
+
+        Note:
+            The following usages are allowed:
+
+            1. `new_boxes = boxes[3]`: Return a `Boxes` that contains only
+               one box.
+            2. `new_boxes = boxes[2:10]`: Return a slice of boxes.
+            3. `new_boxes = boxes[vector]`: Where vector is a
+               torch.BoolTensor with `length = len(boxes)`. Nonzero elements
+               in the vector will be selected.
+
+            Note that the returned Boxes might share storage with this Boxes,
+            subject to PyTorch's indexing semantics.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new object of
+            :class:`BaseInstance3DBoxes` after indexing.
+        """
+        original_type = type(self)
+        if isinstance(item, int):
+            return original_type(self.tensor[item].view(1, -1),
+                                 box_dim=self.box_dim,
+                                 with_yaw=self.with_yaw)
+        b = self.tensor[item]
+        assert b.dim() == 2, \
+            f'Indexing on Boxes with {item} failed to return a matrix!'
+        return original_type(b, box_dim=self.box_dim, with_yaw=self.with_yaw)
+
+    def __len__(self) -> int:
+        """int: Number of boxes in the current object."""
+        return self.tensor.shape[0]
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the object."""
+        return self.__class__.__name__ + '(\n    ' + str(self.tensor) + ')'
+
+    @classmethod
+    def cat(cls, boxes_list: Sequence['BaseInstance3DBoxes']
+            ) -> 'BaseInstance3DBoxes':
+        """Concatenate a list of Boxes into a single Boxes.
+
+        Args:
+            boxes_list (Sequence[:obj:`BaseInstance3DBoxes`]): List of boxes.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: The concatenated boxes.
+        """
+        assert isinstance(boxes_list, (list, tuple))
+        if len(boxes_list) == 0:
+            return cls(torch.empty(0))
+        assert all(isinstance(box, cls) for box in boxes_list)
+
+        # use torch.cat (v.s. layers.cat)
+        # so the returned boxes never share storage with input
+        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0),
+                        box_dim=boxes_list[0].box_dim,
+                        with_yaw=boxes_list[0].with_yaw)
+        return cat_boxes
+
+    def numpy(self) -> np.ndarray:
+        """Reload ``numpy`` from self.tensor."""
+        return self.tensor.numpy()
+
+    def to(self, device: Union[str, torch.device], *args,
+           **kwargs) -> 'BaseInstance3DBoxes':
+        """Convert current boxes to a specific device.
+
+        Args:
+            device (str or :obj:`torch.device`): The name of the device.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new boxes object on the specific
+            device.
+        """
+        original_type = type(self)
+        return original_type(self.tensor.to(device, *args, **kwargs),
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+    def cpu(self) -> 'BaseInstance3DBoxes':
+        """Convert current boxes to the cpu device.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new boxes object on the cpu device.
+        """
+        original_type = type(self)
+        return original_type(self.tensor.cpu(),
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+    def cuda(self, *args, **kwargs) -> 'BaseInstance3DBoxes':
+        """Convert current boxes to the cuda device.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new boxes object on the cuda
+            device.
+        """
+        original_type = type(self)
+        return original_type(self.tensor.cuda(*args, **kwargs),
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+    def clone(self) -> 'BaseInstance3DBoxes':
+        """Clone the boxes.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: Box object with the same properties
+            as self.
+        """
+        original_type = type(self)
+        return original_type(self.tensor.clone(),
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+    def detach(self) -> 'BaseInstance3DBoxes':
+        """Detach the boxes.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: Box object with the same properties
+            as self.
+        """
+        original_type = type(self)
+        return original_type(self.tensor.detach(),
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+    @property
+    def device(self) -> torch.device:
+        """torch.device: The device the boxes are on."""
+        return self.tensor.device
+
+    def __iter__(self) -> Iterator[Tensor]:
+        """Yield a box as a Tensor at a time.
+
+        Returns:
+            Iterator[Tensor]: A box of shape (box_dim, ).
+        """
+        yield from self.tensor
+
+    @classmethod
+    def height_overlaps(cls, boxes1: 'BaseInstance3DBoxes',
+                        boxes2: 'BaseInstance3DBoxes') -> Tensor:
+        """Calculate height overlaps of two boxes.
+
+        Note:
+            This function calculates the height overlaps between ``boxes1``
+            and ``boxes2``; ``boxes1`` and ``boxes2`` should be in the same
+            type.
+
+        Args:
+            boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes.
+            boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes.
+
+        Returns:
+            Tensor: Calculated height overlap of the boxes.
+        """
+        assert isinstance(boxes1, BaseInstance3DBoxes)
+        assert isinstance(boxes2, BaseInstance3DBoxes)
+        assert type(boxes1) == type(boxes2), \
+            '"boxes1" and "boxes2" should be in the same type, ' \
+            f'but got {type(boxes1)} and {type(boxes2)}.'
+
+        boxes1_top_height = boxes1.top_height.view(-1, 1)
+        boxes1_bottom_height = boxes1.bottom_height.view(-1, 1)
+        boxes2_top_height = boxes2.top_height.view(1, -1)
+        boxes2_bottom_height = boxes2.bottom_height.view(1, -1)
+
+        highest_of_bottom = torch.max(boxes1_bottom_height,
+                                      boxes2_bottom_height)
+        lowest_of_top = torch.min(boxes1_top_height, boxes2_top_height)
+        overlaps_h = torch.clamp(lowest_of_top - highest_of_bottom, min=0)
+        return overlaps_h
+
+    def new_box(
+        self, data: Union[Tensor, np.ndarray, Sequence[Sequence[float]]]
+    ) -> 'BaseInstance3DBoxes':
+        """Create a new box object with data.
+
+        The new box and its tensor have the same properties as self and
+        self.tensor, respectively.
+
+        Args:
+            data (Tensor or np.ndarray or Sequence[Sequence[float]]): Data to
+                be copied.
+
+        Returns:
+            :obj:`BaseInstance3DBoxes`: A new bbox object with ``data``, the
+            object's other properties are similar to ``self``.
+        """
+        new_tensor = self.tensor.new_tensor(data) \
+            if not isinstance(data, Tensor) else data.to(self.device)
+        original_type = type(self)
+        return original_type(new_tensor,
+                             box_dim=self.box_dim,
+                             with_yaw=self.with_yaw)
+
+
+class EulerInstance3DBoxes(BaseInstance3DBoxes):
+    """3D boxes whose orientation is represented by three Euler angles.
+
+    See https://en.wikipedia.org/wiki/Euler_angles for the definition of
+    Euler angles.
+
+    Attributes:
+        tensor (torch.Tensor): Float matrix of N x box_dim.
+        box_dim (int): Integer indicating the dimension of a box. Each row
+            is (x, y, z, x_size, y_size, z_size, alpha, beta, gamma).
+    """
+
+    def __init__(self, tensor, box_dim=9, origin=(0.5, 0.5, 0.5)):
+        if isinstance(tensor, torch.Tensor):
+            device = tensor.device
+        else:
+            device = torch.device('cpu')
+        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
+        if tensor.numel() == 0:
+            # Use reshape, so we don't end up creating a new tensor that
+            # does not depend on the inputs (and consequently confuses jit)
+            tensor = tensor.reshape((0, box_dim)).to(dtype=torch.float32,
+                                                     device=device)
+        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, tensor.size()
+
+        if tensor.shape[-1] == 6:
+            # If the dimension of boxes is 6, we expand box_dim by padding
+            # (0, 0, 0) as a fake euler angle.
+            assert box_dim == 6
+            fake_rot = tensor.new_zeros(tensor.shape[0], 3)
+            tensor = torch.cat((tensor, fake_rot), dim=-1)
+            self.box_dim = box_dim + 3
+        elif tensor.shape[-1] == 7:
+            assert box_dim == 7
+            fake_euler = tensor.new_zeros(tensor.shape[0], 2)
+            tensor = torch.cat((tensor, fake_euler), dim=-1)
+            self.box_dim = box_dim + 2
+        else:
+            assert tensor.shape[-1] == 9
+            self.box_dim = box_dim
+        self.tensor = tensor.clone()
+
+        self.origin = origin
+        if origin != (0.5, 0.5, 0.5):
+            dst = self.tensor.new_tensor((0.5, 0.5, 0.5))
+            src = self.tensor.new_tensor(origin)
+            self.tensor[:, :3] += self.tensor[:, 3:6] * (dst - src)
+
+    def get_corners(self, tensor1):
+        """Compute the corners of the given boxes tensor, shape (N, 8, 3).
+
+        Convert the boxes to corners in clockwise order, in form of
+        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
+
+        .. code-block:: none
+
+                                               up z
+                                front y           ^
+                                     /            |
+                                    /             |
+                      (x0, y1, z1) + ----------- + (x1, y1, z1)
+                                  /|            / |
+                                 / |           /  |
+                   (x0, y0, z1) + ----------- +   + (x1, y1, z0)
+                                |  /      .   |  /
+                                | / origin    | /
+                   (x0, y0, z0) + ----------- + --------> right x
+                                              (x1, y0, z0)
+        """
+        if tensor1.numel() == 0:
+            return torch.empty([0, 8, 3], device=tensor1.device)
+
+        dims = tensor1[:, 3:6]
+        corners_norm = torch.from_numpy(
+            np.stack(np.unravel_index(np.arange(8), [2] * 3),
+                     axis=1)).to(device=dims.device, dtype=dims.dtype)
+
+        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
+        # use relative origin
+        assert self.origin == (0.5, 0.5, 0.5), \
+            'self.origin != (0.5, 0.5, 0.5) needs to be checked!'
+        corners_norm = corners_norm - dims.new_tensor(self.origin)
+        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
+
+        # rotate
+        corners = rotation_3d_in_euler(corners, tensor1[:, 6:])
+
+        corners += tensor1[:, :3].view(-1, 1, 3)
+        return corners
+
+    @classmethod
+    def overlaps(cls, boxes1, boxes2, mode='iou', eps=1e-4):
+        """Calculate 3D overlaps of two boxes.
+
+        Note:
+            This function calculates the overlaps between ``boxes1`` and
+            ``boxes2``; ``boxes1`` and ``boxes2`` should be in the same type.
+
+        Args:
+            boxes1 (:obj:`EulerInstance3DBoxes`): Boxes 1 contain N boxes.
+            boxes2 (:obj:`EulerInstance3DBoxes`): Boxes 2 contain M boxes.
+            mode (str): Mode of iou calculation. Defaults to 'iou'.
+            eps (float): Epsilon. Defaults to 1e-4.
+
+        Returns:
+            torch.Tensor: Calculated 3D overlaps of the boxes.
+        """
+        assert isinstance(boxes1, EulerInstance3DBoxes)
+        assert isinstance(boxes2, EulerInstance3DBoxes)
+        assert type(boxes1) == type(boxes2), '"boxes1" and "boxes2" should ' \
+            f'be in the same type, got {type(boxes1)} and {type(boxes2)}.'
+
+        assert mode in ['iou']
+
+        rows = len(boxes1)
+        cols = len(boxes2)
+        if rows * cols == 0:
+            return boxes1.tensor.new(rows, cols)
+
+        corners1 = boxes1.corners
+        corners2 = boxes2.corners
+        _, iou3d = box3d_overlap(corners1, corners2, eps=eps)
+        return iou3d
+
+    @property
+    def gravity_center(self):
+        """torch.Tensor: A tensor with center of each box in shape (N, 3)."""
+        return self.tensor[:, :3]
+
+    @property
+    def corners(self):
+        """torch.Tensor: Coordinates of corners of all the boxes in shape
+        (N, 8, 3).
+
+        Convert the boxes to corners in clockwise order, in form of
+        ``(x0y0z0, x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0)``
+        (see the diagram in ``get_corners``).
+        """
+        if self.tensor.numel() == 0:
+            return torch.empty([0, 8, 3], device=self.tensor.device)
+
+        dims = self.dims
+        corners_norm = torch.from_numpy(
+            np.stack(np.unravel_index(np.arange(8), [2] * 3),
+                     axis=1)).to(device=dims.device, dtype=dims.dtype)
+
+        corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
+        # use relative origin
+        assert self.origin == (0.5, 0.5, 0.5), \
+            'self.origin != (0.5, 0.5, 0.5) needs to be checked!'
+        corners_norm = corners_norm - dims.new_tensor(self.origin)
+        corners = dims.view([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
+
+        # rotate
+        corners = rotation_3d_in_euler(corners, self.tensor[:, 6:])
+
+        corners += self.tensor[:, :3].view(-1, 1, 3)
+        return corners
+
+    def transform(self, matrix):
+        if self.tensor.shape[0] == 0:
+            return
+        if not isinstance(matrix, torch.Tensor):
+            matrix = self.tensor.new_tensor(matrix)
+        points = self.tensor[:, :3]
+        constant = points.new_ones(points.shape[0], 1)
+        points_extend = torch.concat([points, constant], dim=-1)
+        points_trans = torch.matmul(points_extend, matrix.transpose(-2,
+                                                                    -1))[:, :3]
+
+        size = self.tensor[:, 3:6]
+
+        # angle_delta = matrix_to_euler_angles(matrix[:3, :3], 'ZXY')
+        # angle = self.tensor[:, 6:] + angle_delta
+        ori_matrix = euler_angles_to_matrix(self.tensor[:, 6:], 'ZXY')
+        rot_matrix = matrix[:3, :3].expand_as(ori_matrix)
+        final = torch.bmm(rot_matrix, ori_matrix)
+        angle = matrix_to_euler_angles(final, 'ZXY')
+
+        self.tensor = torch.cat([points_trans, size, angle], dim=-1)
+
+    def scale(self, scale_factor: float) -> None:
+        """Scale the box with horizontal and vertical scaling factors.
+
+        Args:
+            scale_factor (float): Scale factor to scale the boxes.
+        """
+        self.tensor[:, :6] *= scale_factor
+
+    def rotate(self, angle, points=None):
+        """Rotate boxes with points (optional) with the given angle or
+        rotation matrix.
+
+        Args:
+            angle (float | torch.Tensor | np.ndarray):
+                Rotation angle or rotation matrix.
+            points (torch.Tensor | np.ndarray, optional):
+                Points to rotate. Defaults to None.
+
+        Returns:
+            tuple or None: When ``points`` is None, the function returns
+            None, otherwise it returns the rotated points and the
+            rotation matrix ``rot_mat_T``.
+        """
+        if not isinstance(angle, torch.Tensor):
+            angle = self.tensor.new_tensor(angle)
+
+        if angle.numel() == 1:  # only given yaw angle for rotation
+            angle = self.tensor.new_tensor([angle, 0., 0.])
+            rot_matrix = euler_angles_to_matrix(angle, 'ZXY')
+        elif angle.numel() == 3:
+            rot_matrix = euler_angles_to_matrix(angle, 'ZXY')
+        elif angle.shape == torch.Size([3, 3]):
+            rot_matrix = angle
+        else:
+            raise NotImplementedError
+
+        rot_mat_T = rot_matrix.T
+        transform_matrix = torch.eye(4)
+        transform_matrix[:3, :3] = rot_matrix
+        self.transform(transform_matrix)
+
+        if points is not None:
+            if isinstance(points, torch.Tensor):
+                points[:, :3] = points[:, :3] @ rot_mat_T
+            elif isinstance(points, np.ndarray):
+                rot_mat_T = rot_mat_T.cpu().numpy()
+                points[:, :3] = np.dot(points[:, :3], rot_mat_T)
+            else:
+                # NOTE: the diff also had a branch for a points-container
+                # object whose class name was stripped from this render;
+                # only Tensor and ndarray inputs are handled here.
+                raise ValueError
+            return points, rot_mat_T
+        else:
+            return rot_mat_T
+
+    def flip(self, direction='X'):
+        """Flip the boxes along the corresponding axis.
+
+        Args:
+            direction (str, optional): Flip axis. Defaults to 'X'.
+        """
+        assert direction in ['X', 'Y', 'Z']
+        if direction == 'X':
+            self.tensor[:, 0] = -self.tensor[:, 0]
+            self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
+            self.tensor[:, 8] = -self.tensor[:, 8]
+        elif direction == 'Y':
+            self.tensor[:, 1] = -self.tensor[:, 1]
+            self.tensor[:, 6] = -self.tensor[:, 6]
+            self.tensor[:, 7] = -self.tensor[:, 7] + np.pi
+        elif direction == 'Z':
+            self.tensor[:, 2] = -self.tensor[:, 2]
+            self.tensor[:, 7] = -self.tensor[:, 7]
+            self.tensor[:, 8] = -self.tensor[:, 8] + np.pi
+
+
+def rotation_3d_in_euler(points, angles, return_mat=False, clockwise=False):
+    """Rotate points by Euler angles.
+
+    Args:
+        points (np.ndarray | torch.Tensor | list | tuple):
+            Points of shape (N, M, 3).
+        angles (np.ndarray | torch.Tensor | list | tuple):
+            Vector of angles in shape (N, 3).
+        return_mat: Whether or not to return the rotation matrix
+            (transposed). Defaults to False.
+        clockwise: Whether the rotation is clockwise. Defaults to False.
+
+    Returns:
+        (torch.Tensor | np.ndarray): Rotated points in shape (N, M, 3).
+    """
+    batch_free = len(points.shape) == 2
+    if batch_free:
+        points = points[None]
+
+    if len(angles.shape) == 1:
+        angles = angles.expand(points.shape[:1] + (3, ))
+        # angles = torch.full(points.shape[:1], angles)
+
+    assert len(points.shape) == 3 and len(angles.shape) == 2 \
+        and points.shape[0] == angles.shape[0], f'Incorrect shape of points ' \
+        f'angles: {points.shape}, {angles.shape}'
+
+    assert points.shape[-1] in [2, 3], \
+        f'Points size should be 2 or 3 instead of {points.shape[-1]}'
+
+    rot_mat_T = euler_angles_to_matrix(angles, 'ZXY')  # N, 3, 3
+
+    rot_mat_T = rot_mat_T.transpose(-2, -1)
+
+    if clockwise:
+        raise NotImplementedError('clockwise')
+
+    if points.shape[0] == 0:
+        points_new = points
+    else:
+        points_new = torch.bmm(points, rot_mat_T)
+
+    if batch_free:
+        points_new = points_new.squeeze(0)
+
+    if return_mat:
+        if batch_free:
+            rot_mat_T = rot_mat_T.squeeze(0)
+        return points_new, rot_mat_T
+    else:
+        return points_new
+
+
+def _axis_angle_rotation(axis: str, angle: np.ndarray) -> np.ndarray:
+    """Return the rotation matrices for one of the rotations about an axis
+    of which Euler angles describe, for each value of the angle given.
+
+    Args:
+        axis: Axis label "X", "Y" or "Z".
+        angle: Euler angles in radians, any shape.
+
+    Returns:
+        Rotation matrices as array of shape (..., 3, 3).
+    """
+
+    cos = np.cos(angle)
+    sin = np.sin(angle)
+    one = np.ones_like(angle)
+    zero = np.zeros_like(angle)
+
+    if axis == 'X':
+        R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos)
+    elif axis == 'Y':
+        R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos)
+    elif axis == 'Z':
+        R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one)
+    else:
+        raise ValueError('letter must be either X, Y or Z.')
+
+    return np.stack(R_flat, -1).reshape(angle.shape + (3, 3))
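+
+
+# NOTE (editorial): euler_to_matrix_np() is called in normalize_box() below
+# but is not defined anywhere in this diff. The sketch here assumes it is
+# the numpy analogue of pytorch3d's euler_angles_to_matrix with the 'ZXY'
+# convention used throughout this file, built on _axis_angle_rotation above.
+def euler_to_matrix_np(euler: np.ndarray) -> np.ndarray:
+    """Convert Euler angles (..., 3) in 'ZXY' convention to rotation
+    matrices of shape (..., 3, 3)."""
+    matrices = [
+        _axis_angle_rotation(axis, euler[..., i])
+        for i, axis in enumerate('ZXY')
+    ]
+    return matrices[0] @ matrices[1] @ matrices[2]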
+
+
+def is_inside_box(points, center, size, rotation_mat):
+    """Check if points are inside a 3D bounding box.
+
+    Args:
+        points: 3D points, numpy array of shape (n, 3).
+        center: center of the box, numpy array of shape (3, ).
+        size: size of the box, numpy array of shape (3, ).
+        rotation_mat: rotation matrix of the box,
+            numpy array of shape (3, 3).
+
+    Returns:
+        Boolean array of shape (n, )
+        indicating if each point is inside the box.
+    """
+    assert points.shape[1] == 3, 'points should be of shape (n, 3)'
+    points = np.array(points)  # (n, 3)
+    center = np.array(center)  # (3, )
+    size = np.array(size)  # (3, )
+    rotation_mat = np.array(rotation_mat)
+    assert rotation_mat.shape == (
+        3,
+        3,
+    ), f'R should be shape (3,3), but got {rotation_mat.shape}'
+    pcd_local = (points - center) @ rotation_mat  # (n, 3)
+    pcd_local = pcd_local / size * 2.0  # scale to [-1, 1]
+    pcd_local = abs(pcd_local)
+    return ((pcd_local[:, 0] <= 1)
+            & (pcd_local[:, 1] <= 1)
+            & (pcd_local[:, 2] <= 1))
+
+
+def normalize_box(scene_pcd, embodied_scan_bbox):
+    """Find the smallest axis-aligned 6 DoF box that covers the points
+    enclosed by the given 9 DoF box.
+
+    Args:
+        scene_pcd (Tensor / ndarray):
+            (..., 3) the scene point cloud.
+        embodied_scan_bbox (Tensor / ndarray):
+            (9, ) the 9 DoF box.
+
+    Returns:
+        np.ndarray: the 6 DoF box (cx, cy, cz, dx, dy, dz).
+    """
+
+    bbox = np.array(embodied_scan_bbox)
+    orientation = euler_to_matrix_np(bbox[np.newaxis, 6:])[0]
+    position = np.array(bbox[:3])
+    size = np.array(bbox[3:6])
+    obj_mask = np.array(
+        is_inside_box(scene_pcd[:, :3], position, size, orientation),
+        dtype=bool,
+    )
+    obj_pc = scene_pcd[obj_mask]
+
+    # fall back to the input box if it contains no points
+    if obj_pc.shape[0] < 1:
+        return embodied_scan_bbox[:6]
+    xmin = np.min(obj_pc[:, 0])
+    ymin = np.min(obj_pc[:, 1])
+    zmin = np.min(obj_pc[:, 2])
+    xmax = np.max(obj_pc[:, 0])
+    ymax = np.max(obj_pc[:, 1])
+    zmax = np.max(obj_pc[:, 2])
+    bbox = np.array([
+        (xmin + xmax) / 2,
+        (ymin + ymax) / 2,
+        (zmin + zmax) / 2,
+        xmax - xmin,
+        ymax - ymin,
+        zmax - zmin,
+    ])
+    return bbox
+
+
+def from_9dof_to_6dof(pcd_data, bbox_):
+    # This conversion loses orientation information, so it is not
+    # recommended.
+    return normalize_box(pcd_data, bbox_)
+
+
+def bbox_to_corners(centers, sizes, rot_mat: torch.Tensor) -> torch.Tensor:
+    """Transform bbox parameters to the 8 corners.
+
+    Args:
+        centers (Tensor): Box centers of shape (N, 3) or (B, N, 3).
+        sizes (Tensor): Box sizes of shape (N, 3) or (B, N, 3).
+        rot_mat (Tensor): Rotation matrices of shape (N, 3, 3) or
+            (B, N, 3, 3).
+
+    Returns:
+        Tensor: Box corners of shape (N, 8, 3) or (B, N, 8, 3).
+    """
+    device = centers.device
+    use_batch = False
+    if len(centers.shape) == 3:
+        use_batch = True
+        batch_size, n_proposals = centers.shape[0], centers.shape[1]
+        centers = centers.reshape(-1, 3)
+        sizes = sizes.reshape(-1, 3)
+        rot_mat = rot_mat.reshape(-1, 3, 3)
+
+    n_box = centers.shape[0]
+    if use_batch:
+        assert n_box == batch_size * n_proposals
+    centers = centers.unsqueeze(1).repeat(1, 8, 1)  # shape (N, 8, 3)
+    half_sizes = sizes.unsqueeze(1).repeat(1, 8, 1) / 2  # shape (N, 8, 3)
+    eight_corners_x = (torch.tensor([1, 1, 1, 1, -1, -1, -1, -1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners_y = (torch.tensor([1, 1, -1, -1, 1, 1, -1, -1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners_z = (torch.tensor([1, -1, -1, 1, 1, -1, -1, 1],
+                                    device=device).unsqueeze(0).repeat(
+                                        n_box, 1))  # shape (N, 8)
+    eight_corners = torch.stack(
+        (eight_corners_x, eight_corners_y, eight_corners_z),
+        dim=-1)  # shape (N, 8, 3)
+    eight_corners = eight_corners * half_sizes  # shape (N, 8, 3)
+    # rot_mat: (N, 3, 3), eight_corners: (N, 8, 3)
+    rotated_corners = torch.matmul(eight_corners,
+                                   rot_mat.transpose(1, 2))  # shape (N, 8, 3)
+    res = centers + rotated_corners
+    if use_batch:
+        res = res.reshape(batch_size, n_proposals, 8, 3)
+    return res
+
+
+def euler_iou3d_corners(boxes1, boxes2):
+    rows = boxes1.shape[0]
+    cols = boxes2.shape[0]
+    if rows * cols == 0:
+        return boxes1.new(rows, cols)
+
+    _, iou3d = box3d_overlap(boxes1, boxes2)
+    return iou3d
+
+
+def euler_iou3d_bbox(center1, size1, rot1, center2, size2, rot2):
+    """Calculate the 3D IoU between two groups of 9 DoF bounding boxes.
+
+    Args:
+        center1 (Tensor): (n, 3) centers of group 1.
+        size1 (Tensor): (n, 3) sizes of group 1.
+        rot1 (Tensor): (n, 3, 3) rotation matrices of group 1.
+        center2 (Tensor): (m, 3) centers of group 2.
+        size2 (Tensor): (m, 3) sizes of group 2.
+        rot2 (Tensor): (m, 3, 3) rotation matrices of group 2.
+
+    Returns:
+        numpy.ndarray: (n, m) the 3D IoU.
+    """
+    if torch.cuda.is_available():
+        center1 = center1.cuda()
+        size1 = size1.cuda()
+        rot1 = rot1.cuda()
+        center2 = center2.cuda()
+        size2 = size2.cuda()
+        rot2 = rot2.cuda()
+    corners1 = bbox_to_corners(center1, size1, rot1)
+    corners2 = bbox_to_corners(center2, size2, rot2)
+    result = euler_iou3d_corners(corners1, corners2)
+
+    if torch.cuda.is_available():
+        result = result.detach().cpu()
+    return result.numpy()
+
+
+def index_box(boxes: List[torch.Tensor],
+              indices: Union[List[torch.Tensor], torch.Tensor])\
+        -> Union[List[torch.Tensor], torch.Tensor]:
+    """Index a group of boxes (a tensor, or a nested list/tuple of box
+    tensors) with the given indices.
+
+    Args:
+        boxes (list/tuple/Tensor): boxes in a group.
+        indices (list/Tensor): indices to select.
+
+    Returns:
+        list/Tensor: the selected boxes.
+    """
+    if isinstance(boxes, (list, tuple)):
+        return [index_box(box, indices) for box in boxes]
+    else:
+        return boxes[indices]
+
+
+def to_9dof_box(box: List[torch.Tensor]):
+    """Convert a group of bounding boxes represented in
+    (center, size, rot) format to 9 DoF box objects.
+
+    Args:
+        box (list/tuple, tensor): boxes in a group, (num, 9).
+
+    Returns:
+        EulerInstance3DBoxes: the 9 DoF boxes.
+    """
+
+    return EulerInstance3DBoxes(box, origin=(0.5, 0.5, 0.5))
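
A quick sanity check of the 9 DoF IoU path (a sketch; it requires pytorch3d and uses the (x, y, z, dx, dy, dz, alpha, beta, gamma) layout documented above):

    import torch
    from mmscan_utils.box_utils import EulerInstance3DBoxes, to_9dof_box

    a = to_9dof_box(torch.tensor([[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0]]))
    b = to_9dof_box(torch.tensor([[0.5, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0]]))
    print(EulerInstance3DBoxes.overlaps(a, b))  # ~0.3333: intersection 0.5 / union 1.5
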
test_annotations_mmscan.json ADDED
The diff for this file is too large to render. See raw diff
 
vg_evaluator.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Tuple
2
+
3
+ import numpy as np
4
+ import torch
5
+ from tqdm import tqdm
6
+
7
+ from mmscan_utils.box_metric import (get_average_precision,
8
+ get_general_topk_scores,
9
+ subset_get_average_precision)
10
+ from mmscan_utils.box_utils import index_box, to_9dof_box
11
+
12
+
13
+ class VisualGroundingEvaluator:
14
+ """Evaluator for MMScan Visual Grounding benchmark. The evaluation metric
15
+ includes "AP","AP_C","AR","gTop-k".
16
+
17
+ Attributes:
18
+ save_buffer(list[dict]): Save the buffer of Inputs.
19
+
20
+ records(list[dict]): Metric results for each sample
21
+
22
+ category_records(dict): Metric results for each category
23
+ (average of all samples with the same category)
24
+ Args:
25
+ show_results(bool): Whether to print the evaluation results.
26
+ Defaults to True.
27
+ """
28
+
29
+ def __init__(self, show_results: bool = True) -> None:
30
+
31
+ self.show_results = show_results
32
+ self.eval_metric_type = ['AP', 'AR']
33
+ self.top_k_visible = [1, 3, 5]
34
+ self.call_for_category_mode = True
35
+
36
+ for top_k in [1, 3, 5, 10]:
37
+ self.eval_metric_type.append(f'gTop-{top_k}')
38
+
39
+ self.iou_thresholds = [0.25, 0.50]
40
+ self.eval_metric = []
41
+ for iou_thr in self.iou_thresholds:
42
+ for eval_type in self.eval_metric_type:
43
+ self.eval_metric.append(eval_type + '@' + str(iou_thr))
44
+
45
+ self.reset()
46
+
47
+ def reset(self) -> None:
48
+ """Reset the evaluator, clear the buffer and records."""
49
+ self.save_buffer = []
50
+ self.records = []
51
+ self.category_records = {}
52
+
53
+ def update(self, raw_batch_input: List[dict]) -> None:
54
+ """Update a batch of results to the buffer.
55
+
56
+ Args:
57
+ raw_batch_input (list[dict]):
58
+ Batch of the raw original input.
59
+ """
60
+ self.__check_format__(raw_batch_input)
61
+ self.save_buffer.extend(raw_batch_input)
62
+
63
+ def start_evaluation(self) -> dict:
64
+ """This function is used to start the evaluation process.
65
+
66
+ It will iterate over the saved buffer and evaluate each item.
67
+ Returns:
68
+ category_records(dict): Metric results per category.
69
+ """
70
+
71
+ category_collect = {}
72
+
73
+ for data_item in tqdm(self.save_buffer):
74
+
75
+ metric_for_single = {}
76
+
77
+ # (1) len(gt)==0 : skip
78
+ if self.__is_zero__(data_item['gt_bboxes']):
79
+ continue
80
+
81
+ # (2) len(pred)==0 : model's wrong
82
+ if self.__is_zero__(data_item['pred_bboxes']):
83
+ for iou_thr in self.iou_thresholds:
84
+ metric_for_single[f'AP@{iou_thr}'] = 0
85
+ metric_for_single[f'AR@{iou_thr}'] = 0
86
+ for topk in [1, 3, 5, 10]:
87
+ metric_for_single[f'gTop-{topk}@{iou_thr}'] = 0
88
+
89
+ data_item['num_gts'] = len(data_item['gt_bboxes'])
90
+ data_item.update(metric_for_single)
91
+ self.records.append(data_item)
92
+ continue
93
+
94
+             iou_array, pred_score = self.__calculate_iou_array_(data_item)
+             if self.call_for_category_mode:
+                 category = self.__category_mapping__(data_item['subclass'])
+                 if category not in category_collect.keys():
+                     category_collect[category] = {
+                         'ious': [],
+                         'scores': [],
+                         'sample_indices': [],
+                         'cnt': 0,
+                     }
+
+                 category_collect[category]['ious'].extend(iou_array)
+                 category_collect[category]['scores'].extend(pred_score)
+                 category_collect[category]['sample_indices'].extend(
+                     [data_item['index']] * len(iou_array))
+                 category_collect[category]['cnt'] += 1
+
+             for iou_thr in self.iou_thresholds:
+                 # AP/AR metric
+                 AP, AR = get_average_precision(iou_array, iou_thr)
+                 metric_for_single[f'AP@{iou_thr}'] = AP
+                 metric_for_single[f'AR@{iou_thr}'] = AR
+
+                 # topk metric
+                 metric_for_single.update(
+                     get_general_topk_scores(iou_array, iou_thr))
+
+             data_item['num_gts'] = iou_array.shape[1]
+             data_item.update(metric_for_single)
+             self.records.append(data_item)
+
+         self.collect_result()
+
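+         # AP_C/AR_C pool predictions within each category; the overall
+         # values are sample-count-weighted averages of the per-category
+         # results.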
+         if self.call_for_category_mode:
+             for iou_thr in self.iou_thresholds:
+                 self.category_records['overall'][f'AP_C@{iou_thr}'] = 0
+                 self.category_records['overall'][f'AR_C@{iou_thr}'] = 0
+
+                 for category in category_collect:
+                     AP_C, AR_C = subset_get_average_precision(
+                         category_collect[category], iou_thr)
+                     self.category_records[category][f'AP_C@{iou_thr}'] = AP_C
+                     self.category_records[category][f'AR_C@{iou_thr}'] = AR_C
+                     self.category_records['overall'][f'AP_C@{iou_thr}'] += (
+                         AP_C * category_collect[category]['cnt'] /
+                         len(self.records))
+                     self.category_records['overall'][f'AR_C@{iou_thr}'] += (
+                         AR_C * category_collect[category]['cnt'] /
+                         len(self.records))
+
+         return self.category_records
+
+     def collect_result(self) -> dict:
+         """Collect the results from the evaluation process.
+
+         Stores them based on their subclass.
+         Returns:
+             category_results (dict): Average results per category.
+         """
+         category_results = {}
+         category_results['overall'] = {}
+
+         for metric_name in self.eval_metric:
+             category_results['overall'][metric_name] = []
+         category_results['overall']['num_gts'] = 0
+
+         for data_item in self.records:
+             category = self.__category_mapping__(data_item['subclass'])
+
+             if category not in category_results:
+                 category_results[category] = {}
+                 for metric_name in self.eval_metric:
+                     category_results[category][metric_name] = []
+                 category_results[category]['num_gts'] = 0
+
+             for metric_name in self.eval_metric:
+                 category_results[category][metric_name].append(
+                     data_item[metric_name])
+                 category_results['overall'][metric_name].append(
+                     data_item[metric_name])
+
+             category_results['overall']['num_gts'] += data_item['num_gts']
+             category_results[category]['num_gts'] += data_item['num_gts']
+
+         for category in category_results:
+             for metric_name in self.eval_metric:
+                 category_results[category][metric_name] = np.mean(
+                     category_results[category][metric_name])
+
+         self.category_records = category_results
+
+         return category_results
+
+     def print_result(self) -> list:
+         """Build the result table.
+
+         Returns:
+             table_data (list[list[str]]): The metric result table,
+                 one row per metric type.
+         """
+         assert len(self.category_records) > 0, 'No result yet.'
+         self.category_records = {
+             key: self.category_records[key]
+             for key in sorted(self.category_records.keys(), reverse=True)
+         }
+
+         header = ['Type']
+         header.extend(self.category_records.keys())
+         table_columns = [[] for _ in range(len(header))]
+
+         # Metric rows for each IoU threshold.
+         for iou_thr in self.iou_thresholds:
+             show_in_table = (['AP', 'AR'] +
+                              [f'gTop-{k}' for k in self.top_k_visible]
+                              if not self.call_for_category_mode else
+                              ['AP', 'AR', 'AP_C', 'AR_C'] +
+                              [f'gTop-{k}' for k in self.top_k_visible])
+
+             for metric_type in show_in_table:
+                 table_columns[0].append(metric_type + ' ' + str(iou_thr))
+
+             for i, category in enumerate(self.category_records.keys()):
+                 ap = self.category_records[category][f'AP@{iou_thr}']
+                 ar = self.category_records[category][f'AR@{iou_thr}']
+                 table_columns[i + 1].append(f'{float(ap):.4f}')
+                 table_columns[i + 1].append(f'{float(ar):.4f}')
+                 if self.call_for_category_mode:
+                     ap = self.category_records[category][f'AP_C@{iou_thr}']
+                     ar = self.category_records[category][f'AR_C@{iou_thr}']
+                     table_columns[i + 1].append(f'{float(ap):.4f}')
+                     table_columns[i + 1].append(f'{float(ar):.4f}')
+                 for k in self.top_k_visible:
+                     top_k = self.category_records[category][
+                         f'gTop-{k}@{iou_thr}']
+                     table_columns[i + 1].append(f'{float(top_k):.4f}')
+
+         # Number of gts
+         table_columns[0].append('Num GT')
+         for i, category in enumerate(self.category_records.keys()):
+             table_columns[i + 1].append(
+                 f'{int(self.category_records[category]["num_gts"])}')
+
+         table_data = [header]
+         table_rows = list(zip(*table_columns))
+         table_data += table_rows
+         table_data = [list(row) for row in zip(*table_data)]
+
+         return table_data
+
+     def __category_mapping__(self, sub_class: str) -> str:
+         """Map the subclass name to the category name.
+
+         Args:
+             sub_class (str): The subclass name in the original samples.
+
+         Returns:
+             category (str): The category name.
+         """
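+         # e.g. a subclass like 'VG_Single_Attribute_Unique' maps to
+         # 'vg_sngl_attr'.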
+         sub_class = sub_class.lower()
+         sub_class = sub_class.replace('single', 'sngl')
+         sub_class = sub_class.replace('inter', 'int')
+         sub_class = sub_class.replace('unique', 'uniq')
+         sub_class = sub_class.replace('common', 'cmn')
+         sub_class = sub_class.replace('attribute', 'attr')
+         if 'sngl' in sub_class and ('attr' in sub_class or 'eq' in sub_class):
+             sub_class = 'vg_sngl_attr'
+         return sub_class
+
+     def __calculate_iou_array_(
+             self, data_item: dict) -> Tuple[np.ndarray, np.ndarray]:
+         """Calculate the IoU information needed for evaluation.
+
+         Args:
+             data_item (dict): A single sample with predicted and
+                 ground-truth boxes.
+
+         Returns:
+             np.ndarray, np.ndarray:
+                 The IoU array sorted by confidence and the
+                 confidence scores.
+         """
+
+         pred_bboxes = data_item['pred_bboxes']
+         gt_bboxes = data_item['gt_bboxes']
+         # Sort the bounding boxes based on their scores.
+         pred_scores = data_item['pred_scores']
+         top_idxs = torch.argsort(pred_scores, descending=True)
+         pred_scores = pred_scores[top_idxs]
+
+         pred_bboxes = to_9dof_box(index_box(pred_bboxes, top_idxs))
+         gt_bboxes = to_9dof_box(gt_bboxes)
+
+         iou_matrix = pred_bboxes.overlaps(pred_bboxes,
+                                           gt_bboxes)  # (num_query, num_gt)
+         # Move everything to numpy, preparing for the AP/top-k calculation.
+         pred_scores = pred_scores.cpu().numpy()
+         iou_array = iou_matrix.cpu().numpy()
+
+         return iou_array, pred_scores
+
+     def __is_zero__(self, box):
+         if isinstance(box, (list, tuple)):
+             return len(box[0]) == 0
+         return len(box) == 0
+
+     def __check_format__(self, raw_input: List[dict]) -> None:
+         """Check that the input conforms to the mmscan evaluation format and
+         transform the input box format.
+
+         Args:
+             raw_input (list[dict]): The input of the VG evaluator.
+         """
+         assert isinstance(
+             raw_input,
+             list), 'The input of the VG evaluator should be a list of dict.'
+
+         for _index in tqdm(range(len(raw_input))):
+             if 'index' not in raw_input[_index]:
+                 raw_input[_index]['index'] = len(self.save_buffer) + _index
+
+             if 'subclass' not in raw_input[_index]:
+                 raw_input[_index]['subclass'] = 'non-class'
+
+             assert 'gt_bboxes' in raw_input[_index]
+             assert 'pred_bboxes' in raw_input[_index]
+             assert 'pred_scores' in raw_input[_index]
+
+             for mode in ['pred_bboxes', 'gt_bboxes']:
+                 if (isinstance(raw_input[_index][mode], dict)
+                         and 'center' in raw_input[_index][mode]):
+                     raw_input[_index][mode] = [
+                         torch.tensor(raw_input[_index][mode]['center']),
+                         torch.tensor(raw_input[_index][mode]['size']).to(
+                             torch.float32),
+                         torch.tensor(raw_input[_index][mode]['rot']).to(
+                             torch.float32)
+                     ]
+
+
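+ # Round every box coordinate to two decimals before tensor conversion.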
+ def trun_box(box_list):
+     trun_box_list = []
+     for box in box_list:
+         trun_box_list.append([round(x, 2) for x in box])
+     return trun_box_list
+
+ def evaluation_for_challenge(gt_data, pred_data):
+     inputs = []
+     for sample_ID in gt_data:
+         batch_result = {}
+         if sample_ID not in pred_data:
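+             # Missing prediction: fall back to empty scores and boxes.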
+ batch_result["pred_scores"] = torch.zeros(0,9)
348
+ batch_result["pred_bboxes"] = torch.zeros(0,)
349
+ else:
350
+ batch_result["pred_scores"] = torch.tensor(pred_data[sample_ID]["score"])
351
+ batch_result["pred_bboxes"] = torch.tensor(trun_box(pred_data[sample_ID]["pred_bboxes"]))
352
+
353
+ batch_result["gt_bboxes"] = torch.tensor(gt_data[sample_ID])
354
+ batch_result["subclass"] = sample_ID.split('__')[0]
355
+ inputs.append(batch_result)
356
+
357
+ vg_evaluator = VisualGroundingEvaluator()
358
+ vg_evaluator.update(inputs)
359
+ results = vg_evaluator.start_evaluation()
360
+ #vg_evaluator.print_result()
361
+ return results['overall']
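
For reference, a minimal end-to-end sketch of how evaluation_for_challenge can be driven. This is illustrative only: the sample ID and box values are made up, and it assumes the mmscan_utils box helpers accept plain (N, 9) tensors laid out as center, size, rotation.

from vg_evaluator import evaluation_for_challenge

# One GT box and one perfect prediction; the text before '__' is the subclass.
gt = {'vg_sngl_attr__0001': [[0., 0., 0., 1., 1., 1., 0., 0., 0.]]}
pred = {'vg_sngl_attr__0001': {
    'score': [0.9],
    'pred_bboxes': [[0., 0., 0., 1., 1., 1., 0., 0., 0.]],
}}

results = evaluation_for_challenge(gt, pred)
# Note: the IoU thresholds render as '0.25' / '0.5' in the metric keys.
print(results['gTop-[email protected]'])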