|
import torch
|
|
from det_map.det.dal.mmdet3d.models.builder import BBOX_ASSIGNERS
|
|
from mmdet.core.bbox.assigners import AssignResult
|
|
from mmdet.core.bbox.assigners import BaseAssigner
|
|
from mmdet.core.bbox.match_costs import build_match_cost
|
|
import torch.nn.functional as F
|
|
from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh, bbox_cxcywh_to_xyxy
|
|
try:
|
|
from scipy.optimize import linear_sum_assignment
|
|
except ImportError:
|
|
linear_sum_assignment = None
|
|
|
|
def denormalize_3d_pts(pts, pc_range):
    """Map normalized x/y/z coordinates back into ``pc_range`` units.

    For each of the first three channels ``i`` of the last axis the value is
    rescaled as ``v * (pc_range[i + 3] - pc_range[i]) + pc_range[i]``.

    Args:
        pts (Tensor): Points whose last axis starts with (x, y, z). Any
            further channels are copied through unchanged.
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``.

    Returns:
        Tensor: A new tensor with the same shape as ``pts``; the input is
        not modified.
    """
    restored = pts.clone()
    for axis in range(3):
        lower = pc_range[axis]
        extent = pc_range[axis + 3] - lower
        restored[..., axis:axis + 1] = pts[..., axis:axis + 1] * extent + lower
    return restored
|
|
|
|
def normalize_3d_pts(pts, pc_range):
    """Normalize x/y/z coordinates by the extents of ``pc_range``.

    Each of the first three channels has its range minimum subtracted and
    the result is divided by the per-axis extent
    ``pc_range[i + 3] - pc_range[i]``.

    Args:
        pts (Tensor): Points whose last axis holds (x, y, z). The division
            broadcasts against a 3-element factor, so the last axis must be
            compatible with length 3.
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``.

    Returns:
        Tensor: A new tensor of normalized points; the input is not modified.
    """
    x_min, y_min, z_min = pc_range[0], pc_range[1], pc_range[2]
    extents = pts.new_tensor([
        pc_range[3] - x_min,
        pc_range[4] - y_min,
        pc_range[5] - z_min,
    ])

    shifted = pts.clone()
    for axis, lower in enumerate((x_min, y_min, z_min)):
        shifted[..., axis:axis + 1] = pts[..., axis:axis + 1] - lower
    return shifted / extents
|
|
|
|
def normalize_2d_bbox(bboxes, pc_range):
    """Convert corner-format boxes to normalized (cx, cy, w, h).

    The boxes are first converted with ``bbox_xyxy_to_cxcywh``; the centre is
    then shifted by the x/y minimum of ``pc_range`` and all four values are
    divided by the matching x/y extent of the range.

    Args:
        bboxes (Tensor): Boxes accepted by ``bbox_xyxy_to_cxcywh``
            (presumably ``(..., 4)`` in x1, y1, x2, y2 order).
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``; only the x/y
            entries are read.

    Returns:
        Tensor: Normalized boxes in (cx, cy, w, h) layout.
    """
    x_min, y_min = pc_range[0], pc_range[1]
    span_x = pc_range[3] - x_min
    span_y = pc_range[4] - y_min

    boxes = bbox_xyxy_to_cxcywh(bboxes)
    # Only the centre is translated; width/height are pure lengths.
    boxes[..., 0:1] = boxes[..., 0:1] - x_min
    boxes[..., 1:2] = boxes[..., 1:2] - y_min

    scale = bboxes.new_tensor([span_x, span_y, span_x, span_y])
    return boxes / scale
|
|
|
|
def normalize_2d_pts(pts, pc_range):
    """Normalize x/y coordinates by the x/y extents of ``pc_range``.

    The range minimum is subtracted from the first two channels and the
    result is divided by ``(pc_range[3] - pc_range[0],
    pc_range[4] - pc_range[1])``.

    Args:
        pts (Tensor): Points whose last axis holds (x, y). The division
            broadcasts against a 2-element factor, so the last axis must be
            compatible with length 2.
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``; only the x/y
            entries are read.

    Returns:
        Tensor: A new tensor of normalized points; the input is not modified.
    """
    x_min, y_min = pc_range[0], pc_range[1]
    extents = pts.new_tensor([pc_range[3] - x_min, pc_range[4] - y_min])

    shifted = pts.clone()
    for axis, lower in enumerate((x_min, y_min)):
        shifted[..., axis:axis + 1] = pts[..., axis:axis + 1] - lower
    return shifted / extents
|
|
|
|
def denormalize_2d_bbox(bboxes, pc_range):
    """Convert normalized (cx, cy, w, h) boxes to corner format in range units.

    The boxes are converted with ``bbox_cxcywh_to_xyxy``; every even channel
    (x values) is then rescaled by the x extent and offset of ``pc_range``
    and every odd channel (y values) by the y extent and offset.

    Args:
        bboxes (Tensor): Boxes accepted by ``bbox_cxcywh_to_xyxy``
            (presumably ``(..., 4)`` normalized cx, cy, w, h).
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``; only the x/y
            entries are read.

    Returns:
        Tensor: The converted boxes, rescaled in place on the tensor returned
        by ``bbox_cxcywh_to_xyxy``.
    """
    x_min, y_min = pc_range[0], pc_range[1]
    corners = bbox_cxcywh_to_xyxy(bboxes)

    x_vals = corners[..., 0::2] * (pc_range[3] - x_min) + x_min
    corners[..., 0::2] = x_vals

    y_vals = corners[..., 1::2] * (pc_range[4] - y_min) + y_min
    corners[..., 1::2] = y_vals

    return corners
|
|
def denormalize_2d_pts(pts, pc_range):
    """Map normalized x/y coordinates back into ``pc_range`` units.

    For each of the first two channels ``i`` of the last axis the value is
    rescaled as ``v * (pc_range[i + 3] - pc_range[i]) + pc_range[i]``.

    Args:
        pts (Tensor): Points whose last axis starts with (x, y). Any further
            channels are copied through unchanged.
        pc_range (Sequence[float]): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)``; only the x/y
            entries are read.

    Returns:
        Tensor: A new tensor with the same shape as ``pts``; the input is
        not modified.
    """
    restored = pts.clone()
    for axis in range(2):
        lower = pc_range[axis]
        extent = pc_range[axis + 3] - lower
        restored[..., axis:axis + 1] = pts[..., axis:axis + 1] * extent + lower
    return restored
|
|
|
|
@BBOX_ASSIGNERS.register_module()
class MapTRAssigner(BaseAssigner):
    """One-to-one Hungarian matching between predicted and gt point sets.

    The matching cost is the sum of two terms, both built with
    ``build_match_cost``: a classification cost and a point-set cost. Each
    ground-truth instance carries several candidate point orderings; for
    every (prediction, gt) pair the cheapest ordering is the one that enters
    the cost matrix.

    The targets don't include the no_object class, so generally there are
    more predictions than targets. After the one-to-one matching, the
    un-matched predictions are treated as background. Thus each query
    prediction is assigned `0` or a positive integer indicating the ground
    truth index:

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        z_cfg (dict): Flags describing whether points carry a z coordinate.
            Only ``gt_z_flag`` is read here: when true the gt points are
            normalized with ``normalize_3d_pts``, otherwise with
            ``normalize_2d_pts``.
        cls_cost (dict): Config of the classification match cost.
        reg_cost (dict): Accepted for config compatibility; not used by this
            assigner.
        iou_cost (dict): Accepted for config compatibility; not used by this
            assigner.
        pts_cost (dict): Config of the point-set match cost.
        pc_range (Sequence[float] | None): Range indexed as
            ``(x_min, y_min, z_min, x_max, y_max, z_max)`` used to normalize
            the gt points. It is indexed inside ``assign``, so it must not be
            ``None`` when non-empty inputs are matched.
    """

    def __init__(self,
                 z_cfg = dict(
                     pred_z_flag=False,
                     gt_z_flag=False,
                 ),
                 cls_cost=dict(type='ClassificationCost', weight=2.),
                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),
                 iou_cost=dict(type='IoUCost', weight=0.0),
                 pts_cost=dict(type='ChamferDistance',loss_src_weight=1.0,loss_dst_weight=1.0),
                 pc_range=None):
        self.z_cfg = z_cfg
        self.cls_cost = build_match_cost(cls_cost)
        # reg_cost / iou_cost are intentionally not built: the matching cost
        # only consists of the classification and point-set terms.
        self.pts_cost = build_match_cost(pts_cost)
        self.pc_range = pc_range

    def assign(self,
               bbox_pred,
               cls_pred,
               pts_pred,
               gt_bboxes,
               gt_labels,
               gt_pts,
               gt_bboxes_ignore=None,
               eps=1e-7):
        """Computes one-to-one matching based on the weighted costs.

        This method assigns each query prediction to a ground truth or to
        background. In `assigned_gt_inds`, -1 means don't care, 0 means
        negative sample, and a positive number is the index (1-based) of the
        assigned gt.

        The assignment is done in the following steps, the order matters.

        1. assign every prediction to -1
        2. compute the classification cost and the point-set cost (minimum
           over the candidate gt orderings)
        3. do Hungarian matching on CPU based on the summed costs
        4. assign all to 0 (background) first, then for each matched pair
           between predictions and gts, treat this prediction as foreground
           and assign the corresponding gt index (plus 1) to it.

        Args:
            bbox_pred (Tensor): Predicted boxes; the last dimension must be
                4. Here it is only used for that shape check and as the
                device reference for the output tensors.
            cls_pred (Tensor): Predicted classification logits, passed to
                the classification cost together with `gt_labels`.
            pts_pred (Tensor): Predicted points, indexed as
                [num_query, num_pts, num_coords]. Presumably normalized to
                the same [0, 1] frame the gt points are mapped into.
                If num_pts differs from the gt, the predictions are linearly
                resampled to the gt point count before the cost is computed.
            gt_bboxes (Tensor): Ground truth boxes. Not used by this method.
            gt_labels (Tensor): Labels of the ground truths, indexable by
                gt index.
            gt_pts (Tensor): Ground truth points in `pc_range` units, shape
                [num_gt, num_orders, num_pts, num_coords].
            gt_bboxes_ignore (Tensor, optional): Must be None.
            eps (int | float, optional): Unused; kept for interface
                compatibility. Default 1e-7.

        Returns:
            tuple: ``(assign_result, order_index)`` where ``assign_result``
            is the :obj:`AssignResult` and ``order_index`` is a tensor of
            shape [num_query, num_gt] holding, for each pair, the index of
            the gt ordering with the lowest point cost. ``order_index`` is
            None when there are no gts or no predictions.

        Raises:
            ImportError: If scipy is not installed and matching is required.
        """
        assert gt_bboxes_ignore is None, \
            'Only case when gt_bboxes_ignore is None is supported.'
        assert bbox_pred.shape[-1] == 4, \
            'Only support bbox pred shape is 4 dims'

        num_gts, num_bboxes = gt_pts.size(0), pts_pred.size(0)

        # 1. Every prediction starts as "don't care".
        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),
                                              -1,
                                              dtype=torch.long)
        assigned_labels = bbox_pred.new_full((num_bboxes, ),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_bboxes == 0:
            # No ground truth or no prediction: nothing to match.
            if num_gts == 0:
                # Without gts every prediction is background.
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels), None

        # Fail before doing any cost computation if the solver is missing.
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')

        # 2. Classification cost.
        cls_cost = self.cls_cost(cls_pred, gt_labels)

        # The 4-way unpack also enforces that gt_pts is 4-D.
        _, num_orders, num_pts_per_gtline, _ = gt_pts.shape
        if self.z_cfg['gt_z_flag']:
            normalized_gt_pts = normalize_3d_pts(gt_pts, self.pc_range)
        else:
            normalized_gt_pts = normalize_2d_pts(gt_pts, self.pc_range)

        num_pts_per_predline = pts_pred.size(1)
        if num_pts_per_predline != num_pts_per_gtline:
            # Resample predictions along the point axis so both sides have
            # the same number of points; interpolate expects (N, C, L).
            pts_pred_interpolated = F.interpolate(
                pts_pred.permute(0, 2, 1),
                size=(num_pts_per_gtline),
                mode='linear',
                align_corners=True)
            pts_pred_interpolated = pts_pred_interpolated.permute(
                0, 2, 1).contiguous()
        else:
            pts_pred_interpolated = pts_pred

        normalized_gt_pts = normalized_gt_pts.to(pts_pred_interpolated.device)
        pts_cost_ordered = self.pts_cost(pts_pred_interpolated,
                                         normalized_gt_pts)
        pts_cost_ordered = pts_cost_ordered.view(num_bboxes, num_gts,
                                                 num_orders)
        # Keep, per (prediction, gt) pair, the cheapest gt ordering.
        pts_cost, order_index = torch.min(pts_cost_ordered, 2)

        # 3. Hungarian matching on CPU.
        cost = (cls_cost + pts_cost).detach().cpu()
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        # Indices stay on CPU; only the values written below are moved to
        # the output device.
        matched_row_inds = torch.from_numpy(matched_row_inds)
        matched_col_inds = torch.from_numpy(matched_col_inds)

        # 4. Everything is background first, then matched pairs are filled.
        assigned_gt_inds[:] = 0
        assigned_gt_inds[matched_row_inds] = (matched_col_inds + 1).to(
            assigned_gt_inds.device)
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds].to(
            assigned_labels.device)
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels), order_index
|
|
|
|
|
|
|
|
|