|
|
|
import logging
|
|
from typing import Callable, Dict, List, Optional, Tuple, Union
|
|
import torch
|
|
from torch import nn
|
|
from torch.nn import functional as F
|
|
|
|
from detectron2.config import configurable
|
|
from detectron2.data.detection_utils import get_fed_loss_cls_weights
|
|
from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple
|
|
from detectron2.modeling.box_regression import Box2BoxTransform, _dense_box_regression_loss
|
|
from detectron2.structures import Boxes, Instances
|
|
from detectron2.utils.events import get_event_storage
|
|
|
|
# Names exported by `from <this module> import *`.
__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"]
|
|
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
"""
|
|
Shape shorthand in this module:
|
|
|
|
N: number of images in the minibatch
|
|
R: number of ROIs, combined over all images, in the minibatch
|
|
Ri: number of ROIs in image i
|
|
K: number of foreground classes. E.g., there are 80 foreground classes in COCO.
|
|
|
|
Naming convention:
|
|
|
|
deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box
|
|
transform (see :class:`box_regression.Box2BoxTransform`).
|
|
|
|
pred_class_logits: predicted class scores in [-inf, +inf]; use
|
|
softmax(pred_class_logits) to estimate P(class).
|
|
|
|
gt_classes: ground-truth classification labels in [0, K], where [0, K) represent
|
|
foreground object classes and K represents the background class.
|
|
|
|
pred_proposal_deltas: predicted box2box transform deltas for transforming proposals
|
|
to detection box predictions.
|
|
|
|
gt_proposal_deltas: ground-truth box2box transform deltas
|
|
"""
|
|
|
|
|
|
def fast_rcnn_inference(
    boxes: List[torch.Tensor],
    scores: List[torch.Tensor],
    image_shapes: List[Tuple[int, int]],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Run `fast_rcnn_inference_single_image` on every image of a batch.

    The three input lists are iterated in lockstep, one entry per image.

    Args:
        boxes (list[Tensor]): per-image predicted boxes, class-specific or class-agnostic.
            Element i has shape (Ri, K * 4) for class-specific regression, or (Ri, 4)
            for class-agnostic regression, where Ri is the number of predicted objects
            for image i. Compatible with the output of
            :meth:`FastRCNNOutputLayers.predict_boxes`.
        scores (list[Tensor]): per-image predicted class scores. Element i has shape
            (Ri, K + 1). Compatible with the output of
            :meth:`FastRCNNOutputLayers.predict_probs`.
        image_shapes (list[tuple]): one size tuple per image. Each tuple is forwarded
            unchanged to `Boxes.clip` and `Instances`.
            NOTE(review): presumably (height, width), since callers pass
            `Instances.image_size` -- confirm against the detectron2 structures docs.
        score_thresh (float): only detections whose score exceeds this value are returned.
        nms_thresh (float): threshold for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): number of top scoring detections to return per image.
            Set < 0 to return all detections.

    Returns:
        instances (list[Instances]): N instances, one per image, holding the kept
            detections.
        kept_indices (list[Tensor]): N 1D tensors; tensor i holds, for each kept
            detection of image i, its row index in [0, Ri) in the input boxes/scores.
    """
    instances_per_image = []
    kept_indices_per_image = []
    for img_scores, img_boxes, img_shape in zip(scores, boxes, image_shapes):
        detections, kept_rows = fast_rcnn_inference_single_image(
            img_boxes, img_scores, img_shape, score_thresh, nms_thresh, topk_per_image
        )
        instances_per_image.append(detections)
        kept_indices_per_image.append(kept_rows)
    return instances_per_image, kept_indices_per_image
|
|
|
|
|
|
def _log_classification_stats(pred_logits, gt_classes, prefix="fast_rcnn"):
|
|
"""
|
|
Log the classification metrics to EventStorage.
|
|
|
|
Args:
|
|
pred_logits: Rx(K+1) logits. The last column is for background class.
|
|
gt_classes: R labels
|
|
"""
|
|
num_instances = gt_classes.numel()
|
|
if num_instances == 0:
|
|
return
|
|
pred_classes = pred_logits.argmax(dim=1)
|
|
bg_class_ind = pred_logits.shape[1] - 1
|
|
|
|
fg_inds = (gt_classes >= 0) & (gt_classes < bg_class_ind)
|
|
num_fg = fg_inds.nonzero().numel()
|
|
fg_gt_classes = gt_classes[fg_inds]
|
|
fg_pred_classes = pred_classes[fg_inds]
|
|
|
|
num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel()
|
|
num_accurate = (pred_classes == gt_classes).nonzero().numel()
|
|
fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel()
|
|
|
|
storage = get_event_storage()
|
|
storage.put_scalar(f"{prefix}/cls_accuracy", num_accurate / num_instances)
|
|
if num_fg > 0:
|
|
storage.put_scalar(f"{prefix}/fg_cls_accuracy", fg_num_accurate / num_fg)
|
|
storage.put_scalar(f"{prefix}/false_negative", num_false_negative / num_fg)
|
|
|
|
|
|
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Produce the detections of one image: drop non-finite predictions, clip boxes to
    the image, threshold on score, then apply per-class non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Discard every row that contains a NaN/Inf in either its boxes or its scores.
    finite_rows = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not finite_rows.all():
        boxes, scores = boxes[finite_rows], scores[finite_rows]

    # Drop the last score column (the background class).
    fg_scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4

    # Clip as a flat list of 4-d boxes, then restore the R x C x 4 layout.
    clipped = Boxes(boxes.reshape(-1, 4))
    clipped.clip(image_shape)
    boxes_per_class = clipped.tensor.view(-1, num_bbox_reg_classes, 4)

    # R x K mask of (row, class) pairs scoring above the threshold. Each row of
    # `filter_inds` is one surviving (row index, class index) pair.
    filter_mask = fg_scores > score_thresh
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        # Class-agnostic regression: one box per row, shared by every class.
        candidate_boxes = boxes_per_class[filter_inds[:, 0], 0]
    else:
        candidate_boxes = boxes_per_class[filter_mask]
    candidate_scores = fg_scores[filter_mask]

    # NMS is batched by class index so boxes of different classes never suppress
    # each other.
    keep = batched_nms(candidate_boxes, candidate_scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        # Assumes batched_nms returns indices in descending score order -- TODO confirm.
        keep = keep[:topk_per_image]
    final_boxes = candidate_boxes[keep]
    final_scores = candidate_scores[keep]
    final_inds = filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(final_boxes)
    result.scores = final_scores
    result.pred_classes = final_inds[:, 1]
    return result, final_inds[:, 0]
|
|
|
|
|
|
class FastRCNNOutputLayers(nn.Module):
    """
    Two linear layers for predicting Fast R-CNN outputs:

    1. proposal-to-detection box regression deltas
    2. classification scores
    """

    @configurable
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
        use_fed_loss: bool = False,
        use_sigmoid_ce: bool = False,
        get_fed_loss_cls_weights: Optional[Callable] = None,
        fed_loss_num_classes: int = 50,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module. A plain
                int is also accepted and is treated as the number of channels.
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): transform
                used to apply predicted deltas to proposals. The length of its
                ``weights`` sets the box dimension of the regression layer.
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou",
                "diou", "ciou"
            loss_weight (float|dict): weights to use for losses. Can be single number for
                weighting all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
                Keys missing from the dict default to a weight of 1.0.
            use_fed_loss (bool): whether to use federated loss which samples additional negative
                classes to calculate the loss
            use_sigmoid_ce (bool): whether to calculate the loss using weighted average of binary
                cross entropy with logits. This could be used together with federated loss
            get_fed_loss_cls_weights (Callable): a callable which takes dataset name and frequency
                weight power, and returns the probabilities to sample negative classes for
                federated loss. The implementation can be found in
                detectron2/data/detection_utils.py. It is called with no arguments here,
                and only when `use_fed_loss` is True.
            fed_loss_num_classes (int): number of federated classes to keep in total
        """
        super().__init__()
        # Backward compatibility: a bare int is interpreted as the channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        self.num_classes = num_classes
        input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

        # K foreground classes plus one background class.
        self.cls_score = nn.Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for layer in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(layer.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        # Accept ints as well as floats: `losses()` requires a dict, and a scalar such
        # as `loss_weight=1` would otherwise be stored as-is and break `.get()` there.
        if isinstance(loss_weight, (int, float)):
            loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
        self.loss_weight = loss_weight
        self.use_fed_loss = use_fed_loss
        self.use_sigmoid_ce = use_sigmoid_ce
        self.fed_loss_num_classes = fed_loss_num_classes

        if self.use_fed_loss:
            assert self.use_sigmoid_ce, "Please use sigmoid cross entropy loss with federated loss"
            fed_loss_cls_weights = get_fed_loss_cls_weights()
            assert (
                len(fed_loss_cls_weights) == self.num_classes
            ), "Please check the provided fed_loss_cls_weights. Their size should match num_classes"
            self.register_buffer("fed_loss_cls_weights", fed_loss_cls_weights)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """
        Translate a config node into the keyword arguments of :meth:`__init__`.

        Only the box regression loss weight is read from the config; the
        classification loss weight is left to its default.
        """
        return {
            "input_shape": input_shape,
            "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS),
            "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES,
            "cls_agnostic_bbox_reg": cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG,
            "smooth_l1_beta": cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA,
            "test_score_thresh": cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST,
            "test_nms_thresh": cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST,
            "test_topk_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
            "box_reg_loss_type": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE,
            "loss_weight": {"loss_box_reg": cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT},
            "use_fed_loss": cfg.MODEL.ROI_BOX_HEAD.USE_FED_LOSS,
            "use_sigmoid_ce": cfg.MODEL.ROI_BOX_HEAD.USE_SIGMOID_CE,
            # Deferred behind a lambda so the weights are only computed when
            # `__init__` actually needs them (i.e. when federated loss is enabled).
            "get_fed_loss_cls_weights": lambda: get_fed_loss_cls_weights(
                dataset_names=cfg.DATASETS.TRAIN,
                freq_weight_power=cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_FREQ_WEIGHT_POWER,
            ),
            "fed_loss_num_classes": cfg.MODEL.ROI_BOX_HEAD.FED_LOSS_NUM_CLASSES,
        }

    def forward(self, x):
        """
        Args:
            x: per-region features of shape (N, ...) for N bounding boxes to predict.
                Inputs with more than 2 dimensions are flattened to (N, -1).

        Returns:
            (Tensor, Tensor):
            First tensor: shape (N,K+1), scores for each of the N box. Each row contains the
            scores for K object categories and 1 background class.

            Second tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4),
            or (N,4) for class-agnostic regression.
        """
        if x.dim() > 2:
            x = torch.flatten(x, start_dim=1)
        scores = self.cls_score(x)
        proposal_deltas = self.bbox_pred(x)
        return scores, proposal_deltas

    def losses(self, predictions, proposals):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features that were used
                to compute predictions. The fields ``proposal_boxes``, ``gt_boxes``,
                ``gt_classes`` are expected.

        Returns:
            Dict[str, Tensor]: dict of losses, with keys "loss_cls" and "loss_box_reg",
            each already multiplied by its entry in ``self.loss_weight`` (1.0 if absent).
        """
        scores, proposal_deltas = predictions

        # Classification targets; an empty tensor stands in when there are no proposals.
        gt_classes = (
            cat([p.gt_classes for p in proposals], dim=0) if len(proposals) else torch.empty(0)
        )
        _log_classification_stats(scores, gt_classes)

        # Box regression inputs.
        if len(proposals):
            proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)
            assert not proposal_boxes.requires_grad, "Proposals should not require gradients!"
            # Proposals without a "gt_boxes" field fall back to their own proposal boxes
            # so the concatenated tensor still has one row per proposal.
            gt_boxes = cat(
                [(p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes).tensor for p in proposals],
                dim=0,
            )
        else:
            proposal_boxes = gt_boxes = torch.empty((0, 4), device=proposal_deltas.device)

        if self.use_sigmoid_ce:
            loss_cls = self.sigmoid_cross_entropy_loss(scores, gt_classes)
        else:
            loss_cls = cross_entropy(scores, gt_classes, reduction="mean")

        losses = {
            "loss_cls": loss_cls,
            "loss_box_reg": self.box_reg_loss(
                proposal_boxes, gt_boxes, proposal_deltas, gt_classes
            ),
        }
        return {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()}

    def get_fed_loss_classes(self, gt_classes, num_fed_loss_classes, num_classes, weight):
        """
        Args:
            gt_classes: a long tensor of shape R that contains the gt class label of each proposal.
            num_fed_loss_classes: minimum number of classes to keep when calculating federated loss.
            Will sample negative classes if number of unique gt_classes is smaller than this value.
            num_classes: number of foreground classes
            weight: probabilities used to sample negative classes

        Returns:
            Tensor:
                classes to keep when calculating the federated loss, including both unique gt
                classes and sampled negative classes.
        """
        unique_gt_classes = torch.unique(gt_classes)
        prob = unique_gt_classes.new_ones(num_classes + 1).float()
        # Index `num_classes` is the background class; never sample it.
        prob[-1] = 0
        if len(unique_gt_classes) < num_fed_loss_classes:
            prob[:num_classes] = weight.float().clone()
            # Classes already present in the ground truth must not be drawn again.
            prob[unique_gt_classes] = 0
            sampled_negative_classes = torch.multinomial(
                prob, num_fed_loss_classes - len(unique_gt_classes), replacement=False
            )
            fed_loss_classes = torch.cat([unique_gt_classes, sampled_negative_classes])
        else:
            fed_loss_classes = unique_gt_classes
        return fed_loss_classes

    def sigmoid_cross_entropy_loss(self, pred_class_logits, gt_classes):
        """
        Args:
            pred_class_logits: shape (N, K+1), scores for each of the N box. Each row contains the
            scores for K object categories and 1 background class
            gt_classes: a long tensor of shape R that contains the gt class label of each proposal.

        Returns:
            Tensor: scalar loss. The per-class binary cross entropy over the K foreground
            columns is summed (restricted to the federated classes when `use_fed_loss`
            is set) and divided by N. A zero scalar is returned for empty input.
        """
        if pred_class_logits.numel() == 0:
            return pred_class_logits.new_zeros([1])[0]

        N = pred_class_logits.shape[0]
        K = pred_class_logits.shape[1] - 1

        # One-hot targets over K+1 columns; slicing off the last column leaves
        # background proposals with an all-zero target row.
        target = pred_class_logits.new_zeros(N, K + 1)
        target[range(len(gt_classes)), gt_classes] = 1
        target = target[:, :K]

        cls_loss = F.binary_cross_entropy_with_logits(
            pred_class_logits[:, :-1], target, reduction="none"
        )

        if self.use_fed_loss:
            fed_loss_classes = self.get_fed_loss_classes(
                gt_classes,
                num_fed_loss_classes=self.fed_loss_num_classes,
                num_classes=K,
                weight=self.fed_loss_cls_weights,
            )
            # 0/1 mask over classes; K+1 wide so a background label can be indexed,
            # then cut back to the K foreground columns.
            fed_loss_classes_mask = fed_loss_classes.new_zeros(K + 1)
            fed_loss_classes_mask[fed_loss_classes] = 1
            fed_loss_classes_mask = fed_loss_classes_mask[:K]
            weight = fed_loss_classes_mask.view(1, K).expand(N, K).float()
        else:
            weight = 1

        loss = torch.sum(cls_loss * weight) / N
        return loss

    def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes):
        """
        Args:
            proposal_boxes/gt_boxes are tensors with the same shape (R, 4 or 5).
            pred_deltas has shape (R, 4 or 5), or (R, num_classes * (4 or 5)).
            gt_classes is a long tensor of shape R, the gt class label of each proposal.
            R shall be the number of proposals.

        Returns:
            Tensor: regression loss computed on foreground proposals only, divided by
            the total number of proposals R (at least 1).
        """
        box_dim = proposal_boxes.shape[1]

        # Only foreground proposals (label in [0, num_classes)) contribute.
        fg_inds = nonzero_tuple((gt_classes >= 0) & (gt_classes < self.num_classes))[0]
        if pred_deltas.shape[1] == box_dim:
            # Class-agnostic regression: one set of deltas per proposal.
            fg_pred_deltas = pred_deltas[fg_inds]
        else:
            # Class-specific regression: pick the deltas of each proposal's gt class.
            fg_pred_deltas = pred_deltas.view(-1, self.num_classes, box_dim)[
                fg_inds, gt_classes[fg_inds]
            ]

        loss_box_reg = _dense_box_regression_loss(
            [proposal_boxes[fg_inds]],
            self.box2box_transform,
            [fg_pred_deltas.unsqueeze(0)],
            [gt_boxes[fg_inds]],
            # Ellipsis is passed in the mask position because the inputs are already
            # restricted to foreground rows -- presumably it selects everything; confirm
            # against `_dense_box_regression_loss`.
            ...,
            self.box_reg_loss_type,
            self.smooth_l1_beta,
        )

        # Normalize by the number of ALL proposals (foreground and background), not
        # by the number of foreground proposals; guard against division by zero.
        return loss_box_reg / max(gt_classes.numel(), 1.0)

    def inference(self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features that were
                used to compute predictions. The ``proposal_boxes`` field is expected.

        Returns:
            list[Instances]: same as `fast_rcnn_inference`.
            list[Tensor]: same as `fast_rcnn_inference`.
        """
        boxes = self.predict_boxes(predictions, proposals)
        scores = self.predict_probs(predictions, proposals)
        image_shapes = [x.image_size for x in proposals]
        return fast_rcnn_inference(
            boxes,
            scores,
            image_shapes,
            self.test_score_thresh,
            self.test_nms_thresh,
            self.test_topk_per_image,
        )

    def predict_boxes_for_gt_classes(self, predictions, proposals):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features that were used
                to compute predictions. The fields ``proposal_boxes``, ``gt_classes`` are expected.

        Returns:
            list[Tensor]:
                A list of Tensors of predicted boxes for GT classes in case of
                class-specific box head. Element i of the list has shape (Ri, B), where Ri is
                the number of proposals for image i and B is the box dimension (4 or 5)
        """
        if not len(proposals):
            return []
        scores, proposal_deltas = predictions
        proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)
        N, B = proposal_boxes.shape
        predict_boxes = self.box2box_transform.apply_deltas(
            proposal_deltas, proposal_boxes
        )  # Nx(KxB)

        K = predict_boxes.shape[1] // B
        if K > 1:
            gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0)
            # Labels outside [0, K) (e.g. the background label K) cannot index the
            # class dimension, so they are clamped into range.
            gt_classes = gt_classes.clamp_(0, K - 1)

            predict_boxes = predict_boxes.view(N, K, B)[
                torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes
            ]
        num_prop_per_image = [len(p) for p in proposals]
        return predict_boxes.split(num_prop_per_image)

    def predict_boxes(
        self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]
    ):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features that were
                used to compute predictions. The ``proposal_boxes`` field is expected.

        Returns:
            list[Tensor]:
                A list of Tensors of predicted class-specific or class-agnostic boxes
                for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is
                the number of proposals for image i and B is the box dimension (4 or 5)
        """
        if not len(proposals):
            return []
        _, proposal_deltas = predictions
        num_prop_per_image = [len(p) for p in proposals]
        proposal_boxes = cat([p.proposal_boxes.tensor for p in proposals], dim=0)
        predict_boxes = self.box2box_transform.apply_deltas(
            proposal_deltas,
            proposal_boxes,
        )  # Nx(KxB)
        return predict_boxes.split(num_prop_per_image)

    def predict_probs(
        self, predictions: Tuple[torch.Tensor, torch.Tensor], proposals: List[Instances]
    ):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features that were
                used to compute predictions.

        Returns:
            list[Tensor]:
                A list of Tensors of predicted class probabilities for each image.
                Element i has shape (Ri, K + 1), where Ri is the number of proposals for image i.
        """
        scores, _ = predictions
        num_inst_per_image = [len(p) for p in proposals]
        # Match the activation to the training loss: independent sigmoids when
        # trained with sigmoid cross entropy, softmax over classes otherwise.
        if self.use_sigmoid_ce:
            probs = scores.sigmoid()
        else:
            probs = F.softmax(scores, dim=-1)
        return probs.split(num_inst_per_image, dim=0)
|
|
|