Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from typing import List, Optional, Tuple | |
| import torch | |
| import torch.nn as nn | |
| from torch import Tensor | |
| from mmdet.structures.bbox import bbox_overlaps | |
| from ..task_modules.coders import BaseBBoxCoder | |
| from ..task_modules.samplers import SamplingResult | |
def isr_p(cls_score: Tensor,
          bbox_pred: Tensor,
          bbox_targets: Tuple[Tensor],
          rois: Tensor,
          sampling_results: List[SamplingResult],
          loss_cls: nn.Module,
          bbox_coder: BaseBBoxCoder,
          k: float = 2,
          bias: float = 0,
          num_class: int = 80) -> tuple:
    """Importance-based Sample Reweighting (ISR_P), positive part.

    Ranks every positive sample by how well its decoded box overlaps its
    ground-truth box (IoU-HLR: sorted first within each GT, then within each
    class), maps the rank linearly and then non-linearly into a label weight,
    and rescales the weights so the total classification loss value is
    unchanged. ``label_weights`` is updated in place with the new weights.

    Args:
        cls_score (Tensor): Predicted classification scores.
        bbox_pred (Tensor): Predicted bbox deltas.
        bbox_targets (tuple[Tensor]): A tuple of bbox targets: labels,
            label_weights, bbox_targets and bbox_weights, respectively.
        rois (Tensor): Anchors (single_stage) in shape (n, 4) or RoIs
            (two_stage) in shape (n, 5).
        sampling_results (list[:obj:`SamplingResult`]): Sampling results,
            one per image.
        loss_cls (:obj:`nn.Module`): Classification loss func of the head.
        bbox_coder (:obj:`BaseBBoxCoder`): BBox coder of the head.
        k (float): Power of the non-linear mapping. Defaults to 2.
        bias (float): Shift of the non-linear mapping. Defaults to 0.
        num_class (int): Number of classes, defaults to 80.

    Return:
        tuple([Tensor]): labels, imp_based_label_weights, bbox_targets,
            bbox_target_weights
    """
    labels, label_weights, bbox_targets, bbox_weights = bbox_targets
    # Positives are the samples whose label lies in [0, num_class).
    pos_label_inds = ((labels >= 0) &
                      (labels < num_class)).nonzero().reshape(-1)
    pos_labels = labels[pos_label_inds]

    # if no positive samples, return the original targets
    num_pos = float(pos_label_inds.size(0))
    if num_pos == 0:
        return labels, label_weights, bbox_targets, bbox_weights

    # merge pos_assigned_gt_inds of per image to a single tensor:
    # each image's GT indices are shifted by the running offset so that
    # GT ids stay unique across the whole batch.
    gts = list()
    last_max_gt = 0
    for i in range(len(sampling_results)):
        gt_i = sampling_results[i].pos_assigned_gt_inds
        gts.append(gt_i + last_max_gt)
        if len(gt_i) != 0:
            last_max_gt = gt_i.max() + 1
    gts = torch.cat(gts)
    assert len(gts) == num_pos

    # Detach: ISR only produces weights; no gradient flows through the
    # ranking computation itself.
    cls_score = cls_score.detach()
    bbox_pred = bbox_pred.detach()

    # For single stage detectors, rois here indicate anchors, in shape (N, 4)
    # For two stage detectors, rois are in shape (N, 5); drop the batch index
    # column in that case.
    if rois.size(-1) == 5:
        pos_rois = rois[pos_label_inds][:, 1:]
    else:
        pos_rois = rois[pos_label_inds]

    # Class-aware regression predicts (N, #classes * 4): select each
    # positive's deltas for its own class. Class-agnostic heads predict (N, 4).
    if bbox_pred.size(-1) > 4:
        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
        pos_delta_pred = bbox_pred[pos_label_inds, pos_labels].view(-1, 4)
    else:
        pos_delta_pred = bbox_pred[pos_label_inds].view(-1, 4)

    # compute iou of the predicted bbox and the corresponding GT
    pos_delta_target = bbox_targets[pos_label_inds].view(-1, 4)
    pos_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_pred)
    target_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_target)
    ious = bbox_overlaps(pos_bbox_pred, target_bbox_pred, is_aligned=True)

    # NOTE: fancy indexing returns a copy, so the in-place *= below does not
    # touch label_weights until the explicit write-back at the end.
    pos_imp_weights = label_weights[pos_label_inds]
    # Two steps to compute IoU-HLR. Samples are first sorted by IoU locally,
    # then sorted again within the same-rank group
    max_l_num = pos_labels.bincount().max()
    for label in pos_labels.unique():
        l_inds = (pos_labels == label).nonzero().view(-1)
        l_gts = gts[l_inds]
        for t in l_gts.unique():
            t_inds = l_inds[l_gts == t]
            t_ious = ious[t_inds]
            # Double sort trick: argsort of the descending argsort gives
            # each element's rank (0 = highest IoU) within this GT group.
            _, t_iou_rank_idx = t_ious.sort(descending=True)
            _, t_iou_rank = t_iou_rank_idx.sort()
            # Offset IoUs by the within-GT rank so the class-level sort
            # below orders samples primarily by their local rank.
            ious[t_inds] += max_l_num - t_iou_rank.float()
        l_ious = ious[l_inds]
        _, l_iou_rank_idx = l_ious.sort(descending=True)
        _, l_iou_rank = l_iou_rank_idx.sort()  # IoU-HLR
        # linearly map HLR to label weights
        pos_imp_weights[l_inds] *= (max_l_num - l_iou_rank.float()) / max_l_num

    # Non-linear mapping: w = (bias + (1 - bias) * w) ** k
    pos_imp_weights = (bias + pos_imp_weights * (1 - bias)).pow(k)

    # normalize to make the new weighted loss value equal to the original loss
    pos_loss_cls = loss_cls(
        cls_score[pos_label_inds], pos_labels, reduction_override='none')
    if pos_loss_cls.dim() > 1:
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds][:,
                                                                        None]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights[:, None]
    else:
        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds]
        new_pos_loss_cls = pos_loss_cls * pos_imp_weights
    pos_loss_cls_ratio = ori_pos_loss_cls.sum() / new_pos_loss_cls.sum()
    pos_imp_weights = pos_imp_weights * pos_loss_cls_ratio
    # Write the rescaled importance weights back (in-place mutation of the
    # caller-provided label_weights tensor).
    label_weights[pos_label_inds] = pos_imp_weights

    bbox_targets = labels, label_weights, bbox_targets, bbox_weights
    return bbox_targets
def carl_loss(cls_score: Tensor,
              labels: Tensor,
              bbox_pred: Tensor,
              bbox_targets: Tensor,
              loss_bbox: nn.Module,
              k: float = 1,
              bias: float = 0.2,
              avg_factor: Optional[int] = None,
              sigmoid: bool = False,
              num_class: int = 80) -> dict:
    """Classification-Aware Regression Loss (CARL).

    Weights each positive sample's regression loss by its (detach-free)
    classification confidence for its own class, so gradients from the
    regression branch also flow into the classifier.

    Args:
        cls_score (Tensor): Predicted classification scores.
        labels (Tensor): Targets of classification.
        bbox_pred (Tensor): Predicted bbox deltas.
        bbox_targets (Tensor): Target of bbox regression.
        loss_bbox (func): Regression loss func of the head.
        k (float): Power of the non-linear mapping. Defaults to 1.
        bias (float): Shift of the non-linear mapping. Defaults to 0.2.
        avg_factor (int, optional): Average factor used in regression loss.
        sigmoid (bool): Activation of the classification score.
        num_class (int): Number of classes, defaults to 80.

    Return:
        dict: CARL loss dict.
    """
    # Positives are the samples whose label lies in [0, num_class).
    pos_inds = ((labels >= 0) &
                (labels < num_class)).nonzero().reshape(-1)
    if pos_inds.numel() == 0:
        # No positives: emit a zero loss that still depends on cls_score so
        # the computation graph stays connected.
        return dict(loss_carl=cls_score.sum()[None] * 0.)
    fg_labels = labels[pos_inds]

    # Confidence of each positive for its own GT class; gradient is kept on
    # purpose — this is what makes the loss "classification-aware".
    activated = cls_score.sigmoid() if sigmoid else cls_score.softmax(-1)
    fg_scores = activated[pos_inds, fg_labels]
    weights = (bias + (1 - bias) * fg_scores).pow(k)

    # Rescale the weights so their sum equals the number of positives.
    n_pos = float(fg_scores.size(0))
    weights *= n_pos / weights.sum()

    if avg_factor is None:
        avg_factor = bbox_targets.size(0)

    # Class-agnostic heads predict (N, 4); class-aware heads (N, #classes*4),
    # from which each positive's own-class deltas are selected.
    if bbox_pred.size(-1) > 4:
        per_class_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)
        fg_deltas = per_class_pred[pos_inds, fg_labels]
    else:
        fg_deltas = bbox_pred[pos_inds]

    reg_loss = loss_bbox(
        fg_deltas,
        bbox_targets[pos_inds],
        reduction_override='none') / avg_factor
    loss_carl = (reg_loss * weights[:, None]).sum()
    return dict(loss_carl=loss_carl[None])