import numpy as np
import unittest
from copy import copy
import cv2
import torch
from fvcore.common.benchmark import benchmark
from torch.nn import functional as F

from detectron2.layers.roi_align import ROIAlign, roi_align


class ROIAlignTest(unittest.TestCase):
    def test_forward_output(self):
        input = np.arange(25).reshape(5, 5).astype("float32")
        """
        0  1  2  3  4
        5  6  7  8  9
        10 11 12 13 14
        15 16 17 18 19
        20 21 22 23 24
        """

        output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False)
        output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True)

        # expected values without the half-pixel correction (aligned=False):
        old_results = [
            [7.5, 8, 8.5, 9],
            [10, 10.5, 11, 11.5],
            [12.5, 13, 13.5, 14],
            [15, 15.5, 16, 16.5],
        ]

        # expected values with the half-pixel correction (aligned=True):
        correct_results = [
            [4.5, 5.0, 5.5, 6.0],
            [7.0, 7.5, 8.0, 8.5],
            [9.5, 10.0, 10.5, 11.0],
            [12.0, 12.5, 13.0, 13.5],
        ]
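        # Every entry in correct_results is exactly 3.0 smaller than the corresponding
        # entry in old_results: the input is a linear ramp (value = 5 * row + col), and
        # the aligned=True convention shifts each sampling point by half a pixel in both
        # x and y, which lowers every bilinear sample by 0.5 * 5 + 0.5 * 1 = 3.0.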

        self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten()))
        self.assertTrue(
            np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten())
        )

    def test_resize(self):
        H, W = 30, 30
        input = np.random.rand(H, W).astype("float32") * 100
        box = [10, 10, 20, 20]
        output = self._simple_roialign(input, box, (5, 5), aligned=True)

        input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR)
        box2x = [x / 2 for x in box]
        output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True)
        diff = np.abs(output2x - output)
        self.assertTrue(diff.max() < 1e-4)
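        # This is the property the aligned=True correction is meant to provide: pooling
        # a box from the original image and pooling the proportionally scaled box from a
        # 2x-downsampled image give (almost) the same result; the tolerance absorbs the
        # interpolation error of cv2.resize. Without the half-pixel correction the two
        # outputs would generally be shifted relative to each other.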

    def test_grid_sample_equivalence(self):
        H, W = 30, 30
        input = np.random.rand(H, W).astype("float32") * 100
        box = [10, 10, 20, 20]
        # Only explicit sampling ratios are tested: the reference implementation below
        # samples a fixed (output_size * ratio)^2 grid per box, so it has no counterpart
        # for the adaptive number of samples used when sampling_ratio=0.
        for ratio in [1, 2, 3]:
            output = self._simple_roialign(input, box, (5, 5), sampling_ratio=ratio)
            output_grid_sample = grid_sample_roi_align(
                torch.from_numpy(input[None, None, :, :]).float(),
                torch.as_tensor(box).float()[None, :],
                5,
                1.0,
                ratio,
            )
            self.assertTrue(torch.allclose(output, output_grid_sample))

    def _simple_roialign(self, img, box, resolution, sampling_ratio=0, aligned=True):
        """
        RoiAlign with scale 1.0 on a single image and a single box.
        """
        if isinstance(resolution, int):
            resolution = (resolution, resolution)
        op = ROIAlign(resolution, 1.0, sampling_ratio, aligned=aligned)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        # ROIAlign expects rois of shape (R, 5): batch index followed by x1, y1, x2, y2.
        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        output = op.forward(input, rois)
        if torch.cuda.is_available():
            output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
            self.assertTrue(torch.allclose(output, output_cuda))
        return output[0, 0]

    def _simple_roialign_with_grad(self, img, box, resolution, device):
        if isinstance(resolution, int):
            resolution = (resolution, resolution)

        op = ROIAlign(resolution, 1.0, 0, aligned=True)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        input = input.to(device=device)
        rois = rois.to(device=device)
        input.requires_grad = True
        output = op.forward(input, rois)
        return input, output

    def test_empty_box(self):
        img = np.random.rand(5, 5)
        box = [3, 4, 5, 4]
        o = self._simple_roialign(img, box, 7)
        self.assertTrue(o.shape == (7, 7))
        self.assertTrue((o == 0).all())
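
        # The box has zero height (y1 == y2 == 4), so there is nothing to sample: the
        # forward output is all zeros, and the backward pass below should likewise leave
        # an all-zero gradient on the input, on CPU and (when available) on CUDA.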
        # Parenthesized so the CPU check also runs on machines without CUDA.
        for dev in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):
            input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev))
            output.sum().backward()
            self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input)))

    def test_empty_batch(self):
        input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
        rois = torch.zeros(0, 5, dtype=torch.float32)
        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
        output = op.forward(input, rois)
        self.assertTrue(output.shape == (0, 3, 7, 7))


def grid_sample_roi_align(input, boxes, output_size, scale, sampling_ratio):
    """
    Reference implementation of ROIAlign built on PointRend's point-sampling utilities
    (i.e. F.grid_sample): take an (output_size * sampling_ratio)^2 grid of bilinear
    samples inside each box, then average-pool each sampling_ratio x sampling_ratio
    block down to one output bin.
    """
    from detectron2.projects.point_rend.point_features import (
        generate_regular_grid_point_coords,
        get_point_coords_wrt_image,
        point_sample,
    )

    N, _, H, W = input.shape
    R = len(boxes)
    assert N == 1
    boxes = boxes * scale
    grid = generate_regular_grid_point_coords(R, output_size * sampling_ratio, device=boxes.device)
    coords = get_point_coords_wrt_image(boxes, grid)
    coords = coords / torch.as_tensor([W, H], device=coords.device)
    res = point_sample(input, coords.unsqueeze(0), align_corners=False)
    res = (
        res.squeeze(0)
        .permute(1, 0, 2)
        .reshape(R, -1, output_size * sampling_ratio, output_size * sampling_ratio)
    )
    res = F.avg_pool2d(res, sampling_ratio)
    return res


def benchmark_roi_align():
    def random_boxes(mean_box, stdev, N, maxsize):
        ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float)
        ret.clamp_(min=0, max=maxsize)
        return ret

    def func(shape, nboxes_per_img, sampling_ratio, device, box_size="large"):
        N, _, H, _ = shape
        input = torch.rand(*shape)
        boxes = []
        batch_idx = []
        for k in range(N):
            if box_size == "large":
                b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
            else:
                b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
            boxes.append(b)
            batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k)
        boxes = torch.cat(boxes, axis=0)
        batch_idx = torch.cat(batch_idx, axis=0)
        boxes = torch.cat([batch_idx, boxes], axis=1)

        input = input.to(device=device)
        boxes = boxes.to(device=device)

        def bench():
            if False and sampling_ratio > 0 and N == 1:
                # enable this branch to benchmark the grid_sample-based reference instead
                grid_sample_roi_align(input, boxes[:, 1:], 7, 1.0, sampling_ratio)
            else:
                roi_align(input, boxes, 7, 1.0, sampling_ratio, True)
            if device == "cuda":
                torch.cuda.synchronize()

        return bench

    def gen_args(arg):
        args = []
        for size in ["small", "large"]:
            for ratio in [0, 2]:
                args.append(copy(arg))
                args[-1]["sampling_ratio"] = ratio
                args[-1]["box_size"] = size
        return args

    arg = dict(shape=(1, 512, 256, 256), nboxes_per_img=512, device="cuda")
    benchmark(func, "cuda_roialign", gen_args(arg), num_iters=20, warmup_iters=1)
    arg.update({"device": "cpu", "shape": (1, 256, 128, 128)})
    benchmark(func, "cpu_roialign", gen_args(arg), num_iters=5, warmup_iters=1)


if __name__ == "__main__":
    if torch.cuda.is_available():
        benchmark_roi_align()
    unittest.main()
|
|