File size: 6,208 Bytes
f717329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Copyright (c) Facebook, Inc. and its affiliates.

# pyre-unsafe

from typing import Any
import torch
from torch.nn import functional as F

from detectron2.structures import BitMasks, Boxes, BoxMode

from .base import IntTupleBox, make_int_box
from .to_mask import ImageSizeType


def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
    """
    Resample a coarse segmentation tensor to the size of a bounding box and
    pick the highest scoring label for every pixel of that box.

    Args:
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H); only the width and
            the height are used here
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
        (width and height are clamped to be at least 1)
    """
    _, _, box_w, box_h = box_xywh_abs
    # a degenerate (zero-sized) box is still resampled to at least one pixel per side
    box_w = max(int(box_w), 1)
    box_h = max(int(box_h), 1)
    scores_at_box_size = F.interpolate(
        coarse_segm,
        (box_h, box_w),
        mode="bilinear",
        align_corners=False,
    )
    # the label of a pixel is the index of its highest scoring channel
    return scores_at_box_size.argmax(dim=1)


def resample_fine_and_coarse_segm_tensors_to_bbox(
    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
):
    """
    Resample fine and coarse segmentation tensors to the size of a bounding
    box and combine them into one label per pixel of that box: the fine label
    where the coarse label is non-zero, and 0 everywhere else.

    Args:
        fine_segm: float tensor of shape [1, C, Hout, Wout]
        coarse_segm: float tensor of shape [1, K, Hout, Wout]
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H); only the width and
            the height are used here
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
        (width and height are clamped to be at least 1)
    """
    _, _, box_w, box_h = box_xywh_abs
    # a degenerate (zero-sized) box is still resampled to at least one pixel per side
    box_w = max(int(box_w), 1)
    box_h = max(int(box_h), 1)
    out_hw = (box_h, box_w)

    # coarse labels decide which pixels count as foreground (label > 0)
    coarse_scores = F.interpolate(coarse_segm, out_hw, mode="bilinear", align_corners=False)
    is_foreground = coarse_scores.argmax(dim=1) > 0

    # fine labels are kept on foreground pixels and zeroed out elsewhere
    fine_scores = F.interpolate(fine_segm, out_hw, mode="bilinear", align_corners=False)
    fine_labels = fine_scores.argmax(dim=1)
    return fine_labels * is_foreground.long()


def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
    """
    Resample the fine and coarse segmentation stored in a predictor output to
    the given bounding box and derive labels for each pixel of the bounding box.

    This only reads the `fine_segm` and `coarse_segm` attributes of the
    predictor output and forwards them to
    `resample_fine_and_coarse_segm_tensors_to_bbox`.

    Args:
        predictor_output: DensePose predictor output that contains segmentation
            results to be resampled (attributes `fine_segm` and `coarse_segm`)
        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
            corner coordinates, width (W) and height (H)
    Return:
        Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
    """
    fine_segm = predictor_output.fine_segm
    coarse_segm = predictor_output.coarse_segm
    return resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh_abs)


def predictor_output_with_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate

    A pixel belongs to the mask of an instance if it lies inside the instance
    bounding box and its resampled coarse label is non-zero. The part of a
    bounding box that falls outside of the image is ignored.

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(N):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        # labels_i is expected to have size [1, h, w] (see the resampling function)
        labels_i = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
        x, y, w, h = box_xywh
        # Clip the paste region to the image: an unclipped slice would either
        # mismatch the label tensor in shape (box crossing the image border)
        # or wrap around through negative indexing (box at negative coordinates)
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, W), min(y + h, H)
        if x1 <= x0 or y1 <= y0:
            # the box is empty or lies completely outside of the image
            continue
        # take the matching window of the box labels; non-zero label = foreground
        masks[i, y0:y1, x0:x1] = labels_i[0, y0 - y : y1 - y, x0 - x : x1 - x] > 0

    return BitMasks(masks)


def predictor_output_with_fine_and_coarse_segm_to_mask(
    predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
) -> BitMasks:
    """
    Convert predictor output with coarse and fine segmentation to a mask.
    Assumes that predictor output has the following attributes:
     - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
         unnormalized scores for N instances; D is the number of coarse
         segmentation labels, H and W is the resolution of the estimate
     - fine_segm (tensor of size [N, C, H, W]): fine segmentation
         unnormalized scores for N instances; C is the number of fine
         segmentation labels, H and W is the resolution of the estimate

    A pixel belongs to the mask of an instance if it lies inside the instance
    bounding box and its combined fine / coarse label is non-zero. The part of
    a bounding box that falls outside of the image is ignored.

    Args:
        predictor_output: DensePose predictor output to be converted to mask
        boxes (Boxes): bounding boxes that correspond to the DensePose
            predictor outputs
        image_size_hw (tuple [int, int]): image height Himg and width Wimg
    Return:
        BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
        a mask of the size of the image for each instance
    """
    H, W = image_size_hw
    boxes_xyxy_abs = boxes.tensor.clone()
    boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    N = len(boxes_xywh_abs)
    masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
    for i in range(N):
        box_xywh = make_int_box(boxes_xywh_abs[i])
        # labels_i is expected to have size [1, h, w] (see the resampling function)
        labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
        x, y, w, h = box_xywh
        # Clip the paste region to the image: an unclipped slice would either
        # mismatch the label tensor in shape (box crossing the image border)
        # or wrap around through negative indexing (box at negative coordinates)
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, W), min(y + h, H)
        if x1 <= x0 or y1 <= y0:
            # the box is empty or lies completely outside of the image
            continue
        # take the matching window of the box labels; non-zero label = foreground
        masks[i, y0:y1, x0:x1] = labels_i[0, y0 - y : y1 - y, x0 - x : x1 - x] > 0
    return BitMasks(masks)