import copy import numpy as np import logging import sys from fvcore.transforms.transform import ( HFlipTransform, NoOpTransform, VFlipTransform, ) from PIL import Image from typing import Tuple from fvcore.transforms.transform import ( BlendTransform, CropTransform, HFlipTransform, NoOpTransform, PadTransform, Transform, TransformList, VFlipTransform, ) from detectron2.data import transforms as T class RandomApplyClip(T.Augmentation): """ Randomly apply an augmentation with a given probability. """ def __init__(self, tfm_or_aug, prob=0.5, clip_frame_cnt=1): """ Args: tfm_or_aug (Transform, Augmentation): the transform or augmentation to be applied. It can either be a `Transform` or `Augmentation` instance. prob (float): probability between 0.0 and 1.0 that the wrapper transformation is applied """ super().__init__() self.aug = T.augmentation._transform_to_aug(tfm_or_aug) assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" self.prob = prob self._cnt = 0 self.clip_frame_cnt = clip_frame_cnt def get_transform(self, *args): if self._cnt % self.clip_frame_cnt == 0: self.do = self._rand_range() < self.prob self._cnt = 0 # avoiding overflow self._cnt += 1 if self.do: return self.aug.get_transform(*args) else: return NoOpTransform() def __call__(self, aug_input): if self._cnt % self.clip_frame_cnt == 0: self.do = self._rand_range() < self.prob self._cnt = 0 # avoiding overflow self._cnt += 1 if self.do: return self.aug(aug_input) else: return NoOpTransform() class RandomRotationClip(T.Augmentation): """ This method returns a copy of this image, rotated the given number of degrees counter clockwise around the given center. """ def __init__(self, angle, prob=0.5, expand=True, center=None, interp=None, clip_frame_cnt=1): """ Args: angle (list[float]): If ``sample_style=="range"``, a [min, max] interval from which to sample the angle (in degrees). If ``sample_style=="choice"``, a list of angles to sample from expand (bool): choose if the image should be resized to fit the whole rotated image (default), or simply cropped center (list[[float, float]]): If ``sample_style=="range"``, a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, [0, 0] being the top left of the image and [1, 1] the bottom right. If ``sample_style=="choice"``, a list of centers to sample from Default: None, which means that the center of rotation is the center of the image center has no effect if expand=True because it only affects shifting """ super().__init__() if isinstance(angle, (float, int)): angle = (angle, angle) if center is not None and isinstance(center[0], (float, int)): center = (center, center) self.angle_save = None self.center_save = None self._cnt = 0 self._init(locals()) def get_transform(self, image): h, w = image.shape[:2] if self._cnt % self.clip_frame_cnt == 0: center = None angle = np.random.uniform(self.angle[0], self.angle[1], size=self.clip_frame_cnt) if self.center is not None: center = ( np.random.uniform(self.center[0][0], self.center[1][0]), np.random.uniform(self.center[0][1], self.center[1][1]), ) angle = np.sort(angle) if self._rand_range() < self.prob: angle = angle[::-1] self.angle_save = angle self.center_save = center self._cnt = 0 # avoiding overflow angle = self.angle_save[self._cnt] center = self.center_save self._cnt += 1 if center is not None: center = (w * center[0], h * center[1]) # Convert to absolute coordinates if angle % 360 == 0: return NoOpTransform() return T.RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) class ResizeScaleClip(T.Augmentation): """ Takes target size as input and randomly scales the given target size between `min_scale` and `max_scale`. It then scales the input image such that it fits inside the scaled target box, keeping the aspect ratio constant. This implements the resize part of the Google's 'resize_and_crop' data augmentation: https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/input_utils.py#L127 """ def __init__( self, min_scale: float, max_scale: float, target_height: int, target_width: int, interp: int = Image.BILINEAR, clip_frame_cnt=1, ): """ Args: min_scale: minimum image scale range. max_scale: maximum image scale range. target_height: target image height. target_width: target image width. interp: image interpolation method. """ super().__init__() self._init(locals()) self._cnt = 0 def _get_resize(self, image: np.ndarray, scale: float): input_size = image.shape[:2] # Compute new target size given a scale. target_size = (self.target_height, self.target_width) target_scale_size = np.multiply(target_size, scale) # Compute actual rescaling applied to input image and output size. output_scale = np.minimum( target_scale_size[0] / input_size[0], target_scale_size[1] / input_size[1] ) output_size = np.round(np.multiply(input_size, output_scale)).astype(int) return T.ResizeTransform( input_size[0], input_size[1], output_size[0], output_size[1], self.interp ) def get_transform(self, image: np.ndarray): if self._cnt % self.clip_frame_cnt == 0: random_scale = np.random.uniform(self.min_scale, self.max_scale) self.random_scale_save = random_scale self._cnt = 0 # avoiding overflow self._cnt += 1 random_scale = self.random_scale_save return self._get_resize(image, random_scale) class RandomCropClip(T.Augmentation): """ Randomly crop a rectangle region out of an image. """ def __init__(self, crop_type: str, crop_size, clip_frame_cnt=1): """ Args: crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range". crop_size (tuple[float, float]): two floats, explained below. - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of size (H, W). crop size should be in (0, 1] - "relative_range": uniformly sample two values from [crop_size[0], 1] and [crop_size[1]], 1], and use them as in "relative" crop type. - "absolute" crop a (crop_size[0], crop_size[1]) region from input image. crop_size must be smaller than the input image size. - "absolute_range", for an input of size (H, W), uniformly sample H_crop in [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])]. Then crop a region (H_crop, W_crop). """ # TODO style of relative_range and absolute_range are not consistent: # one takes (h, w) but another takes (min, max) super().__init__() assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"] self._init(locals()) self._cnt = 0 def get_transform(self, image): h, w = image.shape[:2] # 667, 500 if self._cnt % self.clip_frame_cnt == 0: croph, cropw = self.get_crop_size((h, w)) assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) h0 = np.random.randint(h - croph + 1) # rand(124) -> 5 w0 = np.random.randint(w - cropw + 1) # rand(111) -> 634 h1 = np.random.randint(h0, h - croph + 1) w1 = np.random.randint(w0, w - cropw + 1) x = np.sort(np.random.rand(self.clip_frame_cnt)) h = h0 * x + h1 * (1-x) w = w0 * x + w1 * (1-x) h = np.round_(h).astype(int) w = np.round_(w).astype(int) if self._rand_range() < 0.5: h = h[::-1] w = w[::-1] self.hw_save = (h, w) self.crop_h_save, self.crop_w_save = croph, cropw self._cnt = 0 # avoiding overflow _h, _w = self.hw_save[0][self._cnt], self.hw_save[1][self._cnt] self._cnt += 1 return T.CropTransform(_w, _h, self.crop_w_save, self.crop_h_save) def get_crop_size(self, image_size): """ Args: image_size (tuple): height, width Returns: crop_size (tuple): height, width in absolute pixels """ h, w = image_size if self.crop_type == "relative": ch, cw = self.crop_size return int(h * ch + 0.5), int(w * cw + 0.5) elif self.crop_type == "relative_range": crop_size = np.asarray(self.crop_size, dtype=float) ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) return int(h * ch + 0.5), int(w * cw + 0.5) elif self.crop_type == "absolute": return (min(self.crop_size[0], h), min(self.crop_size[1], w)) elif self.crop_type == "absolute_range": assert self.crop_size[0] <= self.crop_size[1] ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1) cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1) return ch, cw else: raise NotImplementedError("Unknown crop type {}".format(self.crop_type)) class FixedSizeCropClip(T.Augmentation): """ If `crop_size` is smaller than the input image size, then it uses a random crop of the crop size. If `crop_size` is larger than the input image size, then it pads the right and the bottom of the image to the crop size if `pad` is True, otherwise it returns the smaller image. """ def __init__(self, crop_size: Tuple[int], pad: bool = True, pad_value: float = 128.0, clip_frame_cnt=1): """ Args: crop_size: target image (height, width). pad: if True, will pad images smaller than `crop_size` up to `crop_size` pad_value: the padding value. """ super().__init__() self._init(locals()) self._cnt = 0 def _get_crop(self, image: np.ndarray): # Compute the image scale and scaled size. input_size = image.shape[:2] output_size = self.crop_size # Add random crop if the image is scaled up. max_offset = np.subtract(input_size, output_size) max_offset = np.maximum(max_offset, 0) if self._cnt % self.clip_frame_cnt == 0: offset = np.multiply(max_offset, np.random.uniform(0.0, 1.0)) offset = np.round(offset).astype(int) self.offset_save = offset self._cnt = 0 # avoiding overflow self._cnt += 1 offset = self.offset_save return CropTransform( offset[1], offset[0], output_size[1], output_size[0], input_size[1], input_size[0] ) def _get_pad(self, image: np.ndarray): # Compute the image scale and scaled size. input_size = image.shape[:2] output_size = self.crop_size # Add padding if the image is scaled down. pad_size = np.subtract(output_size, input_size) pad_size = np.maximum(pad_size, 0) original_size = np.minimum(input_size, output_size) return PadTransform( 0, 0, pad_size[1], pad_size[0], original_size[1], original_size[0], self.pad_value ) def get_transform(self, image: np.ndarray): transforms = [self._get_crop(image)] if self.pad: transforms.append(self._get_pad(image)) return TransformList(transforms) class ResizeShortestEdgeClip(T.Augmentation): """ Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. """ def __init__( self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR, clip_frame_cnt=1 ): """ Args: short_edge_length (list[int]): If ``sample_style=="range"``, a [min, max] interval from which to sample the shortest edge length. If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. max_size (int): maximum allowed longest edge length. sample_style (str): either "range" or "choice". """ super().__init__() assert sample_style in ["range", "choice", "range_by_clip", "choice_by_clip"], sample_style self.is_range = ("range" in sample_style) if isinstance(short_edge_length, int): short_edge_length = (short_edge_length, short_edge_length) if self.is_range: assert len(short_edge_length) == 2, ( "short_edge_length must be two values using 'range' sample style." f" Got {short_edge_length}!" ) self._cnt = 0 self._init(locals()) def get_transform(self, image): if self._cnt % self.clip_frame_cnt == 0: if self.is_range: self.size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) else: self.size = np.random.choice(self.short_edge_length) self._cnt = 0 # avoiding overflow if self.size == 0: return NoOpTransform() self._cnt += 1 h, w = image.shape[:2] scale = self.size * 1.0 / min(h, w) if h < w: newh, neww = self.size, scale * w else: newh, neww = scale * h, self.size if max(newh, neww) > self.max_size: scale = self.max_size * 1.0 / max(newh, neww) newh = newh * scale neww = neww * scale neww = int(neww + 0.5) newh = int(newh + 0.5) return T.ResizeTransform(h, w, newh, neww, self.interp) class RandomFlipClip(T.Augmentation): """ Flip the image horizontally or vertically with the given probability. """ def __init__(self, prob=0.5, *, horizontal=True, vertical=False, clip_frame_cnt=1): """ Args: prob (float): probability of flip. horizontal (boolean): whether to apply horizontal flipping vertical (boolean): whether to apply vertical flipping """ super().__init__() if horizontal and vertical: raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") if not horizontal and not vertical: raise ValueError("At least one of horiz or vert has to be True!") self._cnt = 0 self._init(locals()) def get_transform(self, image): if self._cnt % self.clip_frame_cnt == 0: self.do = self._rand_range() < self.prob self._cnt = 0 # avoiding overflow self._cnt += 1 h, w = image.shape[:2] if self.do: if self.horizontal: return HFlipTransform(w) elif self.vertical: return VFlipTransform(h) else: return NoOpTransform() def build_augmentation(cfg, is_train): logger = logging.getLogger(__name__) aug_list = [] if is_train: use_lsj = cfg.INPUT.LSJ_AUG.ENABLED if use_lsj: image_size = cfg.INPUT.LSJ_AUG.IMAGE_SIZE min_scale = cfg.INPUT.LSJ_AUG.MIN_SCALE max_scale = cfg.INPUT.LSJ_AUG.MAX_SCALE if cfg.INPUT.RANDOM_FLIP != "none": if cfg.INPUT.RANDOM_FLIP == "flip_by_clip": flip_clip_frame_cnt = cfg.INPUT.SAMPLING_FRAME_NUM else: flip_clip_frame_cnt = 1 aug_list.append( # NOTE using RandomFlip modified for the support of flip maintenance RandomFlipClip( horizontal=(cfg.INPUT.RANDOM_FLIP == "horizontal") or (cfg.INPUT.RANDOM_FLIP == "flip_by_clip"), vertical=cfg.INPUT.RANDOM_FLIP == "vertical", clip_frame_cnt=flip_clip_frame_cnt, ) ) aug_list.extend([ T.ResizeScale( min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size ), T.FixedSizeCrop(crop_size=(image_size, image_size)), ]) else: min_size = cfg.INPUT.MIN_SIZE_TRAIN max_size = cfg.INPUT.MAX_SIZE_TRAIN sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING clip_frame_cnt = cfg.INPUT.SAMPLING_FRAME_NUM if "by_clip" in cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING else 1 # Crop if cfg.INPUT.CROP.ENABLED: crop_aug = RandomApplyClip( T.AugmentationList([ ResizeShortestEdgeClip([400, 500, 600], 1333, sample_style, clip_frame_cnt=clip_frame_cnt), RandomCropClip(cfg.INPUT.PSEUDO.CROP.TYPE, cfg.INPUT.PSEUDO.CROP.SIZE, clip_frame_cnt=clip_frame_cnt) ]), clip_frame_cnt=clip_frame_cnt ) aug_list.append(crop_aug) # Resize aug_list.append(ResizeShortestEdgeClip(min_size, max_size, sample_style, clip_frame_cnt=clip_frame_cnt)) # Flip if cfg.INPUT.RANDOM_FLIP != "none": if cfg.INPUT.RANDOM_FLIP == "flip_by_clip": flip_clip_frame_cnt = cfg.INPUT.SAMPLING_FRAME_NUM else: flip_clip_frame_cnt = 1 aug_list.append( # NOTE using RandomFlip modified for the support of flip maintenance RandomFlipClip( horizontal=(cfg.INPUT.RANDOM_FLIP == "horizontal") or (cfg.INPUT.RANDOM_FLIP == "flip_by_clip"), vertical=cfg.INPUT.RANDOM_FLIP == "vertical", clip_frame_cnt=flip_clip_frame_cnt, ) ) # Additional augmentations : brightness, contrast, saturation, rotation augmentations = cfg.INPUT.AUGMENTATIONS if "brightness" in augmentations: aug_list.append(T.RandomBrightness(0.9, 1.1)) if "contrast" in augmentations: aug_list.append(T.RandomContrast(0.9, 1.1)) if "saturation" in augmentations: aug_list.append(T.RandomSaturation(0.9, 1.1)) if "rotation" in augmentations: aug_list.append( T.RandomRotation( [-15, 15], expand=False, center=[(0.4, 0.4), (0.6, 0.6)], sample_style="range" ) ) else: # Resize min_size = cfg.INPUT.MIN_SIZE_TEST max_size = cfg.INPUT.MAX_SIZE_TEST sample_style = "choice" aug_list.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) return aug_list def build_pseudo_augmentation(cfg, is_train): logger = logging.getLogger(__name__) aug_list = [] if is_train: use_lsj = cfg.INPUT.LSJ_AUG.ENABLED if use_lsj: image_size = cfg.INPUT.LSJ_AUG.IMAGE_SIZE min_scale = cfg.INPUT.LSJ_AUG.MIN_SCALE max_scale = cfg.INPUT.LSJ_AUG.MAX_SCALE if cfg.INPUT.RANDOM_FLIP != "none": if cfg.INPUT.RANDOM_FLIP == "flip_by_clip": clip_frame_cnt = cfg.INPUT.SAMPLING_FRAME_NUM else: clip_frame_cnt = 1 aug_list.append( # NOTE using RandomFlip modified for the support of flip maintenance RandomFlipClip( horizontal=(cfg.INPUT.RANDOM_FLIP == "horizontal") or (cfg.INPUT.RANDOM_FLIP == "flip_by_clip"), vertical=cfg.INPUT.RANDOM_FLIP == "vertical", clip_frame_cnt=clip_frame_cnt, ) ) # Additional augmentations : brightness, contrast, saturation, rotation augmentations = cfg.INPUT.PSEUDO.AUGMENTATIONS if "brightness" in augmentations: aug_list.append(T.RandomBrightness(0.9, 1.1)) if "contrast" in augmentations: aug_list.append(T.RandomContrast(0.9, 1.1)) if "saturation" in augmentations: aug_list.append(T.RandomSaturation(0.9, 1.1)) if "rotation" in augmentations: aug_list.append( RandomRotationClip( [-15, 15], expand=False, center=[(0.4, 0.4), (0.6, 0.6)], clip_frame_cnt=clip_frame_cnt, ) ) aug_list.extend([ ResizeScaleClip( min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size, clip_frame_cnt=clip_frame_cnt, ), FixedSizeCropClip(crop_size=(image_size, image_size), clip_frame_cnt=clip_frame_cnt), ]) else: min_size = cfg.INPUT.PSEUDO.MIN_SIZE_TRAIN max_size = cfg.INPUT.PSEUDO.MAX_SIZE_TRAIN sample_style = cfg.INPUT.PSEUDO.MIN_SIZE_TRAIN_SAMPLING clip_frame_cnt = cfg.INPUT.SAMPLING_FRAME_NUM # Crop if cfg.INPUT.PSEUDO.CROP.ENABLED: crop_aug = RandomApplyClip( T.AugmentationList([ ResizeShortestEdgeClip([400, 500, 600], 1333, sample_style, clip_frame_cnt=clip_frame_cnt), RandomCropClip(cfg.INPUT.PSEUDO.CROP.TYPE, cfg.INPUT.PSEUDO.CROP.SIZE, clip_frame_cnt=clip_frame_cnt) ]), clip_frame_cnt=clip_frame_cnt ) aug_list.append(crop_aug) # Resize aug_list.append(ResizeShortestEdgeClip(min_size, max_size, sample_style, clip_frame_cnt=clip_frame_cnt)) # Flip aug_list.append( # NOTE using RandomFlip modified for the support of flip maintenance RandomFlipClip( horizontal=(cfg.INPUT.RANDOM_FLIP == "horizontal") or (cfg.INPUT.RANDOM_FLIP == "flip_by_clip"), vertical=cfg.INPUT.RANDOM_FLIP == "vertical", clip_frame_cnt=clip_frame_cnt, ) ) # Additional augmentations : brightness, contrast, saturation, rotation augmentations = cfg.INPUT.PSEUDO.AUGMENTATIONS if "brightness" in augmentations: aug_list.append(T.RandomBrightness(0.9, 1.1)) if "contrast" in augmentations: aug_list.append(T.RandomContrast(0.9, 1.1)) if "saturation" in augmentations: aug_list.append(T.RandomSaturation(0.9, 1.1)) if "rotation" in augmentations: aug_list.append( RandomRotationClip( [-15, 15], expand=False, center=[(0.4, 0.4), (0.6, 0.6)], clip_frame_cnt=clip_frame_cnt, ) ) else: # Resize min_size = cfg.INPUT.MIN_SIZE_TEST max_size = cfg.INPUT.MAX_SIZE_TEST sample_style = "choice" aug_list.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) return aug_list