File size: 8,619 Bytes
da2e2ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
import cv2
import numpy as np
import torch
from PIL import Image
from det_map.data.datasets.dataclasses import Camera
from det_map.data.pipelines.color_utils import bgr2hsv, hsv2bgr, mmlabNormalize
class PrepareImageInputs(object):
"""Load multi channel images from a list of separate channel files.
Expects results['img_filename'] to be a list of filenames.
Args:
to_float32 (bool): Whether to convert the img to float32.
Defaults to False.
color_type (str): Color type of the file. Defaults to 'unchanged'.
"""
def __init__(
self,
data_config,
is_train=False,
opencv_pp=False,
):
self.is_train = is_train
self.data_config = data_config
self.normalize_img = mmlabNormalize
self.opencv_pp = opencv_pp
def get_rot(self, h):
return torch.Tensor([
[np.cos(h), np.sin(h)],
[-np.sin(h), np.cos(h)],
])
def img_transform(self, img, post_rot, post_tran, resize, resize_dims,
crop, flip, rotate):
# adjust image
if not self.opencv_pp:
img = self.img_transform_core(img, resize_dims, crop, flip, rotate)
# post-homography transformation
post_rot *= resize
post_tran -= torch.Tensor(crop[:2])
if flip:
A = torch.Tensor([[-1, 0], [0, 1]])
b = torch.Tensor([crop[2] - crop[0], 0])
post_rot = A.matmul(post_rot)
post_tran = A.matmul(post_tran) + b
A = self.get_rot(rotate / 180 * np.pi)
b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
b = A.matmul(-b) + b
post_rot = A.matmul(post_rot)
post_tran = A.matmul(post_tran) + b
if self.opencv_pp:
img = self.img_transform_core_opencv(img, post_rot, post_tran, crop)
return img, post_rot, post_tran
def img_transform_core_opencv(self, img, post_rot, post_tran,
crop):
img = np.array(img).astype(np.float32)
img = cv2.warpAffine(img,
np.concatenate([post_rot,
post_tran.reshape(2, 1)],
axis=1),
(crop[2] - crop[0], crop[3] - crop[1]),
flags=cv2.INTER_LINEAR)
return img
def img_transform_core(self, img, resize_dims, crop, flip, rotate):
# adjust image
img = img.resize(resize_dims)
img = img.crop(crop)
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
return img
def sample_augmentation(self, H, W, flip=None, scale=None):
fH, fW = eval(self.data_config['input_size'])
if self.is_train:
resize = float(fW) / float(W)
resize += np.random.uniform(*eval(self.data_config['resize']))
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
random_crop_height = \
self.data_config.get('random_crop_height', False)
if random_crop_height:
crop_h = int(np.random.uniform(max(0.3 * newH, newH - fH),
newH - fH))
else:
crop_h = \
int((1 - np.random.uniform(*eval(self.data_config['crop_h']))) *
newH) - fH
crop_w = int(np.random.uniform(0, max(0, newW - fW)))
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = self.data_config['flip'] and np.random.choice([0, 1])
rotate = np.random.uniform(*eval(self.data_config['rot']))
if self.data_config.get('vflip', False) and np.random.choice([0, 1]):
rotate += 180
else:
resize = float(fW) / float(W)
if scale is not None:
resize += scale
else:
resize += self.data_config.get('resize_test', 0.0)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = int((1 - np.mean(eval(self.data_config['crop_h']))) * newH) - fH
crop_w = int(max(0, newW - fW) / 2)
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
flip = False if flip is None else flip
rotate = 0
return resize, resize_dims, crop, flip, rotate
def photo_metric_distortion(self, img, pmd):
"""Call function to perform photometric distortion on images.
Args:
results (dict): Result dict from loading pipeline.
Returns:
dict: Result dict with images distorted.
"""
if np.random.rand() > pmd.get('rate', 1.0):
return img
img = np.array(img).astype(np.float32)
assert img.dtype == np.float32, \
'PhotoMetricDistortion needs the input image of dtype np.float32,' \
' please set "to_float32=True" in "LoadImageFromFile" pipeline'
# random brightness
if np.random.randint(2):
delta = np.random.uniform(-pmd['brightness_delta'],
pmd['brightness_delta'])
img += delta
# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = np.random.randint(2)
if mode == 1:
if np.random.randint(2):
alpha = np.random.uniform(pmd['contrast_lower'],
pmd['contrast_upper'])
img *= alpha
# convert color from BGR to HSV
img = bgr2hsv(img)
# random saturation
if np.random.randint(2):
img[..., 1] *= np.random.uniform(pmd['saturation_lower'],
pmd['saturation_upper'])
# random hue
if np.random.randint(2):
img[..., 0] += np.random.uniform(-pmd['hue_delta'], pmd['hue_delta'])
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
# convert color from HSV to BGR
img = hsv2bgr(img)
# random contrast
if mode == 0:
if np.random.randint(2):
alpha = np.random.uniform(pmd['contrast_lower'],
pmd['contrast_upper'])
img *= alpha
# randomly swap channels
if np.random.randint(2):
img = img[..., np.random.permutation(3)]
return Image.fromarray(img.astype(np.uint8))
def get_inputs(self, cam: Camera, flip=None, scale=None):
img = Image.fromarray(cam.image)
# original copy of image
cam.canvas = torch.tensor(np.array(img))
post_rot = torch.eye(2)
post_tran = torch.zeros(2)
# image view augmentation (resize, crop, horizontal flip, rotate)
img_augs = self.sample_augmentation(
H=img.height, W=img.width, flip=flip, scale=scale)
resize, resize_dims, crop, flip, rotate = img_augs
img, post_rot2, post_tran2 = \
self.img_transform(img, post_rot,
post_tran,
resize=resize,
resize_dims=resize_dims,
crop=crop,
flip=flip,
rotate=rotate)
# for convenience, make augmentation matrices 3x3
post_tran = torch.zeros(3)
post_rot = torch.eye(3)
post_tran[:2] = post_tran2
post_rot[:2, :2] = post_rot2
if self.is_train and self.data_config.get('pmd', None) is not None:
img = self.photo_metric_distortion(img, self.data_config['pmd'])
# original image
cam.image = self.normalize_img(img)
cam.post_rot = post_rot
cam.post_tran = post_tran
cam.sensor2lidar_rotation = torch.tensor(cam.sensor2lidar_rotation)
cam.sensor2lidar_translation = torch.tensor(cam.sensor2lidar_translation)
cam.intrinsics = torch.tensor(cam.intrinsics)
cam.distortion = torch.tensor(cam.distortion)
return cam
def __call__(self, results):
return self.get_inputs(results)
|