# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging

import cv2
import numpy as np
import torch


class Compose(object):
    """Composes several video_transforms together.

    Args:
        transforms (List[Transform]): list of transforms to compose.
        aug_seed (int or None): base seed; the i-th transform is seeded
            with ``aug_seed + i``. ``None`` seeds every transform
            non-deterministically.

    Example:
        >>> video_transforms.Compose([
        >>>     video_transforms.CenterCrop(10),
        >>>     video_transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms, aug_seed=0):
        self.transforms = transforms
        self.set_random_state(aug_seed)

    def __call__(self, data, idx=None, copy_id=0):
        """Apply every transform in order and return the final result."""
        for t in self.transforms:
            data = t(data, idx, copy_id)
        return data

    def set_random_state(self, seed=None):
        """Re-seed every transform; transform ``i`` receives ``seed + i``.

        A ``None`` seed is forwarded unchanged (``None + i`` would raise
        a TypeError).
        """
        for i, t in enumerate(self.transforms):
            t.set_random_state(seed=None if seed is None else seed + i)


class Transform(object):
    """Base class for all transformations."""

    def set_random_state(self, seed=None):
        """(Re)create ``self.rng`` as a ``RandomState`` seeded with ``seed``."""
        self.rng = np.random.RandomState(seed)


####################################
# Customized Transformations
####################################

class Normalize(Transform):
    """Given mean: (R, G, B) and std: (R, G, B),
    will normalize each channel of the torch.*Tensor, i.e.
    channel = (channel - mean) / std

    The normalization is done in place and the same tensor is returned.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor, idx=None, copy_id=0):
        # Iterating the tensor yields one slice per entry of the first
        # dimension; each is paired with its mean/std and updated in place.
        for t, m, s in zip(tensor, self.mean, self.std):
            t.sub_(m).div_(s)
        return tensor


class CropScale(Transform):
    """Combine crop and scale operations for faster speed & less memory cost.

    A virtual rescale of the input is sampled, a crop window is chosen in
    that rescaled frame, the window is mapped back to the original image,
    and only the cropped region is resized to ``crop_size``.

    Args:
        crop_size: int, or (target_height, target_width).
        crop_type: 'random_crop', 'center_crop' or 'mixed_crop'
            (center crop when ``copy_id == 0``, random crop otherwise).
        make_square: if True, the virtual height starts from the width.
        aspect_ratio: [min, max] range of the random aspect-ratio jitter.
        slen: [min, max] range for the shorter side after virtual rescale.
        interpolation: cv2 interpolation flag used for the final resize.

    Raises:
        NotImplementedError: if ``crop_type`` is not one of the above.
    """

    def __init__(self, crop_size, crop_type='random_crop', make_square=False,
                 aspect_ratio=[1.0, 1.0], slen=[224, 288],
                 interpolation=cv2.INTER_LINEAR):
        # random scale
        assert slen[1] >= slen[0], \
            "slen ({}) should be in increase order".format(slen)
        assert aspect_ratio[1] >= aspect_ratio[0], \
            "aspect_ratio ({}) should be in increase order".format(aspect_ratio)
        self.slen = slen  # [min factor, max factor]
        # Skip the aspect jitter only when it is a no-op (both bounds are 1).
        # Testing the product of the bounds would also disable symmetric
        # ranges such as [0.5, 2.0].
        no_jitter = aspect_ratio[0] == 1.0 and aspect_ratio[1] == 1.0
        self.aspect_ratio = None if no_jitter else aspect_ratio
        self.make_square = make_square
        # random crop
        if isinstance(crop_size, int):
            self.crop_size = (crop_size, crop_size)
        else:
            self.crop_size = crop_size
        if crop_type == 'random_crop':
            self.func_crop = self.random_crop
        elif crop_type == 'center_crop':
            self.func_crop = self.center_crop
        elif crop_type == 'mixed_crop':
            self.func_crop = self.mixed_crop
        else:
            raise NotImplementedError
        # others
        self.interpolation = interpolation
        self.rng = np.random.RandomState(0)

    def random_scale(self, h, w):
        """Sample the (height, width) of the virtually rescaled image."""
        new_w = w
        new_h = h if not self.make_square else w
        if self.aspect_ratio:
            # Average of two uniform draws, then inverted half of the time.
            random_aspect_ratio = 0.5 * (
                self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1])
                + self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1]))
            if self.rng.rand() > 0.5:
                random_aspect_ratio = 1.0 / random_aspect_ratio
            new_w *= random_aspect_ratio
            new_h /= random_aspect_ratio
        # Scale so that the shorter side lands in [slen[0], slen[1]].
        resize_factor = self.rng.uniform(self.slen[0], self.slen[1]) / min(new_w, new_h)
        new_w *= resize_factor
        new_h *= resize_factor
        return (new_h, new_w)

    def random_crop(self, h, w, idx=None, copy_id=0):
        """Return (y1, y2, x1, x2) of a uniformly placed crop window."""
        th, tw = self.crop_size
        x1 = self.rng.uniform(0, w - tw)
        y1 = self.rng.uniform(0, h - th)
        return (y1, y1 + th, x1, x1 + tw)

    def center_crop(self, h, w, idx=None, copy_id=0):
        """Return (y1, y2, x1, x2) of the centered crop window."""
        th, tw = self.crop_size
        x1 = 0.5 * (w - tw)
        y1 = 0.5 * (h - th)
        return (y1, y1 + th, x1, x1 + tw)

    def mixed_crop(self, h, w, idx=None, copy_id=0):
        """Center crop for ``copy_id == 0``, random crop for other copies."""
        if copy_id == 0:
            return self.center_crop(h, w, idx=idx, copy_id=copy_id)
        return self.random_crop(h, w, idx=idx, copy_id=copy_id)

    def __call__(self, data, idx=None, copy_id=0):
        """Crop and resize ``data`` (unpacked as H x W x C) to ``crop_size``."""
        h, w, c = data.shape
        new_h, new_w = self.random_scale(h, w)
        # compute roi
        new_y1, new_y2, new_x1, new_x2 = self.func_crop(
            new_h, new_w, idx=idx, copy_id=copy_id)
        # map to original image
        y1 = int((new_y1 * h) / new_h)
        y2 = int((new_y2 * h) / new_h)
        x1 = int((new_x1 * w) / new_w)
        x2 = int((new_x2 * w) / new_w)
        # execute
        cropped_data = data[y1:y2, x1:x2, :]
        # crop_size is (height, width) but cv2 expects dsize=(width, height).
        # The interpolation flag must be passed by keyword because the third
        # positional parameter of cv2.resize is `dst`.
        th, tw = self.crop_size
        output = cv2.resize(cropped_data, (tw, th),
                            interpolation=self.interpolation)
        return output


class Resize(Transform):
    """Rescales the input numpy array to the given 'size'.

    If 'size' is an int it is the size of the smaller edge.
    For example, if height > width, then image will be
    rescaled to (size * height / width, size).
    Otherwise 'size' is taken as (width, height).

    size: size of the smaller edge, or (width, height)
    interpolation: Default: cv2.INTER_LINEAR
    """

    def __init__(self, size, interpolation=cv2.INTER_LINEAR):
        self.size = size  # int, or [w, h]
        self.interpolation = interpolation

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        if isinstance(self.size, int):
            # Smaller edge already has the requested size: nothing to do.
            if min(w, h) == self.size:
                return data
            if w < h:
                new_w = self.size
                new_h = int(self.size * h / w)
            else:
                new_w = int(self.size * w / h)
                new_h = self.size
        else:
            new_w = self.size[0]
            new_h = self.size[1]
        if (h != new_h) or (w != new_w):
            # Keyword is required: the third positional parameter is `dst`.
            scaled_data = cv2.resize(data, (new_w, new_h),
                                     interpolation=self.interpolation)
        else:
            scaled_data = data
        return scaled_data


class RandomScale(Transform):
    """Rescales the input numpy array by a random factor.

    The aspect ratio is jittered within 'aspect_ratio' and the image is
    then scaled so that its shorter side is drawn uniformly from 'slen'.

    make_square: if True, the height starts from the width before jitter
    aspect_ratio: [min, max] range of the random aspect-ratio jitter
    slen: [min, max] range for the shorter side
    interpolation: Default: cv2.INTER_LINEAR
    """

    def __init__(self, make_square=False, aspect_ratio=[1.0, 1.0],
                 slen=[224, 288], interpolation=cv2.INTER_LINEAR):
        assert slen[1] >= slen[0], \
            "slen ({}) should be in increase order".format(slen)
        assert aspect_ratio[1] >= aspect_ratio[0], \
            "aspect_ratio ({}) should be in increase order".format(aspect_ratio)
        self.slen = slen  # [min factor, max factor]
        self.aspect_ratio = aspect_ratio
        self.make_square = make_square
        self.interpolation = interpolation
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        new_w = w
        new_h = h if not self.make_square else w
        if self.aspect_ratio:
            # Average of two uniform draws, then inverted half of the time.
            random_aspect_ratio = 0.5 * (
                self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1])
                + self.rng.uniform(self.aspect_ratio[0], self.aspect_ratio[1]))
            if self.rng.rand() > 0.5:
                random_aspect_ratio = 1.0 / random_aspect_ratio
            new_w *= random_aspect_ratio
            new_h /= random_aspect_ratio
        resize_factor = self.rng.uniform(self.slen[0], self.slen[1]) / min(new_w, new_h)
        new_w *= resize_factor
        new_h *= resize_factor
        # Keyword is required: the third positional parameter is `dst`.
        scaled_data = cv2.resize(data, (int(new_w + 1), int(new_h + 1)),
                                 interpolation=self.interpolation)
        return scaled_data


class CenterCrop(Transform):
    """Crops the given numpy array at the center to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)
    """

    def __init__(self, size):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        th, tw = self.size
        x1 = int(round((w - tw) / 2.))
        y1 = int(round((h - th) / 2.))
        cropped_data = data[y1:(y1 + th), x1:(x1 + tw), :]
        return cropped_data


class MixedCrop(Transform):
    """Crops the given numpy array to a region of the given size: at the
    center when copy_id == 0, at a random location otherwise.
    size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)
    """

    def __init__(self, size):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        th, tw = self.size
        if copy_id == 0:
            x1 = int(round(0.5 * float(w - tw)))
            y1 = int(round(0.5 * float(h - th)))
        else:
            x1 = self.rng.choice(range(w - tw + 1))
            y1 = self.rng.choice(range(h - th + 1))
        cropped_data = data[y1:(y1 + th), x1:(x1 + tw), :]
        return cropped_data


class RandomCrop(Transform):
    """Crops the given numpy array at the random location to have a region of
    the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)
    """

    def __init__(self, size):
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        th, tw = self.size
        x1 = self.rng.choice(range(w - tw + 1))
        y1 = self.rng.choice(range(h - th + 1))
        cropped_data = data[y1:(y1 + th), x1:(x1 + tw), :]
        return cropped_data


class EvenlyHorizontalFlip(Transform):
    """Horizontally flips the given numpy array when copy_id >= num_repeat;
    copies with copy_id < num_repeat are returned unflipped.
    """

    def __init__(self, num_repeat):
        self.num_repeat = num_repeat

    def __call__(self, data, idx=None, copy_id=0):
        if not copy_id < self.num_repeat:
            data = np.fliplr(data)
            # fliplr returns a view; make the result contiguous again.
            data = np.ascontiguousarray(data)
        return data


class RandomHorizontalFlip(Transform):
    """Randomly horizontally flips the given numpy array with a probability of 0.5
    """

    def __init__(self):
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        if self.rng.rand() < 0.5:
            data = np.fliplr(data)
            # fliplr returns a view; make the result contiguous again.
            data = np.ascontiguousarray(data)
        return data


class RandomVerticalFlip(Transform):
    """Randomly vertically flips the given numpy array with a probability of 0.5
    """

    def __init__(self):
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        if self.rng.rand() < 0.5:
            data = np.flipud(data)
            # flipud returns a view; make the result contiguous again.
            data = np.ascontiguousarray(data)
        return data


class PixelJitter(Transform):
    """Adds independent uniform noise to every element of the input array.

    For each call a lower bound is drawn from [vars[0], 0] and an upper
    bound from [0, vars[1]]; the float32 noise is then sampled uniformly
    between those two bounds.
    """

    def __init__(self, vars=[-20, 20]):
        self.vars = vars
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        low = self.rng.uniform(self.vars[0], 0)
        high = self.rng.uniform(0, self.vars[1])
        random_pixel = self.rng.uniform(
            low=low, high=high, size=(h, w, c)).astype(np.float32)
        augmented_data = data + random_pixel
        return augmented_data


class RandomRGB(Transform):
    """Adds a random integer offset to each channel and clips to [0, 255].

    One offset is drawn per entry of 'vars' from [-x, x]; channel ``ic``
    uses offset ``ic % len(vars)``. The result is a new array created
    with ``np.zeros(data.shape)``.
    """

    def __init__(self, vars=[10, 10, 10]):
        self.vars = vars
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        random_vars = [int(round(self.rng.uniform(-x, x))) for x in self.vars]
        base = len(random_vars)
        augmented_data = np.zeros(data.shape)
        for ic in range(0, c):
            var = random_vars[ic % base]
            augmented_data[:, :, ic] = np.minimum(
                np.maximum(data[:, :, ic] + var, 0), 255)
        return augmented_data


class RandomHLS(Transform):
    """Jitters the input in HLS color space.

    Every group of 3 channels is converted with cv2.COLOR_RGB2HLS, a
    random offset per H/L/S component (drawn from [-x, x] for each x in
    'vars') is added and clipped, and the group is converted back with
    cv2.COLOR_HLS2RGB. The channel count must be a multiple of 3.
    """

    def __init__(self, vars=[15, 35, 25]):
        self.vars = vars
        self.rng = np.random.RandomState(0)

    def __call__(self, data, idx=None, copy_id=0):
        h, w, c = data.shape
        assert c % 3 == 0, "input channel = %d, illegal" % c
        num_images = c // 3

        random_vars = [int(self.rng.uniform(-x, x)) for x in self.vars]
        base = len(random_vars)
        augmented_data = np.zeros(data.shape, )

        for i_im in range(0, num_images):
            augmented_data[:, :, 3 * i_im:(3 * i_im + 3)] = \
                cv2.cvtColor(data[:, :, 3 * i_im:(3 * i_im + 3)],
                             cv2.COLOR_RGB2HLS)

        # Upper clipping bound applied to the H, L and S components.
        hls_limits = [180, 255, 255]
        for ic in range(0, c):
            var = random_vars[ic % base]
            limit = hls_limits[ic % base]
            augmented_data[:, :, ic] = np.minimum(
                np.maximum(augmented_data[:, :, ic] + var, 0), limit)

        for i_im in range(0, num_images):
            augmented_data[:, :, 3 * i_im:(3 * i_im + 3)] = \
                cv2.cvtColor(
                    augmented_data[:, :, 3 * i_im:(3 * i_im + 3)].astype(np.uint8),
                    cv2.COLOR_HLS2RGB)

        return augmented_data


class ToTensor(Transform):
    """Converts a numpy.ndarray (H x W x C) in the range
    [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
    """

    def __init__(self, dim=3):
        self.dim = dim

    def __call__(self, image, idx=None, copy_id=0):
        if isinstance(image, np.ndarray):
            # H, W, C = image.shape
            # handle numpy array
            image = torch.from_numpy(image.transpose((2, 0, 1)))
            # backward compatibility
            return image.float() / 255.