from __future__ import division import torch import math import random import numpy as np import numbers import types import collections import warnings import cv2 from PIL import Image, ImageOps, ImageEnhance, PILLOW_VERSION try: import accimage except ImportError: accimage = None def _is_pil_image(img): if accimage is not None: return isinstance(img, (Image.Image, accimage.Image)) else: return isinstance(img, Image.Image) def normalize(tensor, mean, std): """Normalize a ``torch.tensor`` Args: tensor (torch.tensor): tensor to be normalized. mean: (list): the mean of BGR std: (list): the std of BGR Returns: Tensor: Normalized tensor. """ for t, m, s in zip(tensor, mean, std): t.sub_(m).div_(s) return tensor def to_tensor(pic): """Convert a ``numpy.ndarray`` to tensor. See ``ToTensor`` for more details. Args: pic (numpy.ndarray): Image to be converted to tensor. Returns: Tensor: Converted image. """ img = torch.from_numpy(pic.transpose((2, 0, 1))) return img.float() def resize(img, mask, kpt, ratio): """Resize the ``numpy.ndarray`` and points as ratio. Args: img (numpy.ndarray): Image to be resized. mask (numpy.ndarray): Mask to be resized. kpt (list): Keypoints to be resized. ratio (tuple or number): the ratio to resize. Returns: numpy.ndarray: Resized image. numpy.ndarray: Resized mask. lists: Resized keypoints. """ if not (isinstance(ratio, numbers.Number) or (isinstance(ratio, collections.Iterable) and len(ratio) == 2)): raise TypeError('Got inappropriate ratio arg: {}'.format(ratio)) h, w, _ = img.shape if w < 64: img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(128, 128, 128)) mask = cv2.copyMakeBorder(mask, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(1, 1, 1)) w = 64 if isinstance(ratio, numbers.Number): length = len(kpt) for j in range(length): kpt[j][0] *= ratio kpt[j][1] *= ratio kpt[j][2] *= ratio return cv2.resize(img, (0, 0), fx=ratio, fy=ratio), cv2.resize(mask, (0, 0), fx=ratio, fy=ratio), kpt else: length = len(kpt) for j in range(length): kpt[j][0] *= ratio[0] kpt[j][1] *= ratio[1] kpt[j][2] *= ratio[2] return np.ascontiguousarray(cv2.resize(img, (0, 0), fx=ratio[0], fy=ratio[1])), np.ascontiguousarray( cv2.resize(mask, (0, 0), fx=ratio[0], fy=ratio[1])), kpt def rotate(img, mask, kpt, degree): """Rotate the ``numpy.ndarray`` and points as degree. Args: img (numpy.ndarray): Image to be rotated. mask (numpy.ndarray): Mask to be rotated. kpt (list): Keypoints to be rotated. degree (number): the degree to rotate. Returns: numpy.ndarray: Resized image. numpy.ndarray: Resized mask. list: Resized keypoints. """ height, width, _ = img.shape img_center = (width / 2.0, height / 2.0) rotateMat = cv2.getRotationMatrix2D(img_center, degree, 1.0) cos_val = np.abs(rotateMat[0, 0]) sin_val = np.abs(rotateMat[0, 1]) new_width = int(height * sin_val + width * cos_val) new_height = int(height * cos_val + width * sin_val) rotateMat[0, 2] += (new_width / 2.) - img_center[0] rotateMat[1, 2] += (new_height / 2.) - img_center[1] img = cv2.warpAffine(img, rotateMat, (new_width, new_height), borderValue=(128, 128, 128)) mask = cv2.warpAffine(mask, rotateMat, (new_width, new_height), borderValue=(1, 1, 1)) length = len(kpt) for j in range(length): x = kpt[j][0] y = kpt[j][1] p = np.array([x, y, 1]) p = rotateMat.dot(p) kpt[j][0] = p[0] kpt[j][1] = p[1] return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt def adjust_brightness(img, brightness_factor): """Adjust brightness of an Image. Args: img (PIL Image): PIL Image to be adjusted. brightness_factor (float): How much to adjust the brightness. Can be any non negative number. 0 gives a black image, 1 gives the original image while 2 increases the brightness by a factor of 2. Returns: PIL Image: Brightness adjusted image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) enhancer = ImageEnhance.Brightness(img) img = enhancer.enhance(brightness_factor) return img def adjust_contrast(img, contrast_factor): """Adjust contrast of an Image. Args: img (PIL Image): PIL Image to be adjusted. contrast_factor (float): How much to adjust the contrast. Can be any non negative number. 0 gives a solid gray image, 1 gives the original image while 2 increases the contrast by a factor of 2. Returns: PIL Image: Contrast adjusted image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) enhancer = ImageEnhance.Contrast(img) img = enhancer.enhance(contrast_factor) return img def adjust_saturation(img, saturation_factor): """Adjust color saturation of an image. Args: img (PIL Image): PIL Image to be adjusted. saturation_factor (float): How much to adjust the saturation. 0 will give a black and white image, 1 will give the original image while 2 will enhance the saturation by a factor of 2. Returns: PIL Image: Saturation adjusted image. """ if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) enhancer = ImageEnhance.Color(img) img = enhancer.enhance(saturation_factor) return img def adjust_hue(img, hue_factor): """Adjust hue of an image. The image hue is adjusted by converting the image to HSV and cyclically shifting the intensities in the hue channel (H). The image is then converted back to original image mode. `hue_factor` is the amount of shift in H channel and must be in the interval `[-0.5, 0.5]`. See https://en.wikipedia.org/wiki/Hue for more details on Hue. Args: img (PIL Image): PIL Image to be adjusted. hue_factor (float): How much to shift the hue channel. Should be in [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in HSV space in positive and negative direction respectively. 0 means no shift. Therefore, both -0.5 and 0.5 will give an image with complementary colors while 0 gives the original image. Returns: PIL Image: Hue adjusted image. """ if not (-0.5 <= hue_factor <= 0.5): raise ValueError('hue_factor is not in [-0.5, 0.5].'.format(hue_factor)) if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) input_mode = img.mode if input_mode in {'L', '1', 'I', 'F'}: return img h, s, v = img.convert('HSV').split() np_h = np.array(h, dtype=np.uint8) # uint8 addition take cares of rotation across boundaries with np.errstate(over='ignore'): np_h += np.uint8(hue_factor * 255) h = Image.fromarray(np_h, 'L') img = Image.merge('HSV', (h, s, v)).convert(input_mode) return img def hflip(img, mask, kpt): height, width, _ = img.shape mask = mask.reshape((height, width, 1)) img = img[:, ::-1, :] mask = mask[:, ::-1, :] length = len(kpt) for j in range(length): # if kpt[j][2] > 0: kpt[j][0] = width - 1 - kpt[j][0] if length == 17: swap_pair = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] if length == 13: swap_pair = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12] ] if length == 68: swap_pair = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11], [6, 10], [7, 9], [17, 26], [18, 25], [19, 24], [20, 23], [21, 22], [31, 35], [32, 34], [36, 45], [37, 44], [38, 43], [39, 42], [40, 47], [41, 46], [48, 54], [49, 53], [50, 52], [59, 55], [60, 64], [61, 63], [58, 56], [67, 65], ] if length == 5: swap_pair = [[0, 1], [3, 4]] if length == 10: swap_pair = [[0, 1], [3, 4] , [5, 9], [6, 8]] for x in swap_pair: temp_point0 = kpt[x[0]].copy() temp_point1 = kpt[x[1]].copy() kpt[x[0]] = temp_point1 kpt[x[1]] = temp_point0 return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt def crop(img, mask, kpt, offset_left, offset_up, w, h): length = len(kpt) for y in range(length): kpt[y][0] -= offset_left kpt[y][1] -= offset_up height, width, _ = img.shape mask = mask.reshape((height, width)) new_img = np.empty((h, w, 3), dtype=img.dtype) new_img.fill(128) new_mask = np.empty((h, w), dtype=mask.dtype) new_mask.fill(1) st_x = 0 ed_x = w st_y = 0 ed_y = h or_st_x = offset_left or_ed_x = offset_left + w or_st_y = offset_up or_ed_y = offset_up + h if offset_left < 0: st_x = -offset_left or_st_x = 0 if offset_left + w > width: ed_x = width - offset_left or_ed_x = width if offset_up < 0: st_y = -offset_up or_st_y = 0 if offset_up + h > height: ed_y = height - offset_up or_ed_y = height new_img[st_y: ed_y, st_x: ed_x, :] = img[or_st_y: or_ed_y, or_st_x: or_ed_x, :].copy() new_mask[st_y: ed_y, st_x: ed_x] = mask[or_st_y: or_ed_y, or_st_x: or_ed_x].copy() return np.ascontiguousarray(new_img), np.ascontiguousarray(new_mask), kpt class RandomResized(object): """Resize the given numpy.ndarray to random size and aspect ratio. Args: scale_min: the min scale to resize. scale_max: the max scale to resize. """ def __init__(self, targetsize=256 , scale_min=0.5, scale_max=1.1): self.scale_min = scale_min self.scale_max = scale_max self.targetsize = targetsize @staticmethod def get_params(img, targetsize ,scale_min, scale_max): height, width, _ = img.shape ratio = random.uniform(scale_min, scale_max) ratio = (float(targetsize) / max(height,width))*ratio return ratio def __call__(self, img, mask, kpt): """ Args: img (numpy.ndarray): Image to be resized. mask (numpy.ndarray): Mask to be resized. kpt (list): keypoints to be resized. Returns: numpy.ndarray: Randomly resize image. numpy.ndarray: Randomly resize mask. list: Randomly resize keypoints. """ ratio = self.get_params(img, self.targetsize ,self.scale_min, self.scale_max) return resize(img, mask, kpt, ratio) class TestResized(object): """Resize the given numpy.ndarray to the size for test. Args: size: the size to resize. """ def __init__(self, size): assert (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)) if isinstance(size, int): self.size = (size, size) else: self.size = size @staticmethod def get_params(img, output_size): height, width, _ = img.shape radiow = output_size[0] * 1.0 / width radioh = output_size[1] * 1.0 / height return min(radioh, radiow) def __call__(self, img, mask, kpt): """ Args: img (numpy.ndarray): Image to be resized. mask (numpy.ndarray): Mask to be resized. kpt (list): keypoints to be resized. Returns: numpy.ndarray: Randomly resize image. numpy.ndarray: Randomly resize mask. list: Randomly resize keypoints. """ ratio = self.get_params(img, self.size) height, width, c = img.shape if height > width: newimg = np.zeros((height, height, c), img.dtype) start = int((height - width) / 2) newimg[:, start:start + width, :] = img else: newimg = np.zeros((width, width, c), img.dtype) start = int((width - height) / 2) newimg[start:start + height, :, :] = img if newimg.shape[0] < 64: ratio = ratio / 64.0 * newimg.shape[0] newimg = cv2.resize(newimg, (64, 64)) return resize(newimg, mask, kpt, ratio) class RandomRotate(object): """Rotate the input numpy.ndarray and points to the given degree. Args: degree (number): Desired rotate degree. """ def __init__(self, max_degree): assert isinstance(max_degree, numbers.Number) self.max_degree = max_degree @staticmethod def get_params(max_degree): """Get parameters for ``rotate`` for a random rotate. Returns: number: degree to be passed to ``rotate`` for random rotate. """ degree = random.uniform(-max_degree, max_degree) return degree def __call__(self, img, mask, kpt): """ Args: img (numpy.ndarray): Image to be rotated. mask (numpy.ndarray): Mask to be rotated. kpt (list): Keypoints to be rotated. Returns: numpy.ndarray: Rotated image. list: Rotated key points. """ degree = self.get_params(self.max_degree) return rotate(img, mask, kpt, degree) class ColorJitter(object): """Randomly change the brightness, contrast and saturation of an image. Args: brightness (float): How much to jitter brightness. brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. contrast (float): How much to jitter contrast. contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. saturation (float): How much to jitter saturation. saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. hue(float): How much to jitter hue. hue_factor is chosen uniformly from [-hue, hue]. Should be >=0 and <= 0.5. """ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): self.brightness = brightness self.contrast = contrast self.saturation = saturation self.hue = hue @staticmethod def get_params(brightness, contrast, saturation, hue): if brightness > 0: brightness_factor = random.uniform(max(0, 1 - brightness), 1 + brightness) if contrast > 0: contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast) if saturation > 0: saturation_factor = random.uniform(max(0, 1 - saturation), 1 + saturation) if hue > 0: hue_factor = random.uniform(-hue, hue) return brightness_factor, contrast, saturation, hue def __call__(self, img, mask, kpt): brightness_factor, contrast, saturation, hue = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) img = adjust_brightness(img, brightness_factor) img = adjust_contrast(img, contrast) img = adjust_saturation(img, saturation) img = adjust_hue(img, hue) img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) return img, mask, kpt class RandomCrop(object): """Crop the given numpy.ndarray and at a random location. Args: size (int): Desired output size of the crop. """ def __init__(self, size, center_perturb_max=40): assert isinstance(size, numbers.Number) self.size = (int(size), int(size)) # (w, h) self.center_perturb_max = center_perturb_max @staticmethod def get_params(img, output_size, center_perturb_max): """Get parameters for ``crop`` for a random crop. Args: img (numpy.ndarray): Image to be cropped. output_size (tuple): Expected output size of the crop. Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ ratio_x = random.uniform(0, 1) ratio_y = random.uniform(0, 1) x_offset = int((ratio_x - 0.5) * 2 * center_perturb_max) y_offset = int((ratio_y - 0.5) * 2 * center_perturb_max) center_x = img.shape[1] / 2.0 + x_offset center_y = img.shape[0] / 2.0 + y_offset return int(round(center_x - output_size[0] / 2)), int(round(center_y - output_size[1] / 2)) def __call__(self, img, mask, kpt): """ Args: img (numpy.ndarray): Image to be cropped. mask (numpy.ndarray): Mask to be cropped. kpt (list): keypoints to be cropped. Returns: numpy.ndarray: Cropped image. numpy.ndarray: Cropped mask. list: Cropped keypoints. """ offset_left, offset_up = self.get_params(img, self.size, self.center_perturb_max) return crop(img, mask, kpt, offset_left, offset_up, self.size[0], self.size[1]) class RandomNoise(object): def __init__(self, meanscale=0.3, samplescale=0.3): self.meanscale = meanscale self.samplescale = samplescale @staticmethod def get_params(img, meanscale, samplescale): meanscale = random.uniform(0, 1) * meanscale samplescale = random.uniform(0, 1) * samplescale meanrandom = np.random.random(img.shape) samplerandom = np.random.random(img.shape) imagemean = np.mean(img) meanrandom = (meanrandom - 0.5) * (meanscale * imagemean) img = img.astype("float32") img = ((samplerandom - 0.5) * (2 * samplescale) + 1) * img img = img + meanrandom return img def __call__(self, img, mask, kpt): img = self.get_params(img, self.meanscale, self.samplescale) return img, mask, kpt class RandomHorizontalFlip(object): """Random horizontal flip the image. Args: prob (number): the probability to flip. """ def __init__(self, prob=0.5): self.prob = prob def __call__(self, img, mask, kpt): """ Args: img (numpy.ndarray): Image to be flipped. mask (numpy.ndarray): Mask to be flipped. kpt (list): Keypoints to be flipped. Returns: numpy.ndarray: Randomly flipped image. list: Randomly flipped points. """ if random.random() < self.prob: return hflip(img, mask, kpt) return img, mask, kpt class Compose(object): """Composes several imgtransforms together. Args: imgtransforms (list of ``Transform`` objects): list of transforms to compose. Example: >>> imgtransforms.Compose([ >>> imgtransforms.CenterCrop(10), >>> imgtransforms.ToTensor(), >>> ]) """ def __init__(self, imgtransforms): self.transforms = imgtransforms def __call__(self, img, mask, kpt): for t in self.transforms: if isinstance(t, RandomResized): img, mask, kpt = t(img, mask, kpt) else: img, mask, kpt = t(img, mask, kpt) return img, mask, kpt