# witnn/tools/imgtransforms.py
from __future__ import division
import torch
import math
import random
import numpy as np
import numbers
import types
import collections
import warnings
import cv2
from PIL import Image, ImageOps, ImageEnhance, PILLOW_VERSION
try:
    import accimage
except ImportError:
    accimage = None


def _is_pil_image(img):
    if accimage is not None:
        return isinstance(img, (Image.Image, accimage.Image))
    else:
        return isinstance(img, Image.Image)

def normalize(tensor, mean, std):
    """Normalize a ``torch.Tensor`` in place with per-channel statistics.

    Args:
        tensor (torch.Tensor): Tensor image of size (C, H, W) to be normalized.
        mean (list): the per-channel (BGR) means.
        std (list): the per-channel (BGR) standard deviations.

    Returns:
        Tensor: Normalized tensor.
    """
    for t, m, s in zip(tensor, mean, std):
        t.sub_(m).div_(s)
    return tensor

def to_tensor(pic):
    """Convert a ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (numpy.ndarray): Image (H x W x C) to be converted to tensor.

    Returns:
        Tensor: Converted image of shape (C x H x W).
    """
    img = torch.from_numpy(pic.transpose((2, 0, 1)))
    return img.float()
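
# Illustrative only (not from the original module): how `to_tensor` and
# `normalize` are meant to pair up on a BGR uint8 image. The mean/std values
# below are placeholders, not statistics prescribed by this project.
#
#   img = np.full((4, 4, 3), 128, dtype=np.uint8)                    # H x W x 3, BGR
#   t = to_tensor(img)                                               # 3 x H x W float tensor
#   t = normalize(t, mean=[104.0, 117.0, 123.0], std=[1.0, 1.0, 1.0])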

def resize(img, mask, kpt, ratio):
    """Resize the ``numpy.ndarray`` and keypoints by the given ratio.

    Args:
        img (numpy.ndarray): Image to be resized.
        mask (numpy.ndarray): Mask to be resized.
        kpt (list): Keypoints to be resized.
        ratio (tuple or number): the ratio to resize by; a single number is
            applied to both axes, a pair is applied as (x ratio, y ratio).

    Returns:
        numpy.ndarray: Resized image.
        numpy.ndarray: Resized mask.
        list: Resized keypoints.
    """
    if not (isinstance(ratio, numbers.Number) or (isinstance(ratio, collections.Iterable) and len(ratio) == 2)):
        raise TypeError('Got inappropriate ratio arg: {}'.format(ratio))

    h, w, _ = img.shape
    # Pad very narrow images to a minimum width of 64 before resizing.
    if w < 64:
        img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(128, 128, 128))
        mask = cv2.copyMakeBorder(mask, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(1, 1, 1))
        w = 64

    if isinstance(ratio, numbers.Number):
        for j in range(len(kpt)):
            kpt[j][0] *= ratio
            kpt[j][1] *= ratio
            kpt[j][2] *= ratio
        return cv2.resize(img, (0, 0), fx=ratio, fy=ratio), cv2.resize(mask, (0, 0), fx=ratio, fy=ratio), kpt
    else:
        for j in range(len(kpt)):
            # ratio has only two elements (x, y); the third keypoint field is left unchanged.
            kpt[j][0] *= ratio[0]
            kpt[j][1] *= ratio[1]
        return np.ascontiguousarray(cv2.resize(img, (0, 0), fx=ratio[0], fy=ratio[1])), \
            np.ascontiguousarray(cv2.resize(mask, (0, 0), fx=ratio[0], fy=ratio[1])), kpt

def rotate(img, mask, kpt, degree):
    """Rotate the ``numpy.ndarray`` and keypoints by the given degree.

    Args:
        img (numpy.ndarray): Image to be rotated.
        mask (numpy.ndarray): Mask to be rotated.
        kpt (list): Keypoints to be rotated.
        degree (number): the degree to rotate by.

    Returns:
        numpy.ndarray: Rotated image.
        numpy.ndarray: Rotated mask.
        list: Rotated keypoints.
    """
    height, width, _ = img.shape

    img_center = (width / 2.0, height / 2.0)
    rotateMat = cv2.getRotationMatrix2D(img_center, degree, 1.0)
    cos_val = np.abs(rotateMat[0, 0])
    sin_val = np.abs(rotateMat[0, 1])

    # Expand the canvas so the rotated image is not clipped, and shift the
    # rotation matrix so the original center maps to the new center.
    new_width = int(height * sin_val + width * cos_val)
    new_height = int(height * cos_val + width * sin_val)
    rotateMat[0, 2] += (new_width / 2.) - img_center[0]
    rotateMat[1, 2] += (new_height / 2.) - img_center[1]

    img = cv2.warpAffine(img, rotateMat, (new_width, new_height), borderValue=(128, 128, 128))
    mask = cv2.warpAffine(mask, rotateMat, (new_width, new_height), borderValue=(1, 1, 1))

    # Apply the same affine transform to every keypoint.
    for j in range(len(kpt)):
        p = rotateMat.dot(np.array([kpt[j][0], kpt[j][1], 1]))
        kpt[j][0] = p[0]
        kpt[j][1] = p[1]

    return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt

def adjust_brightness(img, brightness_factor):
    """Adjust brightness of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        brightness_factor (float): How much to adjust the brightness. Can be
            any non negative number. 0 gives a black image, 1 gives the
            original image while 2 increases the brightness by a factor of 2.

    Returns:
        PIL Image: Brightness adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    enhancer = ImageEnhance.Brightness(img)
    img = enhancer.enhance(brightness_factor)
    return img


def adjust_contrast(img, contrast_factor):
    """Adjust contrast of an Image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        contrast_factor (float): How much to adjust the contrast. Can be any
            non negative number. 0 gives a solid gray image, 1 gives the
            original image while 2 increases the contrast by a factor of 2.

    Returns:
        PIL Image: Contrast adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    enhancer = ImageEnhance.Contrast(img)
    img = enhancer.enhance(contrast_factor)
    return img


def adjust_saturation(img, saturation_factor):
    """Adjust color saturation of an image.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        saturation_factor (float): How much to adjust the saturation. 0 will
            give a black and white image, 1 will give the original image while
            2 will enhance the saturation by a factor of 2.

    Returns:
        PIL Image: Saturation adjusted image.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    enhancer = ImageEnhance.Color(img)
    img = enhancer.enhance(saturation_factor)
    return img

def adjust_hue(img, hue_factor):
    """Adjust hue of an image.

    The image hue is adjusted by converting the image to HSV and
    cyclically shifting the intensities in the hue channel (H).
    The image is then converted back to the original image mode.

    `hue_factor` is the amount of shift in the H channel and must be in the
    interval `[-0.5, 0.5]`.

    See https://en.wikipedia.org/wiki/Hue for more details on Hue.

    Args:
        img (PIL Image): PIL Image to be adjusted.
        hue_factor (float): How much to shift the hue channel. Should be in
            [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
            HSV space in positive and negative direction respectively.
            0 means no shift. Therefore, both -0.5 and 0.5 will give an image
            with complementary colors while 0 gives the original image.

    Returns:
        PIL Image: Hue adjusted image.
    """
    if not (-0.5 <= hue_factor <= 0.5):
        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    input_mode = img.mode
    if input_mode in {'L', '1', 'I', 'F'}:
        return img

    h, s, v = img.convert('HSV').split()
    np_h = np.array(h, dtype=np.uint8)
    # uint8 addition takes care of wrap-around across the hue boundary
    with np.errstate(over='ignore'):
        np_h += np.uint8(hue_factor * 255)
    h = Image.fromarray(np_h, 'L')

    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
    return img

def hflip(img, mask, kpt):
    height, width, _ = img.shape
    mask = mask.reshape((height, width, 1))

    img = img[:, ::-1, :]
    mask = mask[:, ::-1, :]

    # Mirror keypoint x coordinates.
    length = len(kpt)
    for j in range(length):
        # if kpt[j][2] > 0:
        kpt[j][0] = width - 1 - kpt[j][0]

    # Swap left/right keypoint indices; the pairs depend on the keypoint layout.
    swap_pair = []  # no left/right pairs defined for other keypoint counts
    if length == 17:
        swap_pair = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
    if length == 13:
        swap_pair = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]]
    if length == 68:
        swap_pair = [[0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11], [6, 10], [7, 9],
                     [17, 26], [18, 25], [19, 24], [20, 23], [21, 22],
                     [31, 35], [32, 34],
                     [36, 45], [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
                     [48, 54], [49, 53], [50, 52], [59, 55], [60, 64], [61, 63], [58, 56], [67, 65],
                     ]
    if length == 5:
        swap_pair = [[0, 1], [3, 4]]
    if length == 10:
        swap_pair = [[0, 1], [3, 4], [5, 9], [6, 8]]

    for x in swap_pair:
        temp_point0 = kpt[x[0]].copy()
        temp_point1 = kpt[x[1]].copy()
        kpt[x[0]] = temp_point1
        kpt[x[1]] = temp_point0

    return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt

def crop(img, mask, kpt, offset_left, offset_up, w, h):
    """Crop a (w, h) window starting at (offset_left, offset_up); out-of-range
    regions are padded with 128 for the image and 1 for the mask, and keypoints
    are shifted into the cropped coordinate frame."""
    for j in range(len(kpt)):
        kpt[j][0] -= offset_left
        kpt[j][1] -= offset_up

    height, width, _ = img.shape
    mask = mask.reshape((height, width))

    new_img = np.empty((h, w, 3), dtype=img.dtype)
    new_img.fill(128)
    new_mask = np.empty((h, w), dtype=mask.dtype)
    new_mask.fill(1)

    # Destination (st/ed) and source (or_st/or_ed) ranges, clipped to the image.
    st_x = 0
    ed_x = w
    st_y = 0
    ed_y = h
    or_st_x = offset_left
    or_ed_x = offset_left + w
    or_st_y = offset_up
    or_ed_y = offset_up + h

    if offset_left < 0:
        st_x = -offset_left
        or_st_x = 0
    if offset_left + w > width:
        ed_x = width - offset_left
        or_ed_x = width
    if offset_up < 0:
        st_y = -offset_up
        or_st_y = 0
    if offset_up + h > height:
        ed_y = height - offset_up
        or_ed_y = height

    new_img[st_y: ed_y, st_x: ed_x, :] = img[or_st_y: or_ed_y, or_st_x: or_ed_x, :].copy()
    new_mask[st_y: ed_y, st_x: ed_x] = mask[or_st_y: or_ed_y, or_st_x: or_ed_x].copy()
    return np.ascontiguousarray(new_img), np.ascontiguousarray(new_mask), kpt
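
# A small sketch (hypothetical shapes/values, not from the original module) of
# how `crop` behaves when the window extends past the image border: the
# out-of-range part of the output is filled with 128 (image) and 1 (mask),
# and keypoints are shifted into the crop's coordinate frame.
#
#   img  = np.zeros((100, 100, 3), dtype=np.uint8)
#   mask = np.zeros((100, 100), dtype=np.uint8)
#   kpt  = [[10.0, 20.0, 1.0]]
#   out_img, out_mask, out_kpt = crop(img, mask, kpt, -16, -16, 128, 128)
#   # out_img.shape == (128, 128, 3); rows/cols outside the source image are 128
#   # out_kpt[0][:2] == [26.0, 36.0]  (shifted by +16 in both x and y)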

class RandomResized(object):
    """Resize the given numpy.ndarray by a random scale.

    Args:
        targetsize: the size the longer image side is scaled towards.
        scale_min: the min scale to resize by.
        scale_max: the max scale to resize by.
    """

    def __init__(self, targetsize=256, scale_min=0.5, scale_max=1.1):
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.targetsize = targetsize

    @staticmethod
    def get_params(img, targetsize, scale_min, scale_max):
        height, width, _ = img.shape
        ratio = random.uniform(scale_min, scale_max)
        # Scale relative to the longer side so the result stays near targetsize.
        ratio = (float(targetsize) / max(height, width)) * ratio
        return ratio

    def __call__(self, img, mask, kpt):
        """
        Args:
            img (numpy.ndarray): Image to be resized.
            mask (numpy.ndarray): Mask to be resized.
            kpt (list): Keypoints to be resized.

        Returns:
            numpy.ndarray: Randomly resized image.
            numpy.ndarray: Randomly resized mask.
            list: Randomly resized keypoints.
        """
        ratio = self.get_params(img, self.targetsize, self.scale_min, self.scale_max)
        return resize(img, mask, kpt, ratio)

class TestResized(object):
    """Resize the given numpy.ndarray to the size used at test time.

    Args:
        size: the size to resize to.
    """

    def __init__(self, size):
        assert (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2))
        if isinstance(size, int):
            self.size = (size, size)
        else:
            self.size = size

    @staticmethod
    def get_params(img, output_size):
        height, width, _ = img.shape
        ratio_w = output_size[0] * 1.0 / width
        ratio_h = output_size[1] * 1.0 / height
        return min(ratio_h, ratio_w)

    def __call__(self, img, mask, kpt):
        """
        Args:
            img (numpy.ndarray): Image to be resized.
            mask (numpy.ndarray): Mask to be resized.
            kpt (list): Keypoints to be resized.

        Returns:
            numpy.ndarray: Resized image.
            numpy.ndarray: Resized mask.
            list: Resized keypoints.
        """
        ratio = self.get_params(img, self.size)

        # Pad the shorter side to make the image square before resizing.
        height, width, c = img.shape
        if height > width:
            newimg = np.zeros((height, height, c), img.dtype)
            start = int((height - width) / 2)
            newimg[:, start:start + width, :] = img
        else:
            newimg = np.zeros((width, width, c), img.dtype)
            start = int((width - height) / 2)
            newimg[start:start + height, :, :] = img

        # Very small images are first upsampled to 64x64; compensate the ratio.
        if newimg.shape[0] < 64:
            ratio = ratio / 64.0 * newimg.shape[0]
            newimg = cv2.resize(newimg, (64, 64))
        return resize(newimg, mask, kpt, ratio)

class RandomRotate(object):
    """Rotate the input numpy.ndarray and keypoints by a random degree.

    Args:
        max_degree (number): the maximum rotation; the actual degree is drawn
            uniformly from [-max_degree, max_degree].
    """

    def __init__(self, max_degree):
        assert isinstance(max_degree, numbers.Number)
        self.max_degree = max_degree

    @staticmethod
    def get_params(max_degree):
        """Get parameters for ``rotate`` for a random rotate.

        Returns:
            number: degree to be passed to ``rotate`` for a random rotate.
        """
        degree = random.uniform(-max_degree, max_degree)
        return degree

    def __call__(self, img, mask, kpt):
        """
        Args:
            img (numpy.ndarray): Image to be rotated.
            mask (numpy.ndarray): Mask to be rotated.
            kpt (list): Keypoints to be rotated.

        Returns:
            numpy.ndarray: Rotated image.
            numpy.ndarray: Rotated mask.
            list: Rotated keypoints.
        """
        degree = self.get_params(self.max_degree)
        return rotate(img, mask, kpt, degree)

class ColorJitter(object):
    """Randomly change the brightness, contrast and saturation of an image.

    Args:
        brightness (float): How much to jitter brightness. brightness_factor
            is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
        contrast (float): How much to jitter contrast. contrast_factor
            is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
        saturation (float): How much to jitter saturation. saturation_factor
            is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
        hue (float): How much to jitter hue. hue_factor is chosen uniformly from
            [-hue, hue]. Should be >= 0 and <= 0.5.
    """

    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        self.brightness = brightness
        self.contrast = contrast
        self.saturation = saturation
        self.hue = hue

    @staticmethod
    def get_params(brightness, contrast, saturation, hue):
        # Default to identity factors when the corresponding jitter range is 0.
        brightness_factor = 1.0
        contrast_factor = 1.0
        saturation_factor = 1.0
        hue_factor = 0.0
        if brightness > 0:
            brightness_factor = random.uniform(max(0, 1 - brightness), 1 + brightness)
        if contrast > 0:
            contrast_factor = random.uniform(max(0, 1 - contrast), 1 + contrast)
        if saturation > 0:
            saturation_factor = random.uniform(max(0, 1 - saturation), 1 + saturation)
        if hue > 0:
            hue_factor = random.uniform(-hue, hue)
        return brightness_factor, contrast_factor, saturation_factor, hue_factor

    def __call__(self, img, mask, kpt):
        brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params(
            self.brightness, self.contrast, self.saturation, self.hue)
        # The PIL-based adjustments expect RGB; convert from BGR and back.
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        img = adjust_brightness(img, brightness_factor)
        img = adjust_contrast(img, contrast_factor)
        img = adjust_saturation(img, saturation_factor)
        img = adjust_hue(img, hue_factor)
        img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
        return img, mask, kpt

class RandomCrop(object):
    """Crop the given numpy.ndarray at a random location.

    Args:
        size (int): Desired output size of the crop.
        center_perturb_max (int): Maximum random shift of the crop center, in pixels.
    """

    def __init__(self, size, center_perturb_max=40):
        assert isinstance(size, numbers.Number)
        self.size = (int(size), int(size))  # (w, h)
        self.center_perturb_max = center_perturb_max

    @staticmethod
    def get_params(img, output_size, center_perturb_max):
        """Get parameters for ``crop`` for a random crop.

        Args:
            img (numpy.ndarray): Image to be cropped.
            output_size (tuple): Expected output size of the crop.
            center_perturb_max (int): Maximum perturbation of the crop center.

        Returns:
            tuple: (offset_left, offset_up) to be passed to ``crop`` for a random crop.
        """
        ratio_x = random.uniform(0, 1)
        ratio_y = random.uniform(0, 1)
        x_offset = int((ratio_x - 0.5) * 2 * center_perturb_max)
        y_offset = int((ratio_y - 0.5) * 2 * center_perturb_max)
        center_x = img.shape[1] / 2.0 + x_offset
        center_y = img.shape[0] / 2.0 + y_offset
        return int(round(center_x - output_size[0] / 2)), int(round(center_y - output_size[1] / 2))

    def __call__(self, img, mask, kpt):
        """
        Args:
            img (numpy.ndarray): Image to be cropped.
            mask (numpy.ndarray): Mask to be cropped.
            kpt (list): Keypoints to be cropped.

        Returns:
            numpy.ndarray: Cropped image.
            numpy.ndarray: Cropped mask.
            list: Cropped keypoints.
        """
        offset_left, offset_up = self.get_params(img, self.size, self.center_perturb_max)
        return crop(img, mask, kpt, offset_left, offset_up, self.size[0], self.size[1])

class RandomNoise(object):
    """Apply random multiplicative and additive noise to the image.

    Args:
        meanscale (float): maximum strength of the additive offset, relative to the image mean.
        samplescale (float): maximum strength of the per-pixel multiplicative noise.
    """

    def __init__(self, meanscale=0.3, samplescale=0.3):
        self.meanscale = meanscale
        self.samplescale = samplescale

    @staticmethod
    def get_params(img, meanscale, samplescale):
        meanscale = random.uniform(0, 1) * meanscale
        samplescale = random.uniform(0, 1) * samplescale
        meanrandom = np.random.random(img.shape)
        samplerandom = np.random.random(img.shape)
        imagemean = np.mean(img)
        meanrandom = (meanrandom - 0.5) * (meanscale * imagemean)
        img = img.astype("float32")
        img = ((samplerandom - 0.5) * (2 * samplescale) + 1) * img
        img = img + meanrandom
        return img

    def __call__(self, img, mask, kpt):
        img = self.get_params(img, self.meanscale, self.samplescale)
        return img, mask, kpt

class RandomHorizontalFlip(object):
    """Horizontally flip the image, mask and keypoints with a given probability.

    Args:
        prob (number): the probability of flipping.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, img, mask, kpt):
        """
        Args:
            img (numpy.ndarray): Image to be flipped.
            mask (numpy.ndarray): Mask to be flipped.
            kpt (list): Keypoints to be flipped.

        Returns:
            numpy.ndarray: Randomly flipped image.
            numpy.ndarray: Randomly flipped mask.
            list: Randomly flipped keypoints.
        """
        if random.random() < self.prob:
            return hflip(img, mask, kpt)
        return img, mask, kpt

class Compose(object):
    """Composes several imgtransforms together.

    Args:
        imgtransforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> imgtransforms.Compose([
        >>>     imgtransforms.RandomResized(256),
        >>>     imgtransforms.RandomHorizontalFlip(),
        >>> ])
    """

    def __init__(self, imgtransforms):
        self.transforms = imgtransforms

    def __call__(self, img, mask, kpt):
        # Every transform takes and returns the (img, mask, kpt) triple.
        for t in self.transforms:
            img, mask, kpt = t(img, mask, kpt)
        return img, mask, kpt
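
# A minimal end-to-end usage sketch (not part of the original module). It
# assumes a BGR uint8 image, a single-channel mask of the same height/width,
# and a keypoint list whose length matches one of the layouts handled by
# `hflip` (e.g. 17 COCO-style keypoints); the sizes, degree and normalization
# statistics below are illustrative, not values prescribed by this project.
def _example_pipeline(img, mask, kpt):
    transform = Compose([
        RandomResized(targetsize=256, scale_min=0.5, scale_max=1.1),
        RandomRotate(max_degree=30),
        RandomCrop(size=256, center_perturb_max=40),
        RandomHorizontalFlip(prob=0.5),
    ])
    img, mask, kpt = transform(img, mask, kpt)
    tensor = to_tensor(img.astype(np.float32))
    # Placeholder statistics; substitute the dataset's own mean/std.
    tensor = normalize(tensor, mean=[128.0, 128.0, 128.0], std=[256.0, 256.0, 256.0])
    return tensor, mask, kpt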