from __future__ import division
from functools import wraps
import random
from warnings import warn
import cv2
import numpy as np
from scipy.ndimage.filters import gaussian_filter
from albumentations.augmentations.bbox_utils import denormalize_bbox, normalize_bbox
MAX_VALUES_BY_DTYPE = {
np.dtype('uint8'): 255,
np.dtype('uint16'): 65535,
np.dtype('uint32'): 4294967295,
np.dtype('float32'): 1.0,
}
def clip(img, dtype, maxval):
return np.clip(img, 0, maxval).astype(dtype)
def clipped(func):
@wraps(func)
def wrapped_function(img, *args, **kwargs):
dtype = img.dtype
maxval = MAX_VALUES_BY_DTYPE.get(dtype, 1.0)
return clip(func(img, *args, **kwargs), dtype, maxval)
return wrapped_function
[docs]def preserve_shape(func):
"""Preserve shape of the image.
"""
@wraps(func)
def wrapped_function(img, *args, **kwargs):
shape = img.shape
result = func(img, *args, **kwargs)
result = result.reshape(shape)
return result
return wrapped_function
[docs]def preserve_channel_dim(func):
"""Preserve dummy channel dim.
"""
@wraps(func)
def wrapped_function(img, *args, **kwargs):
shape = img.shape
result = func(img, *args, **kwargs)
if len(shape) == 3 and shape[-1] == 1 and len(result.shape) == 2:
result = np.expand_dims(result, axis=-1)
return result
return wrapped_function
def vflip(img):
return np.ascontiguousarray(img[::-1, ...])
def hflip(img):
return np.ascontiguousarray(img[:, ::-1, ...])
@preserve_shape
def random_flip(img, code):
return cv2.flip(img, code)
def transpose(img):
return img.transpose(1, 0, 2) if len(img.shape) > 2 else img.transpose(1, 0)
def rot90(img, factor):
img = np.rot90(img, factor)
return np.ascontiguousarray(img)
def normalize(img, mean, std, max_pixel_value=255.0):
mean = np.array(mean, dtype=np.float32)
mean *= max_pixel_value
std = np.array(std, dtype=np.float32)
std *= max_pixel_value
denominator = np.reciprocal(std, dtype=np.float32)
img = img.astype(np.float32)
img -= mean
img *= denominator
return img
def cutout(img, num_holes, max_h_size, max_w_size):
# Make a copy of the input image since we don't want to modify it directly
img = img.copy()
height, width = img.shape[:2]
for n in range(num_holes):
y = random.randint(0, height)
x = random.randint(0, width)
y1 = np.clip(y - max_h_size // 2, 0, height)
y2 = np.clip(y + max_h_size // 2, 0, height)
x1 = np.clip(x - max_w_size // 2, 0, width)
x2 = np.clip(x + max_w_size // 2, 0, width)
img[y1: y2, x1: x2] = 0
return img
@preserve_channel_dim
def rotate(img, angle, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REFLECT_101):
height, width = img.shape[:2]
matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
img = cv2.warpAffine(img, matrix, (width, height), flags=interpolation, borderMode=border_mode)
return img
@preserve_channel_dim
def scale(img, scale, interpolation=cv2.INTER_LINEAR):
height, width = img.shape[:2]
new_height, new_width = int(height * scale), int(width * scale)
img = cv2.resize(img, (new_width, new_height), interpolation=interpolation)
return img
@preserve_channel_dim
def resize(img, height, width, interpolation=cv2.INTER_LINEAR):
img = cv2.resize(img, (width, height), interpolation=interpolation)
return img
@preserve_channel_dim
def shift_scale_rotate(img, angle, scale, dx, dy, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REFLECT_101):
height, width = img.shape[:2]
center = (width / 2, height / 2)
matrix = cv2.getRotationMatrix2D(center, angle, scale)
matrix[0, 2] += dx * width
matrix[1, 2] += dy * height
img = cv2.warpAffine(img, matrix, (width, height), flags=interpolation, borderMode=border_mode)
return img
def crop(img, x_min, y_min, x_max, y_max):
height, width = img.shape[:2]
if x_max <= x_min or y_max <= y_min:
raise ValueError(
'We should have x_min < x_max and y_min < y_max. But we got'
' (x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max})'.format(
x_min=x_min,
x_max=x_max,
y_min=y_min,
y_max=y_max
)
)
if x_min < 0 or x_max > width or y_min < 0 or y_max > height:
raise ValueError(
'Values for crop should be non negative and equal or smaller than image sizes'
'(x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max}'
'height = {height}, width = {width})'.format(
x_min=x_min,
x_max=x_max,
y_min=y_min,
y_max=y_max,
height=height,
width=width
)
)
return img[y_min:y_max, x_min:x_max]
def get_center_crop_coords(height, width, crop_height, crop_width):
y1 = (height - crop_height) // 2
y2 = y1 + crop_height
x1 = (width - crop_width) // 2
x2 = x1 + crop_width
return x1, y1, x2, y2
def center_crop(img, crop_height, crop_width):
height, width = img.shape[:2]
if height < crop_height or width < crop_width:
raise ValueError(
'Requested crop size ({crop_height}, {crop_width}) is '
'larger than the image size ({height}, {width})'.format(
crop_height=crop_height,
crop_width=crop_width,
height=height,
width=width,
)
)
x1, y1, x2, y2 = get_center_crop_coords(height, width, crop_height, crop_width)
img = img[y1:y2, x1:x2]
return img
def get_random_crop_coords(height, width, crop_height, crop_width, h_start, w_start):
y1 = int((height - crop_height) * h_start)
y2 = y1 + crop_height
x1 = int((width - crop_width) * w_start)
x2 = x1 + crop_width
return x1, y1, x2, y2
def random_crop(img, crop_height, crop_width, h_start, w_start):
height, width = img.shape[:2]
if height < crop_height or width < crop_width:
raise ValueError(
'Requested crop size ({crop_height}, {crop_width}) is '
'larger than the image size ({height}, {width})'.format(
crop_height=crop_height,
crop_width=crop_width,
height=height,
width=width,
)
)
x1, y1, x2, y2 = get_random_crop_coords(height, width, crop_height, crop_width, h_start, w_start)
img = img[y1:y2, x1:x2]
return img
def clamping_crop(img, x_min, y_min, x_max, y_max):
h, w = img.shape[:2]
if x_min < 0:
x_min = 0
if y_min < 0:
y_min = 0
if y_max >= h:
y_max = h - 1
if x_max >= w:
x_max = w - 1
return img[int(y_min):int(y_max), int(x_min):int(x_max)]
def shift_hsv(img, hue_shift, sat_shift, val_shift):
dtype = img.dtype
img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
if dtype == np.uint8:
img = img.astype(np.int32)
hue, sat, val = cv2.split(img)
hue = cv2.add(hue, hue_shift)
hue = np.where(hue < 0, hue + 180, hue)
hue = np.where(hue > 180, hue - 180, hue)
hue = hue.astype(dtype)
sat = clip(cv2.add(sat, sat_shift), dtype, 255 if dtype == np.uint8 else 1.0)
val = clip(cv2.add(val, val_shift), dtype, 255 if dtype == np.uint8 else 1.0)
img = cv2.merge((hue, sat, val)).astype(dtype)
img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
return img
@clipped
def shift_rgb(img, r_shift, g_shift, b_shift):
if img.dtype == np.uint8:
img = img.astype('int32')
r_shift, g_shift, b_shift = np.int32(r_shift), np.int32(g_shift), np.int32(b_shift)
else:
# Make a copy of the input image since we don't want to modify it directly
img = img.copy()
img[..., 0] += r_shift
img[..., 1] += g_shift
img[..., 2] += b_shift
return img
def clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):
if img.dtype != np.uint8:
raise TypeError('clahe supports only uint8 inputs')
img = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
img[:, :, 0] = clahe.apply(img[:, :, 0])
img = cv2.cvtColor(img, cv2.COLOR_LAB2RGB)
return img
@preserve_channel_dim
def pad(img, min_height, min_width, border_mode=cv2.BORDER_REFLECT_101, value=[0, 0, 0]):
height, width = img.shape[:2]
if height < min_height:
h_pad_top = int((min_height - height) / 2.0)
h_pad_bottom = min_height - height - h_pad_top
else:
h_pad_top = 0
h_pad_bottom = 0
if width < min_width:
w_pad_left = int((min_width - width) / 2.0)
w_pad_right = min_width - width - w_pad_left
else:
w_pad_left = 0
w_pad_right = 0
if border_mode == cv2.BORDER_CONSTANT:
img = cv2.copyMakeBorder(img, h_pad_top, h_pad_bottom, w_pad_left,
w_pad_right, border_mode, value=value)
else:
img = cv2.copyMakeBorder(img, h_pad_top, h_pad_bottom, w_pad_left, w_pad_right, border_mode)
assert img.shape[0] == max(min_height, height)
assert img.shape[1] == max(min_width, width)
return img
@preserve_shape
def blur(img, ksize):
return cv2.blur(img, (ksize, ksize))
def _func_max_size(img, max_size, interpolation, func):
height, width = img.shape[:2]
scale = max_size / float(func(width, height))
if scale != 1.0:
out_size = tuple(int(dim * scale) for dim in (width, height))
img = cv2.resize(img, out_size, interpolation=interpolation)
return img
@preserve_channel_dim
def longest_max_size(img, max_size, interpolation):
return _func_max_size(img, max_size, interpolation, max)
@preserve_channel_dim
def smallest_max_size(img, max_size, interpolation):
return _func_max_size(img, max_size, interpolation, min)
@preserve_shape
def median_blur(img, ksize):
if img.dtype == np.float32 and ksize not in {3, 5}:
raise ValueError(
'Invalid ksize value {}. For a float32 image the only valid ksize values are 3 and 5'.format(ksize))
return cv2.medianBlur(img, ksize)
@preserve_shape
def motion_blur(img, ksize):
assert ksize > 2
kernel = np.zeros((ksize, ksize), dtype=np.uint8)
xs, xe = random.randint(0, ksize - 1), random.randint(0, ksize - 1)
if xs == xe:
ys, ye = random.sample(range(ksize), 2)
else:
ys, ye = random.randint(0, ksize - 1), random.randint(0, ksize - 1)
cv2.line(kernel, (xs, ys), (xe, ye), 1, thickness=1)
return cv2.filter2D(img, -1, kernel / np.sum(kernel))
@preserve_shape
def jpeg_compression(img, quality):
input_dtype = img.dtype
needs_float = False
if input_dtype == np.float32:
warn('Jpeg compression augmentation '
'is most effective with uint8 inputs, '
'{} is used as input.'.format(input_dtype),
UserWarning)
img = from_float(img, dtype=np.dtype('uint8'))
needs_float = True
elif input_dtype not in (np.uint8, np.float32):
raise ValueError('Unexpected dtype {} for Jpeg augmentation'.format(input_dtype))
_, encoded_img = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, quality))
img = cv2.imdecode(encoded_img, cv2.IMREAD_UNCHANGED)
if needs_float:
img = to_float(img, max_value=255)
return img
[docs]@preserve_shape
def optical_distortion(img, k=0, dx=0, dy=0, interpolation=cv2.INTER_LINEAR, border_mode=cv2.BORDER_REFLECT_101):
"""Barrel / pincushion distortion. Unconventional augment.
Reference:
| https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion
| https://stackoverflow.com/questions/10364201/image-transformation-in-opencv
| https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically
| http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/
"""
height, width = img.shape[:2]
fx = width
fy = width
cx = width * 0.5 + dx
cy = height * 0.5 + dy
camera_matrix = np.array([[fx, 0, cx],
[0, fy, cy],
[0, 0, 1]], dtype=np.float32)
distortion = np.array([k, k, 0, 0, 0], dtype=np.float32)
map1, map2 = cv2.initUndistortRectifyMap(camera_matrix, distortion, None, None, (width, height), cv2.CV_32FC1)
img = cv2.remap(img, map1, map2, interpolation=interpolation, borderMode=border_mode)
return img
[docs]@preserve_shape
def grid_distortion(img, num_steps=10, xsteps=[], ysteps=[], interpolation=cv2.INTER_LINEAR,
border_mode=cv2.BORDER_REFLECT_101):
"""
Reference:
http://pythology.blogspot.sg/2014/03/interpolation-on-regular-distorted-grid.html
"""
height, width = img.shape[:2]
x_step = width // num_steps
xx = np.zeros(width, np.float32)
prev = 0
for idx, x in enumerate(range(0, width, x_step)):
start = x
end = x + x_step
if end > width:
end = width
cur = width
else:
cur = prev + x_step * xsteps[idx]
xx[start:end] = np.linspace(prev, cur, end - start)
prev = cur
y_step = height // num_steps
yy = np.zeros(height, np.float32)
prev = 0
for idx, y in enumerate(range(0, height, y_step)):
start = y
end = y + y_step
if end > height:
end = height
cur = height
else:
cur = prev + y_step * ysteps[idx]
yy[start:end] = np.linspace(prev, cur, end - start)
prev = cur
map_x, map_y = np.meshgrid(xx, yy)
map_x = map_x.astype(np.float32)
map_y = map_y.astype(np.float32)
img = cv2.remap(img, map_x, map_y, interpolation=interpolation, borderMode=border_mode)
return img
def invert(img):
return 255 - img
def channel_shuffle(img):
ch_arr = [0, 1, 2]
random.shuffle(ch_arr)
img = img[..., ch_arr]
return img
@preserve_shape
def gamma_transform(img, gamma):
if img.dtype == np.uint8:
invGamma = 1.0 / gamma
table = np.array([((i / 255.0) ** invGamma) * 255 for i in np.arange(0, 256)]).astype("uint8")
img = cv2.LUT(img, table)
else:
img = np.power(img, gamma)
return img
@clipped
def gauss_noise(image, var):
mean = var
sigma = var ** 0.5
random_state = np.random.RandomState(random.randint(0, 2 ** 32 - 1))
gauss = random_state.normal(mean, sigma, image.shape)
gauss = (gauss - np.min(gauss)).astype(np.uint8)
return image.astype(np.int32) + gauss
@clipped
def brightness_contrast_adjust(img, alpha=1, beta=0):
img = img.astype('float32') * alpha + beta * np.mean(img)
return img
def to_gray(img):
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
if np.mean(gray) > 127:
gray = 255 - gray
return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
def to_float(img, max_value=None):
if max_value is None:
try:
max_value = MAX_VALUES_BY_DTYPE[img.dtype]
except KeyError:
raise RuntimeError(
'Can\'t infer the maximum value for dtype {}. You need to specify the maximum value manually by '
'passing the max_value argument'.format(img.dtype)
)
return img.astype('float32') / max_value
def from_float(img, dtype, max_value=None):
if max_value is None:
try:
max_value = MAX_VALUES_BY_DTYPE[dtype]
except KeyError:
raise RuntimeError(
'Can\'t infer the maximum value for dtype {}. You need to specify the maximum value manually by '
'passing the max_value argument'.format(dtype)
)
return (img * max_value).astype(dtype)
def bbox_shift_scale_rotate(bbox, angle, scale, dx, dy, interpolation, rows, cols, **params):
center = (0.5, 0.5)
matrix = cv2.getRotationMatrix2D(center, angle, scale)
matrix[0, 2] += dx
matrix[1, 2] += dy
x = np.array([bbox[0], bbox[2], bbox[2], bbox[0]])
y = np.array([bbox[1], bbox[1], bbox[3], bbox[3]])
ones = np.ones(shape=(len(x)))
points_ones = np.vstack([x, y, ones]).transpose()
tr_points = matrix.dot(points_ones.T).T
return [min(tr_points[:, 0]), min(tr_points[:, 1]), max(tr_points[:, 0]), max(tr_points[:, 1])]
[docs]def bbox_vflip(bbox, rows, cols):
"""Flip a bounding box vertically around the x-axis."""
x_min, y_min, x_max, y_max = bbox
return [x_min, 1 - y_max, x_max, 1 - y_min]
[docs]def bbox_hflip(bbox, rows, cols):
"""Flip a bounding box horizontally around the y-axis."""
x_min, y_min, x_max, y_max = bbox
return [1 - x_max, y_min, 1 - x_min, y_max]
[docs]def bbox_flip(bbox, d, rows, cols):
"""Flip a bounding box either vertically, horizontally or both depending on the value of `d`.
Raises:
ValueError: if value of `d` is not -1, 0 or 1.
"""
if d == 0:
bbox = bbox_vflip(bbox, rows, cols)
elif d == 1:
bbox = bbox_hflip(bbox, rows, cols)
elif d == -1:
bbox = bbox_hflip(bbox, rows, cols)
bbox = bbox_vflip(bbox, rows, cols)
else:
raise ValueError('Invalid d value {}. Valid values are -1, 0 and 1'.format(d))
return bbox
[docs]def crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols):
"""Crop a bounding box using the provided coordinates of bottom-left and top-right corners in pixels and the
required height and width of the crop.
"""
bbox = denormalize_bbox(bbox, rows, cols)
x_min, y_min, x_max, y_max = bbox
x1, y1, x2, y2 = crop_coords
cropped_bbox = [x_min - x1, y_min - y1, x_max - x1, y_max - y1]
return normalize_bbox(cropped_bbox, crop_height, crop_width)
def bbox_crop(bbox, x_min, y_min, x_max, y_max, rows, cols):
crop_coords = [x_min, y_min, x_max, y_max]
crop_height = y_max - y_min
crop_width = x_max - x_min
return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
def bbox_center_crop(bbox, crop_height, crop_width, rows, cols):
crop_coords = get_center_crop_coords(rows, cols, crop_height, crop_width)
return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
def bbox_random_crop(bbox, crop_height, crop_width, h_start, w_start, rows, cols):
crop_coords = get_random_crop_coords(rows, cols, crop_height, crop_width, h_start, w_start)
return crop_bbox_by_coords(bbox, crop_coords, crop_height, crop_width, rows, cols)
[docs]def bbox_rot90(bbox, factor, rows, cols):
"""Rotates a bounding box by 90 degrees CCW (see np.rot90)
Args:
bbox (tuple): A tuple (x_min, y_min, x_max, y_max).
factor (int): Number of CCW rotations. Must be in range [0;3] See np.rot90.
rows (int): Image rows.
cols (int): Image cols.
"""
if factor < 0 or factor > 3:
raise ValueError('Parameter n must be in range [0;3]')
x_min, y_min, x_max, y_max = bbox
if factor == 1:
bbox = [y_min, 1 - x_max, y_max, 1 - x_min]
if factor == 2:
bbox = [1 - x_max, 1 - y_max, 1 - x_min, 1 - y_min]
if factor == 3:
bbox = [1 - y_max, x_min, 1 - y_min, x_max]
return bbox
[docs]def bbox_rotate(bbox, angle, rows, cols, interpolation):
"""Rotates a bounding box by angle degrees
Args:
bbox (tuple): A tuple (x_min, y_min, x_max, y_max).
angle (int): Angle of rotation in degrees
rows (int): Image rows.
cols (int): Image cols.
interpolation (int): interpolation method.
return a tuple (x_min, y_min, x_max, y_max)
"""
x = np.array([bbox[0], bbox[2], bbox[2], bbox[0]])
y = np.array([bbox[1], bbox[1], bbox[3], bbox[3]])
x = x - 0.5
y = y - 0.5
angle = np.deg2rad(angle)
x_t = np.cos(angle) * x + np.sin(angle) * y
y_t = -np.sin(angle) * x + np.cos(angle) * y
x_t = x_t + 0.5
y_t = y_t + 0.5
return [min(x_t), min(y_t), max(x_t), max(y_t)]
[docs]def bbox_transpose(bbox, axis, rows, cols):
"""Transposes a bounding box along given axis.
Args:
bbox (tuple): A tuple (x_min, y_min, x_max, y_max).
axis (int): 0 - main axis, 1 - secondary axis.
rows (int): Image rows.
cols (int): Image cols.
"""
x_min, y_min, x_max, y_max = bbox
if axis != 0 and axis != 1:
raise ValueError('Axis must be either 0 or 1.')
if axis == 0:
bbox = [y_min, x_min, y_max, x_max]
if axis == 1:
bbox = [1 - y_max, 1 - x_max, 1 - y_min, 1 - x_min]
return bbox