python source code of augmentations

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
from numba import jit, uint8, float32, int32

from lib.show_images import debugShowBoxes


class AugmentationBase(object):

    def __init__(self, target_width, target_height, apply_prob=1.0, debug=False):
        self.apply_prob = apply_prob
        self._tw = target_width
        self._th = target_height

        self._debug = debug

    def apply(self, image, boxes, meta_image):
        raise NotImplementedError('Inherit and implement')

    def image_resize(self, image, boxes, target_x=None, target_y=None):

        target_x = self._tw if target_x is None else target_x
        target_y = self._th if target_y is None else target_y

        if float(image.shape[0]) / float(image.shape[1]) < target_y / target_x:
            f = float(target_x) / image.shape[1]
            dsize = (target_x, int(image.shape[0] * f))
        else:
            f = float(target_y) / image.shape[0]
            dsize = (int(image.shape[1] * f), target_y)

        image = cv2.resize(image, dsize=dsize)

        scaled_boxes = boxes * np.atleast_2d(np.array([f, f, f, f]))

        if self._debug:
            pass

        return image, scaled_boxes

    def border_replicate(self, image, boxes, border_type=cv2.BORDER_REPLICATE):
        """
        :param border_type: cv2 borderType 
        :return: image of target_size
        """
        target_x = self._tw
        target_y = self._th
        resized_image = cv2.copyMakeBorder(image,
                                           top=0,
                                           left=0,
                                           right=target_x - image.shape[1],
                                           bottom=target_y - image.shape[0],
                                           borderType=border_type)

        return resized_image, boxes



class PartilPage(AugmentationBase):
    """ 
    Crop part of the image and only keep boxes that are fully inside the cropped region
    """

    def apply(self, image, boxes, meta_image):
        if np.random.rand() > self.apply_prob:
            return image, boxes, meta_image
        idx = np.array([])
        count = 0
        # try five times
        while not idx.size and count < 5:
            count += 1
            idx, z = self.box_filter(boxes, image.shape, (2*self._th, 2*self._tw))
            idx = np.array(idx)
        # if failed just skip the cropping
        if idx.size < 5 or max(z) < 1:
            return image, boxes, meta_image
        random_img = image[z[1]:z[3], z[0]:z[2], :]
        new_boxes = boxes[idx, :]
        new_boxes = adjust_boxes(new_boxes, (z[0], z[1]))
        meta_image._old_boxes = boxes
        meta_image.bboxes = new_boxes
        if len(meta_image.words) > 0:
            meta_image._old_words = meta_image.words
            meta_image.words = [meta_image.words[ind] for ind in idx]
        return random_img, new_boxes, meta_image

    @staticmethod
    def sq_size(limit, size):
        dy = int(size[0] / 2)
        dx = int(size[1] / 2)
        y = np.random.randint(dy, max(limit[0] - dy, dy+1))
        x = np.random.randint(dx, max(limit[1] - dx, dx+1))
        return x - dx, y - dy, x + dx, y + dy

    @staticmethod
    def box_filter(boxes, limits, size, border=20):
        z = PartilPage.sq_size(limits, size)
        # Pick boxes that fall inside new image boundaries
        good_idx = np.where((boxes[:, 0] > z[0]) & (boxes[:, 1] > z[1]) & (boxes[:, 2] < z[2]) & (boxes[:, 3] < z[3]))[0]
        # If boundaries are empty...
        if good_idx.shape[0] < 1:
            return [], (0, 0, 0, 0)
        good_boxes = boxes[good_idx, :]
        limits_of_good_boxes = np.concatenate((good_boxes[:, :2].min(0), good_boxes[:, 2:].max(0)))

        new_z = np.array([max(limits_of_good_boxes[0] - border, 0), max(limits_of_good_boxes[1] - border, 0),
                          min(limits_of_good_boxes[2] + border, limits[1]), min(limits_of_good_boxes[3] + border, limits[0])])\
            .astype(np.int32)
        return good_idx, new_z

@jit
def adjust_boxes(boxes, start_point):
    new_boxes = boxes - np.array(start_point*2)[np.newaxis, :]
    return new_boxes


class Resize(AugmentationBase):
    """ 
    Plain resize - keeps aspect ration and uses border replication to fit target image size.
    If image size is same as target size this augmentation will produce identity
    """

    def apply(self, image, boxes, meta_image):
        new_image, new_boxes = self.image_resize(image, boxes)
        final_image, final_boxes = self.border_replicate(new_image, new_boxes)
        return final_image, final_boxes, meta_image


class Slant(AugmentationBase):
    def __init__(self, apply_prob=0.3, slant_prob=0.5, **kwargs):
        super(Slant, self).__init__(**kwargs)
        self.slant_prob = slant_prob
        self._apply_prob = apply_prob

    def apply(self, image, boxes, meta_image):
        if np.random.rand() < self._apply_prob:
            aug_img = self.augment_boxes(image, boxes.astype(np.int32), self.slant_prob)
            return aug_img, boxes, meta_image
        return image, boxes, meta_image

    @staticmethod
    # @jit(uint8(uint8, int32, float32), nogil=True)
    def augment_boxes(image, boxes, prob):
        for b in boxes:
            img = image[b[1]:b[3], b[0]:b[2], :]
            p = np.random.rand()
            # if p > prob:
            augmented = Slant.img_aug(img[:, :, 0])
            augmented = expand(augmented)
            # else:
            #     augmented = img
            image[b[1]:b[3], b[0]:b[2]] = augmented
        return image

    @staticmethod
    # @jit(uint8(uint8), nogil=True)
    def img_aug(img):
        h, w = img.shape
        m = cv2.moments(img)
        if abs(m['mu02']) < 1e-2:
            return img
        skew = m['mu11'] / m['mu02']
        M = np.float32([[1, skew, -0.5 * w * skew], [0, 1, 0]])
        img = cv2.warpAffine(img, M, (w, h), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
        return img


class DialationErosio(AugmentationBase):
    """
    Dialate or erode word images
    """
    def __init__(self, dialation_prob=0.5, skip_gray_prob=0.0, apply_prob=0.3, **kwargs):
        super(DialationErosio, self).__init__(**kwargs)
        self.skip_gray_prob = skip_gray_prob
        self.apply_prob = apply_prob
        self.dialation_prob = dialation_prob

    def apply(self, image, boxes, meta_image):
        if np.random.rand() < self.skip_gray_prob:
            return image, boxes, meta_image
        # tic = time.time()
        gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # toc1 = time.time() - tic
        # print ('Gray Scale: %4.3f' % toc1)
        if np.random.rand() > self.apply_prob:
            gray_scale = expand(gray_scale)
            return gray_scale, boxes, meta_image
        kernel = np.ones((5, 5), np.uint8)
        gray_scale = augment_boxes(gray_scale, boxes.astype(np.int32), kernel, self.dialation_prob)
        # toc2 = time.time() - tic - toc1
        # print('Agument: %4.3f' % toc2)
        # gray_scale = np.repeat(gray_scale[:,:, np.newaxis], 3, axis=2)
        gray_scale = expand(gray_scale)
        q = 0.4 + 0.2*np.random.rand()
        gray_scale = np.array(q*gray_scale + (1-q)*image, dtype=np.uint8)
        # print('expand: %4.3f' % toc3)
        return gray_scale, boxes, meta_image

@jit(uint8(uint8, int32, uint8, float32), nogil=True)
def augment_boxes(gray_scale, boxes, kernel, prob):
    for b in boxes:
        img = gray_scale[b[1]:b[3], b[0]:b[2]]
        p = np.random.rand()
        if p < prob and img.shape[0] > 1 and img.shape[1] > 1:
        # if img.shape[0] > 1 and img.shape[1] > 1:
            augmented = cv2.dilate(img, kernel=kernel, iterations=1)
        else:
            augmented = img
        gray_scale[b[1]:b[3], b[0]:b[2]] = augmented
    return gray_scale

@jit(uint8(uint8), nogil=True)
def expand(gray_scale):
    gray_scale = np.repeat(gray_scale[:, :, np.newaxis], 3, axis=2)
    return gray_scale


class KeepWordsOnly(AugmentationBase):
    """
    Embed a small version of the image inside a black box of target size
    This should be helpful for learning to identify small scripts
    """

    def __init__(self, target_width, target_height, debug=False, **kwargs):
        super(KeepWordsOnly, self).__init__(target_width, target_height, debug)
        self._low_bound, self._high_bound = kwargs.get('ratio_bounds', (0.2, 0.7))
        self._prob = kwargs.get('prob', 1.)

    def apply(self, image, boxes, meta_image):
        mins = boxes.min(0).astype(np.int32) - 5
        maxs = boxes.max(0).astype(np.int32) + 5

        new_image = image[mins[1]:maxs[3], mins[0]:maxs[2], :]
        if new_image.shape[0] < 10 or new_image.shape[1] < 10:
            black_image = np.zeros(shape=image.shape, dtype=image.dtype)
            black_image[mins[1]:maxs[3], mins[0]:maxs[2], :] = image[mins[1]:maxs[3], mins[0]:maxs[2], :]
            return black_image, boxes
        new_boxes = boxes - np.array([mins[0], mins[1], mins[0], mins[1]])
        image = None
        return new_image, new_boxes, meta_image


class ImageEmbed(AugmentationBase):
    """
    Embed a small version of the image inside a black box of target size
    This should be helpful for learning to identify small scripts
    """

    def __init__(self, target_width, target_height, debug=False, **kwargs):
        super(ImageEmbed, self).__init__(target_width, target_height, debug)
        self._low_bound, self._high_bound = kwargs.get('ratio_bounds', (0.2, 0.7))
        self._prob = kwargs.get('prob', 1.)

    def apply(self, image, boxes, meta_image):
        if np.random.rand() >= self._prob:
            return image, boxes, meta_image
        black_image = np.zeros(shape=(self._th, self._tw, 3), dtype=image.dtype)
        tw, th, x0, y0 = self.pick_random_size()
        scale_image, scaled_boxes = self.image_resize(image, boxes, target_x=tw, target_y=th)
        black_image[y0:(y0 + scale_image.shape[0]), x0:(x0 + scale_image.shape[1]), :] = scale_image
        translated_boxes = scaled_boxes + [x0, y0, x0, y0]
        return black_image, translated_boxes, meta_image

    def pick_random_size(self):
        ratio = self._low_bound + (self._high_bound - self._low_bound) * np.random.rand()
        tw = np.int32(ratio * self._tw)
        th = np.int32(ratio * self._th)

        x0 = np.random.randint(0, self._tw)
        y0 = np.random.randint(0, self._th)
        while not (x0 + tw < self._tw and y0 + th < self._th):
            x0 = np.random.randint(0, self._tw)
            y0 = np.random.randint(0, self._th)

        return tw, th, x0, y0

class GaussianNoise(AugmentationBase):

    def __init__(self, target_width, target_height, debug=False, **kwargs):
        super(GaussianNoise, self).__init__(target_width, target_height, debug)
        # Noise apply probability
        self._prob = kwargs.get('prob', 0.5)

    def apply(self, image, boxes, meta_image):
        # Noise apply probability
        if np.random.rand() >= self._prob:
            return image, boxes, meta_image
        new_img = image * np.random.normal(np.ones(image.shape[:-1] + (1, )), 0.15, image.shape[:-1] + (1, ))
        return new_img, boxes, meta_image


class BoxRearange(AugmentationBase):

    @staticmethod
    def box_mover(box, start_point=(0, 0)):
        box = np.array(box)
        box[2] -= box[0] - start_point[0]
        box[0] -= box[0] - start_point[0]
        box[3] -= box[1] - start_point[1]
        box[1] -= box[1] - start_point[1]
        return box

    def apply(self, image, boxes, meta_image):
        new_format = WordAranger(image, boxes, meta_image)
        return new_format.get_new_page()


def split(box):
    if np.random.rand() < 0.5:
        s = np.random.randint(box[0], box[2])
        box1 = [box[0], box[1], s, box[3]]
        box2 = [s, box[1], box[2], box[3]]
    else:
        s = np.random.randint(box[1], box[3])
        box1 = [box[0], box[1], box[2], s]
        box2 = [box[0], s, box[2], box[3]]
    return box1, box2


class ImageSplitter(object):
    def __init__(self, image_shape, max_h, max_w, max_splits=1):
        self.max_w = max_w
        self.max_h = max_h
        self.max_splits = max_splits
        self.h = image_shape[0]
        self.w = image_shape[1]
        self._boxes = [(0, 0, self.w, self.h)]
        self._splits = 0

    def get_next_box(self):
        box = self._boxes.pop(0)
        if self._splits < self.max_splits and (box[2] - box[0] > 2*self.max_w and box[3] - box[1] > 2*self.max_h):
            self._splits += 1
            boxes = self.split(box)
            for b in boxes:
                self._boxes.append(b)
            return self.get_next_box()
        init_x = box[0]
        init_y = box[1]
        final_x = box[2]
        final_y = box[3]
        line_arrays = init_y + np.cumsum(np.random.randint(self.max_h, self.max_h + 10, int((final_y - init_y) / self.max_h) - 1))
        return init_x, final_x, init_y, final_y, line_arrays

    def split(self, box):
        s = 0
        return_boxes = []
        indicator = (np.random.rand() < 0.5) #and box[2] - box[0] > 2*self.max_w) or (box[2] - box[0] > 2*self.max_w and box[3] - box[1] < 2*self.max_h)
        if indicator:
            while len(return_boxes) < 1:
                s = np.random.randint(box[0] + self.max_w, box[2] - self.max_w)
                box1 = (box[0], box[1], s, box[3])
                box2 = (s, box[1], box[2], box[3])
                if box1[2] - box1[0] > self.max_w:
                    return_boxes.append(box1)
                if box2[2] - box2[0] > self.max_w:
                    return_boxes.append(box2)
        else:
            while s - box[1] < self.max_h and box[3] - s < self.max_h:
                s = np.random.randint(box[1] + self.max_h, box[3] - self.max_h)
                box1 = (box[0], box[1], box[2], s)
                box2 = (box[0], s, box[2], box[3])
                if box1[3] - box1[1] > self.max_h:
                    return_boxes.append(box1)
                if box2[3] - box2[1] > self.max_h:
                    return_boxes.append(box2)

        return return_boxes


class WordAranger(object):

    def __init__(self, image, boxes, meta_image, fill_meta_images=None):
        self.fill_meta_images = fill_meta_images
        self.meta_image = meta_image
        self.boxes = np.array(boxes).tolist()
        self._permutation = np.random.permutation(range(len(self.boxes))).tolist()
        self.image = image
        self._new_word_list = []
        self._new_bboxes = []

        self.w = image.shape[1]
        self.h = image.shape[0]
        self.empty = self.image is None

        if self.empty:
            dh = np.random.randint(1200, 4500)
            self.image = np.ones((dh, int(dh/1.33), 3))*128.
        self.canvas = self._get_canvas()

    def get_new_page(self):
        self.fill_page()
        self.meta_image.words = self._new_word_list
        bboxes = np.array(self._new_bboxes)
        self.meta_image.bboxes = bboxes
        return self.canvas.astype(np.uint8), bboxes, self.meta_image

    def fill_page(self):
        zero_point_x = np.random.randint(5, 100)
        zero_point_y = np.random.randint(5, 100)
        end = [self.w, zero_point_y]

        abs_max_h = 0

        while end[1] < self.h - abs_max_h:
            strt = [zero_point_x, end[1]]
            line_gap = np.random.randint(5, 50)
            max_h = self.fill_line(strt, end)
            if max_h == strt[1]:
                break
            abs_max_h = max(max_h - end[1], abs_max_h)
            end[1] = max_h + line_gap
            strt[1] = max_h + line_gap

    def _get_canvas(self):
        img = self.image
        dx = np.mean(img, axis=(0, 1))[np.newaxis, np.newaxis, :]
        dy = np.ones(img.shape, dtype=np.uint8) * 227
        dy[:, :, :] = dx
        dy = dy + 0.02 * dy * np.random.randn(*dy.shape[:-1])[:, :, np.newaxis]
        return dy

    def fill_canvas(self, new_box, old_box):
        img = self.image
        dy = self.canvas
        new_box = np.array(new_box, dtype=np.int32)
        old_box = np.array(old_box, dtype=np.int32)
        gamma = np.random.beta(1.1, 0.1)
        dy[new_box[1]:new_box[3], new_box[0]:new_box[2], :] = (gamma)*img[old_box[1]:old_box[3], old_box[0]:old_box[2], :] + \
                                                              (1-gamma)*dy[new_box[1]:new_box[3], new_box[0]:new_box[2], :]

    def fill_line(self, strt, end):
        gappines = np.random.randint(20, 150)
        box, word_dict = self.get_next_word()
        max_h = strt[1]

        if box is None or word_dict is None:
            return end[1]

        while strt[0] + box[2] - box[0] < end[0]:
            new_box = WordAranger.box_mover(box, strt)
            gap = np.random.randint(5, gappines)
            strt[0] += (new_box[2] - new_box[0] + gap)
            max_h = max(new_box[3], max_h)

            word_dict['box'] = new_box.tolist()
            self._new_word_list.append(word_dict)
            self._new_bboxes.append(new_box)

            self.fill_canvas(new_box, box)

            box, word_dict = self.get_next_word()
            if box is None and word_dict is None:
                break

        if box is not None and word_dict is not None:
            self.boxes.append(box)
            self.meta_image.words.append(word_dict)
        return max_h

    def get_next_word(self):
        if len(self._permutation):
            idx = self._permutation.pop()
            box = self.boxes[idx]
            word_dict = self.meta_image.words[idx]
            return box, word_dict
        return None, None

    @staticmethod
    def box_mover(box, start_point=(0, 0)):
        box = np.array(box)
        box[2] -= box[0] - start_point[0]
        box[0] -= box[0] - start_point[0]
        box[3] -= box[1] - start_point[1]
        box[1] -= box[1] - start_point[1]
        return box



# @jit
def reorder_boxes(image, boxes):
    dx = np.mean(image, axis=(0, 1))[np.newaxis, np.newaxis, :]
    dy = np.ones(image.shape, dtype=np.uint8) * 227
    dy[:, :, :] = dx
    dy = dy + 0.02 * dy * np.random.randn(*dy.shape[:-1])[:, :, np.newaxis]
    mw = np.diff(boxes[:, ::2]).max()
    mh = np.diff(boxes[:, 1::2]).max()
    splitter = ImageSplitter(image.shape, mh, mw, max_splits=1)
    init_x, final_x, init_y, final_y, line_arrays = splitter.get_next_box()
    next_x = init_x + 5
    j = 0
    gamma = 0.95
    new_boxes = []
    for n, b in enumerate(boxes):
        box_img = image[b[1]:b[3], b[0]:b[2], :].astype(np.uint8)
        line_limit = next_x + (b[2] - b[0])
        while line_limit > final_x or final_y < line_arrays[j] + (b[3] - b[1]):
            j = j + 1
            if j > len(line_arrays) - 1:
                init_x, final_x, init_y, final_y, line_arrays = splitter.get_next_box()
                j = 0
            next_x = init_x + 5
            line_limit = next_x + (b[2] - b[0])

        new_box = BoxRearange.box_mover(b, (next_x, line_arrays[j] + rnd_shift()))
        new_boxes.append(new_box)
        dy[new_box[1]:new_box[3], new_box[0]:new_box[2], :] = gamma * box_img + (1 - gamma) * dy[new_box[1]:new_box[3], new_box[0]:new_box[2], :]
        next_x = new_box[2] + max(rnd_shift(), 0)

    return dy, np.array(new_boxes)


def rnd_shift(phi=5):
    return np.random.randint(-phi, phi)


def test_augmentations():
    import data
    from data.simple_pipe import PipelineBase
    from data.data_extenders import phoc_embedding, from_image_to_heatmap, regression_bbox_targets, tf_boxes

    iamdb = data.IamDataset('datasets/iamdb')
    iamdb.run()
    it = iamdb.get_iterator(infinite=True)

    pipeline = PipelineBase(it, batch_size=1, target_x=900, target_y=1200)
    pipeline.add_augmentation(BoxRearange)
    pipeline.add_augmentation(DialationErosio, apply_prob=0.2)
    pipeline.add_augmentation(Slant, apply_prob=0.2)
    pipeline.add_augmentation(Resize)

    # Heatmap
    pipeline.add_extender('heatmap', from_image_to_heatmap, in_batch='vstack', trim=0.2)
    # Regression
    pipeline.add_extender(('reg_target', 'reg_flags'), regression_bbox_targets, in_batch='vstack', fmap_w=112,
                          fmap_h=150)
    # TF Boxes
    pipeline.add_extender(('tf_gt_boxes',), tf_boxes, in_batch='hstack')
    # Phoc Extenders
    pipeline.add_extender(('phocs', 'tf_gt_boxes'), phoc_embedding, in_batch='hstack')
    pipeline.run(num_producers=1)

    for i in range(100):
        batch = pipeline.pull_data()

        img, boxes = batch['image'][0, :], batch['gt_boxes']
        debugShowBoxes(img / 255. / 255., boxes=boxes[:, 1:], wait=300)

if __name__ == '__main__':
    test_augmentations()