python source code of ilsvrc2017

"""ILSVRC 2017 Classicifation Dataset.
DEPRECATED version. For the purpose of keeping history only.
Use ilsvrc2017_cls_multithread.py instead.
"""

import os
import cv2
import numpy as np
import random

import config as cfg


class ilsvrc_cls:

    def __init__(self, image_set, rebuild=False, data_aug=True):
        self.name = 'ilsvrc_2017'
        self.devkit_path = cfg.ILSVRC_PATH
        self.data_path = self.devkit_path
        self.cache_path = cfg.CACHE_PATH
        self.batch_size = cfg.BATCH_SIZE
        self.image_size = cfg.IMAGE_SIZE
        self.image_set = image_set
        self.rebuild = rebuild
        self.data_aug = data_aug
        self.cursor = 0
        self.load_classes()
        # self.gt_labels = None
        assert os.path.exists(self.devkit_path), \
            'VOCdevkit path does not exist: {}'.format(self.devkit_path)
        assert os.path.exists(self.data_path), \
            'Path does not exist: {}'.format(self.data_path)
        self.prepare()

    def prepare(self):
        """Create a list of ground truth that includes input path and label.
        """

        if (self.image_set == "train"):
            imgset_fname = "train_cls.txt"
        else:
            imgset_fname = self.image_set + ".txt"
        imgset_file = os.path.join(
            self.data_path, 'ImageSets', 'CLS-LOC', imgset_fname)
        print('Processing gt_labels using ' + imgset_file)
        gt_labels = []
        with open(imgset_file, 'r') as f:
            for line in f.readlines():
                img_path = line.strip().split()[0]
                label = self.class_to_ind[img_path.split("/")[0]]
                imname = os.path.join(
                    self.data_path, 'Data', 'CLS-LOC', self.image_set, img_path + ".JPEG")
                gt_labels.append(
                    {'imname': imname, 'label': label})
        random.shuffle(gt_labels)
        self.gt_labels = gt_labels

    def load_classes(self):
        """Use the folder name to get labels."""
        if (self.image_set == "train"):
            img_folder = os.path.join(
                self.data_path, 'Data', 'CLS-LOC', 'train')
            print('Loading class info from ' + img_folder)
            self.classes = [item for item in os.listdir(img_folder)
                            if os.path.isdir(os.path.join(img_folder, item))]
            self.num_class = len(self.classes)
            assert (self.num_class == 1000), "number of classes is not 1000!"
            self.class_to_ind = dict(
                list(zip(self.classes, list(range(self.num_class)))))

    def get(self):
        """Get shuffled images and labels according to batchsize.

        Return: 
            images: 4D numpy array
            labels: 1D numpy array
        """
        images = np.zeros(
            (self.batch_size, self.image_size, self.image_size, 3))
        labels = np.zeros(self.batch_size)
        count = 0
        while count < self.batch_size:
            imname = self.gt_labels[self.cursor]['imname']
            images[count, :, :, :] = self.image_read(imname, data_aug=self.data_aug)
            labels[count] = self.gt_labels[self.cursor]['label']
            count += 1
            self.cursor += 1
            if self.cursor >= len(self.gt_labels):
                random.shuffle(self.gt_labels)
                self.cursor = 0
        return images, labels

    def image_read(self, imname, data_aug=False):
        image = cv2.imread(imname)

        #####################
        # Data Augmentation #
        #####################
        if data_aug:
            flip = bool(random.getrandbits(1))
            rotate_deg = random.randint(0, 359)
            # 75% chance to do random crop
            # another 25% change in maintaining input at 224x224
            # this help simplify the input processing for test, val
            # TODO: can make multiscale test input later
            random_crop_chance = random.randint(0, 3)
            too_small = False
            color_pert = bool(random.getrandbits(1))

            if flip:
                image = image[:, ::-1, :]
            # assume color image
            rows, cols, _ = image.shape
            M = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotate_deg, 1)
            image = cv2.warpAffine(image, M, (cols, rows))

            # color perturbation
            if color_pert:
                hue_shift_sign = bool(random.getrandbits(1))
                hue_shift = random.randint(0, 10)
                saturation_shift_sign = bool(random.getrandbits(1))
                saturation_shift = random.randint(0, 10)
                hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
                # TODO: currently not sure what cv2 does to values 
                # that are larger than the maximum.
                # It seems it does not cut at the max 
                # nor normalize the whole by multiplying a factor.
                # need to expore this in more detail
                if hue_shift_sign:
                    hsv[:, :, 0] += hue_shift
                else:
                    hsv[:, :, 0] -= hue_shift
                if saturation_shift_sign:
                    hsv[:, :, 1] += saturation_shift
                else:
                    hsv[:, :, 1] -= saturation_shift
                image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

            # random crop
            if random_crop_chance > 0:
                # current random crop upbound is 292 (1.3 x 224)
                short_side_len = random.randint(
                    self.image_size, cfg.RAND_CROP_UPBOUND)
                short_side = min([cols, rows])
                if short_side == cols:
                    scaled_cols = short_side_len
                    factor = float(short_side_len) / cols
                    scaled_rows = int(rows * factor)
                else:
                    scaled_rows = short_side_len
                    factor = float(short_side_len) / rows
                    scaled_cols = int(cols * factor)
                # print "scaled_cols and rows:", scaled_cols, scaled_rows
                if scaled_cols < 224 or scaled_rows < 224:
                    too_small = True
                    print "Image is too small,", imname
                else:
                    image = cv2.resize(image, (scaled_cols, scaled_rows))
                    col_offset = random.randint(0, scaled_cols - self.image_size)
                    row_offset = random.randint(0, scaled_rows - self.image_size)
                    # print "col_offset and row_offset:", col_offset, row_offset
                    image = image[row_offset:self.image_size + row_offset,
                                col_offset:self.image_size + col_offset]
                # assuming still using image size 224x224
                # print "image shape is", image.shape

            if random_crop_chance == 0 or too_small:
                image = cv2.resize(image, (self.image_size, self.image_size))

        else:
            image = cv2.resize(image, (self.image_size, self.image_size))

        image = image.astype(np.float32)
        image = (image / 255.0) * 2.0 - 1.0

        return image