"""ILSVRC 2017 Classicifation Dataset. Use val_split to set the portion for validation set. """ import os import cv2 import math import numpy as np import random import pickle import copy from tqdm import trange, tqdm import config as cfg class tf_flowers: def __init__(self, val_split, rebuild=False, data_aug=False): self.name = 'TF_flowers' self.devkit_path = cfg.FLOWERS_PATH self.data_path = self.devkit_path self.cache_path = cfg.CACHE_PATH self.batch_size = cfg.BATCH_SIZE self.image_size = cfg.IMAGE_SIZE self.rebuild = rebuild self.data_aug = data_aug self.num_class = 5 self.classes = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'] self.class_to_ind = dict( list(zip(self.classes, list(range(self.num_class))))) self.train_cursor = 0 self.val_cursor = 0 self.epoch = 1 self.gt_labels = None self.val_split = val_split assert os.path.exists(self.devkit_path), \ 'TF_flowers path does not exist: {}'.format(self.devkit_path) assert os.path.exists(self.data_path), \ 'Path does not exist: {}'.format(self.data_path) self.prepare() def prepare(self): """Create a list of ground truth that includes input path and label. Then, split the data into training set and validation set according to val_split. """ # TODO: may still need to implement test cache_file = os.path.join( self.cache_path, 'TF_flowers_gt_labels.pkl') if os.path.isfile(cache_file) and not self.rebuild: print('Loading gt_labels from: ' + cache_file) with open(cache_file, 'rb') as f: gt_labels = pickle.load(f) print('{} dataset gt_labels loaded from {}'. format(self.name, cache_file)) else: print('Processing gt_labels using...') gt_labels = [] for c in tqdm(self.classes): label = self.class_to_ind[c] c_data_dir = os.path.join(self.data_path, c) for f in os.listdir(c_data_dir): if f[-4:].lower() == '.jpg': imname = os.path.join(c_data_dir, f) gt_labels.append({'imname': imname, 'label': label}) print('Saving gt_labels to: ' + cache_file) with open(cache_file, 'wb') as f: pickle.dump(gt_labels, f) random.shuffle(gt_labels) self.gt_labels = gt_labels self.dataset_size = len(gt_labels) self.total_batch = int( math.ceil(self.dataset_size / float(self.batch_size))) cut_idx = int(self.dataset_size * self.val_split) self.val_gt_labels = copy.deepcopy(gt_labels[:cut_idx]) self.train_gt_labels = copy.deepcopy(gt_labels[cut_idx:]) print('training set size: {:d}, validation set size: {:d}' .format(len(self.train_gt_labels), len(self.val_gt_labels))) def get_train(self): return self._get('train') def get_val(self): return self._get('val') def _get(self, image_set): """Get shuffled images and labels according to batchsize. Use image_set to set whether to get training set or validation set. validation set data will not have data augmentation. Return: images: 4D numpy array labels: 1D numpy array """ if image_set == 'val': gt_labels = self.val_gt_labels cursor = self.val_cursor data_aug = False elif image_set == 'train': gt_labels = self.train_gt_labels cursor = self.train_cursor data_aug = self.data_aug images = np.zeros( (self.batch_size, self.image_size, self.image_size, 3)) labels = np.zeros(self.batch_size) count = 0 while count < self.batch_size: imname = gt_labels[cursor]['imname'] images[count, :, :, :] = self.image_read( imname, data_aug=data_aug) labels[count] = gt_labels[cursor]['label'] count += 1 cursor += 1 if cursor >= len(gt_labels): random.shuffle(self.train_gt_labels) cursor = 0 if image_set == 'train': self.epoch += 1 if image_set == 'val': self.val_cursor = cursor elif image_set == 'train': self.train_cursor = cursor return images, labels def image_read(self, imname, data_aug=False): image = cv2.imread(imname) ##################### # Data Augmentation # ##################### if data_aug: flip = bool(random.getrandbits(1)) rotate_deg = random.randint(0, 359) # 75% chance to do random crop # another 25% change in maintaining input at 224x224 # this help simplify the input processing for test, val # TODO: can make multiscale test input later random_crop_chance = random.randint(0, 3) too_small = False color_pert = bool(random.getrandbits(1)) exposure_shift = bool(random.getrandbits(1)) if flip: image = image[:, ::-1, :] # assume color image rows, cols, _ = image.shape M = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotate_deg, 1) image = cv2.warpAffine(image, M, (cols, rows)) # color perturbation if color_pert: hue_shift_sign = bool(random.getrandbits(1)) hue_shift = random.randint(0, 10) saturation_shift_sign = bool(random.getrandbits(1)) saturation_shift = random.randint(0, 10) hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # TODO: currently not sure what cv2 does to values # that are larger than the maximum. # It seems it does not cut at the max # nor normalize the whole by multiplying a factor. # need to expore this in more detail if hue_shift_sign: hsv[:, :, 0] += hue_shift else: hsv[:, :, 0] -= hue_shift if saturation_shift_sign: hsv[:, :, 1] += saturation_shift else: hsv[:, :, 1] -= saturation_shift image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) if exposure_shift: brighter = bool(random.getrandbits(1)) if brighter: gamma = random.uniform(1, 2) else: gamma = random.uniform(0.5, 1) image = ((image / 255.0) ** (1.0 / gamma)) * 255 # random crop if random_crop_chance > 0: # current random crop upbound is 292 (1.3 x 224) short_side_len = random.randint( self.image_size, cfg.RAND_CROP_UPBOUND) short_side = min([cols, rows]) if short_side == cols: scaled_cols = short_side_len factor = float(short_side_len) / cols scaled_rows = int(rows * factor) else: scaled_rows = short_side_len factor = float(short_side_len) / rows scaled_cols = int(cols * factor) # print "scaled_cols and rows:", scaled_cols, scaled_rows if scaled_cols < 224 or scaled_rows < 224: too_small = True print "Image is too small,", imname else: image = cv2.resize(image, (scaled_cols, scaled_rows)) col_offset = random.randint( 0, scaled_cols - self.image_size) row_offset = random.randint( 0, scaled_rows - self.image_size) # print "col_offset and row_offset:", col_offset, row_offset image = image[row_offset:self.image_size + row_offset, col_offset:self.image_size + col_offset] # assuming still using image size 224x224 # print "image shape is", image.shape if random_crop_chance == 0 or too_small: image = cv2.resize(image, (self.image_size, self.image_size)) else: image = cv2.resize(image, (self.image_size, self.image_size)) image = image.astype(np.float32) image = (image / 255.0) * 2.0 - 1.0 return image