import os import pickle import hashlib import time import numpy as np import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = (12.0, 12.0) from PIL import Image from joblib import Parallel, delayed ### Create config file class DatasetGeneratorConfig(): # Scene image shape IMAGE_WIDTH = 96 IMAGE_HEIGHT = 96 # Target image shape TARGET_WIDTH = 32 TARGET_HEIGHT = 32 # Number of distractors = characters placed behind the target DISTRACTORS = 31 # Number of occluders = characters placed atop of the target OCCLUDERS = 0 # Percentage of empty images [0,1] EMPTY = 0 # Drawer split DRAWER_SPLIT = 'all' #one of: 'all', 'train', 'val' DRAWER_SPLIT_POINT = 10 # Data augmentation settings MAX_ROTATION = 20 MAX_SHEAR = 10 MAX_SCALE = 2 # Number of images per parallel job JOBLENGTH = 2000 def set_drawer_split(self): #split char instances if self.DRAWER_SPLIT == 'train': self.LOW_INSTANCE = 0 self.HIGH_INSTANCE = self.DRAWER_SPLIT_POINT elif self.DRAWER_SPLIT == 'val': self.LOW_INSTANCE = self.DRAWER_SPLIT_POINT self.HIGH_INSTANCE = 20 elif self.DRAWER_SPLIT == 'all': self.LOW_INSTANCE = 0 self.HIGH_INSTANCE = 20 else: print("A drawer split has to be chosen from ['all', 'train', 'val']") ### Define Data Augmentation Functions # Define rotation functions def rot_x(phi,theta,ptx,pty): return np.cos(phi+theta)*ptx + np.sin(phi-theta)*pty def rot_y(phi,theta,ptx,pty): return -np.sin(phi+theta)*ptx + np.cos(phi-theta)*pty # Apply affine transformations and scale characters for data augmentation def prepare_char(some_char, angle=20, shear=10, scale=2): phi = np.radians(np.random.uniform(-angle,angle)) theta = np.radians(np.random.uniform(-shear,shear)) a = scale**np.random.uniform(-1,1) b = scale**np.random.uniform(-1,1) (x,y) = some_char.size x = a*x y = b*y xextremes = [rot_x(phi,theta,0,0),rot_x(phi,theta,0,y),rot_x(phi,theta,x,0),rot_x(phi,theta,x,y)] yextremes = [rot_y(phi,theta,0,0),rot_y(phi,theta,0,y),rot_y(phi,theta,x,0),rot_y(phi,theta,x,y)] mnx = min(xextremes) mxx = max(xextremes) mny = min(yextremes) mxy = max(yextremes) aff_bas = np.array([[a*np.cos(phi+theta), b*np.sin(phi-theta), -mnx],[-a*np.sin(phi+theta), b*np.cos(phi-theta), -mny],[0, 0, 1]]) aff_prm = np.linalg.inv(aff_bas) some_char = some_char.transform((int(mxx-mnx),int(mxy-mny)), method = Image.AFFINE, data = np.ndarray.flatten(aff_prm[0:2,:])) some_char = some_char.resize((int(32*(mxx-mnx)/105),int(32*(mxy-mny)/105))) return some_char # Crop scaled images to character size def crop_image(image): im_arr = np.asarray(image) lines_y = np.all(im_arr == 0, axis=1) lines_x = np.all(im_arr == 0, axis=0) k = 0 l = len(lines_y)-1 m = 0 n = len(lines_x)-1 while lines_y[k] == True: k = k+1 while lines_y[l] == True: l = l-1 while lines_x[m] == True: m = m+1 while lines_x[n] == True: n = n-1 cropped_image = image.crop((m,k,n,l)) #plt.imshow(image.crop((m,k,n,l))) return cropped_image # Color characters with a random RGB color def color_char(tmp_im): size = tmp_im.size tmp_im = tmp_im.convert('RGBA') tmp_arr = np.asarray(tmp_im) rnd = np.random.rand(3) stuff = tmp_arr[:,:,0] > 0 tmp_arr = tmp_arr*[rnd[0], rnd[1], rnd[2], 1] tmp_arr[:,:,3] = tmp_arr[:,:,3]*stuff tmp_arr = tmp_arr.astype('uint8') tmp_im = Image.fromarray(tmp_arr) return tmp_im ### Define Image Generation Functions # Generate one image with clutter def make_cluttered_image(chars, char, n_distractors, config, verbose=0): '''Inputs: chars: Dataset of characters char: target character nclutt: number of distractors empty: if True do not include target character''' # While loop added for error handling l=0 while l < 1: #initialize image and segmentation mask im = Image.new('RGBA', (config.IMAGE_WIDTH,config.IMAGE_HEIGHT), (0,0,0,255)) seg = Image.new('RGBA', (config.IMAGE_WIDTH,config.IMAGE_HEIGHT), (0,0,0,255)) #generate background clutter j = 0 while j < n_distractors: # draw random character instance rnd_char = np.random.randint(0,len(chars)) rnd_ind = np.random.randint(config.LOW_INSTANCE,config.HIGH_INSTANCE) some_char = chars[rnd_char][rnd_ind] try: # augment random character tmp_im = prepare_char(some_char) tmp_im = crop_image(tmp_im) tmp_im = color_char(tmp_im) j = j+1 except: if verbose > 0: print('Error generating distractors') continue # add augmented random character to image im.paste(tmp_im, (np.random.randint(0,im.size[0]-tmp_im.size[0]+1), np.random.randint(0,im.size[1]-tmp_im.size[1]+1)), mask = tmp_im) # if empty: draw another random character instead of the target empty = np.random.random() < config.EMPTY if empty: rnd_char = np.random.randint(0,len(chars)) rnd_ind = np.random.randint(config.LOW_INSTANCE,config.HIGH_INSTANCE) char = chars[rnd_char][rnd_ind] j = 0 while j < 1: try: # augment target character glt_im = prepare_char(char) #transform char glt_im = crop_image(glt_im) #crop char glt_im_bw = glt_im glt_im = color_char(glt_im) #color char j = j+1 except: if verbose > 0: print('Error augmenting target character') continue # place augmentad target char left = np.random.randint(0,im.size[0]-glt_im.size[0]+1) upper = np.random.randint(0,im.size[1]-glt_im.size[1]+1) im.paste(glt_im, (left, upper), mask = glt_im) #make segmentation mask if not empty: seg.paste(glt_im_bw, (left, upper), mask = glt_im_bw) # generate occlusion j = 0 while j < config.OCCLUDERS: # draw random character rnd_char = np.random.randint(0,len(chars)) rnd_ind = np.random.randint(config.LOW_INSTANCE,config.HIGH_INSTANCE) some_char = chars[rnd_char][rnd_ind] try: # augment occluding character tmp_im = prepare_char(some_char) tmp_im = crop_image(tmp_im) tmp_im = color_char(tmp_im) j = j + 1 except: if verbose > 0: print('Error generating occlusion') continue # place occluding character im.paste(tmp_im, (np.random.randint(0,im.size[0]-tmp_im.size[0]+1), np.random.randint(0,im.size[1]-tmp_im.size[1]+1)), mask = tmp_im) #convert image from RGBA to RGB for saving im = im.convert('RGB') seg = seg.convert('1') l=l+1 return im, seg def make_target(chars, char, config, verbose=0): '''Inputs: chars: Dataset of characters char: target character''' # Legacy while loop to generate multiple targets for data augemntation # Multiple targets did not improve performance in our experiments l=0 while l < 1: try: # initialize image im = Image.new('RGBA', (config.TARGET_WIDTH,config.TARGET_HEIGHT), (0,0,0,255)) # augment target character (no scaling is applied) glt_im = prepare_char(char, angle=config.MAX_ROTATION, shear=config.MAX_SHEAR, scale=1) #transform char glt_im = crop_image(glt_im) #crop char glt_im = color_char(glt_im) #color char #place target character left = (im.size[0]-glt_im.size[0])//2 upper = (im.size[1]-glt_im.size[1])//2 im.paste(glt_im, (left, upper), mask = glt_im) #convert image from RGBA to RGB for saving im = im.convert('RGB') except: if verbose > 0: print('Error generating target') continue l=l+1 return im def make_image(chars, k, config, seed=None): '''Inputs: chars: Dataset of characters angle: legacy shear: legacy scale: legacy joblength: number of images to create in each job k: job index seed: random seed to generate different results in each job coloring: legacy''' # Generate random seed np.random.seed(seed) # Initialize batch data storage r_ims = np.zeros((config.JOBLENGTH,config.IMAGE_WIDTH,config.IMAGE_HEIGHT,3), dtype='uint8') r_seg = np.zeros((config.JOBLENGTH,config.IMAGE_WIDTH,config.IMAGE_HEIGHT,1), dtype='uint8') r_tar = np.zeros((config.JOBLENGTH,config.TARGET_WIDTH,config.TARGET_HEIGHT,3), dtype='uint8') for i in range(config.JOBLENGTH): #select a char char_char = np.random.randint(0,len(chars)) char_ind = np.random.randint(config.LOW_INSTANCE,config.HIGH_INSTANCE) char = chars[char_char][char_ind] # choose random number of distractors for datasets with varying clutter # selects the one fixed number of distractors in other cases n_distractors = np.random.choice([config.DISTRACTORS]) #generate images and segmentation masks ims, seg = make_cluttered_image(chars, char, n_distractors, config) #generate targets tar = make_target(chars, char, config) # Append to dataset r_ims[i,:,:,:] = ims r_seg[i,:,:,0] = seg r_tar[i,:,:,:] = tar return r_ims, r_seg, r_tar ### Multiprocessing Dataset Generation Routine def generate_dataset(path, dataset_size, chars, config, seed=None, save=True, show=False, checksum=None): '''Inputs: path: Save path N: number of images chars: Dataset of characters char_locs: legacy split: train/val split of drawer instances save: If True save dataset to path show: If true plot generated images''' t = time.time() # Define necessary number of jobs N = dataset_size M = dataset_size//config.JOBLENGTH # Initialize data data_ims = np.zeros((N,config.IMAGE_WIDTH,config.IMAGE_HEIGHT,3), dtype='uint8') data_seg = np.zeros((N,config.IMAGE_WIDTH,config.IMAGE_HEIGHT,1), dtype='uint8') data_tar = np.zeros((N,config.TARGET_WIDTH,config.TARGET_HEIGHT,3), dtype='uint8') # Execute parallel data generation #for i in range(0,N): #with Parallel(n_jobs=10, verbose=50) as parallel: print('Executing %.d tasks'%(M)) if seed: np.random.seed(seed) print('Seed fixed') seeds = np.unique(np.random.randint(2**32, size=2*M)) results = Parallel(n_jobs=-1, verbose=50)(delayed(make_image)(chars, k, config, seed=seeds[k]) for k in range(M)) # feed results into the dataset for i in range(0,M): for j in range(config.JOBLENGTH): data_ims[i*config.JOBLENGTH+j,:,:,:] = results[i][0][j,...] data_seg[i*config.JOBLENGTH+j,:,:,:] = results[i][1][j,...] data_tar[i*config.JOBLENGTH+j,:,:,:] = results[i][2][j,...] #save dataset save = save if save == True: if not os.path.exists(path): os.makedirs(path) np.save(path + 'images', data_ims.astype('uint8')) np.save(path + 'segmentation', data_seg.astype('uint8')) np.save(path + 'targets', data_tar.astype('uint8')) #show outputs show = show if show == True: for i in range(0,N): plt.figure plt.subplot(131) plt.imshow(data_tar[i,...]) plt.subplot(132) plt.imshow(data_ims[i,...]) plt.subplot(133) plt.imshow(data_seg[i,...,0]) plt.show() print("Duration:", time.time()-t) # Test checksum last_image = data_ims[-1,...] print("Hash:", hashlib.md5(last_image).digest()) if checksum: if hashlib.md5(last_image).digest() == checksum: print("Dataset was correctly created!") else: print("Incorrect hash value!") ### Data loader def load_dataset(dataset_dir, subset): assert subset in ['train', 'val-train', 'test-train', 'val-one-shot', 'test-one-shot'] path = os.path.join(dataset_dir, subset) # Load data in memory mapping mode to reduce RAM usage ims = np.load(os.path.join(path, 'images.npy'), mmap_mode='r') seg = np.load(os.path.join(path, 'segmentation.npy'), mmap_mode='r') tar = np.load(os.path.join(path, 'targets.npy'), mmap_mode='r') return ims, seg, tar