import os
import sys

import numpy as np
from numpy import sqrt, prod, ones, floor, repeat, pi, exp, zeros, sum
from numpy.random import RandomState

from theano.tensor.nnet import conv2d
from theano import shared, config, _asarray, function
import theano.tensor as T

from sklearn.feature_extraction.image import PatchExtractor
from sklearn.decomposition import PCA
from skimage import exposure
from skimage import io
from skimage import img_as_float, img_as_ubyte, img_as_uint, img_as_int
from skimage.color import label2rgb, rgb2hsv, hsv2rgb
from skimage.io import ImageCollection, imsave, imshow
# aliased so the local resize() stub below does not shadow it
from skimage.transform import resize as sk_resize

floatX = config.floatX


def compare_mask_image_filenames(filenames_images, filenames_mask,
                                 replace_from='', replace_to='',
                                 msg="Filename images and mask mismatch"):
    image = [i.split('/')[-1] for i in filenames_images]
    mask = [i.split('/')[-1].replace(replace_from, replace_to)
            for i in filenames_mask]
    assert np.array_equal(image, mask), msg


def convert_RGB_mask_to_index(im, colors, ignore_missing_labels=False):
    """
    :param im: mask in RGB format (classes are RGB colors)
    :param colors: the color map, in the following format:
        colors = OrderedDict([
            ("Sky", np.array([[128, 128, 128]], dtype=np.uint8)),
            ("Building", np.array([[128, 0, 0],   # Building
                                   [64, 192, 0],  # Wall
                                   [0, 128, 64]   # Bridge
                                   ], dtype=np.uint8)),
            ...
        ])
    :param ignore_missing_labels: if True, pixels that fail the mapping are
        assigned to the void class (RGB [0, 0, 0]) instead of raising
    :return: the mask in class-index format
    """
    out = (np.ones(im.shape[:2]) * 255).astype(np.uint8)
    void_label = None
    for grey_val, (label, rgb) in enumerate(colors.items()):
        for el in rgb:
            match_pxls = np.where((im == np.asarray(el)).sum(-1) == 3)
            out[match_pxls] = grey_val
        # remember the index of the void color so unmapped pixels can
        # fall back to it
        if ignore_missing_labels and [0, 0, 0] in rgb:
            void_label = grey_val

    if ignore_missing_labels:
        match_missing = np.where(out == 255)
        if match_missing[0].size > 0:
            assert void_label is not None, \
                "no void color ([0, 0, 0]) in the color map"
            print "Ignoring missing labels"
            out[match_missing] = void_label

    assert (out != 255).all(), "rounding errors or missing classes in colors"
    return out


def resize():
    # TODO: not implemented; use skimage.transform.resize (sk_resize) instead
    pass


def crop():
    # TODO: not implemented
    pass


def zero_pad(im, resize_size, inpath="", pad_value=0):
    """
    :param im: the image to pad
    :param resize_size: the new (width, height) of the image
    :param inpath: [optional] the path of the image, for debug messages
    :param pad_value: the constant value used for padding
    :return: the padded image in the new dimensions
    """
    if im.ndim == 3:
        h, w, _ = im.shape
    elif im.ndim == 2:
        h, w = im.shape
    rw, rh = resize_size
    pad_w = rw - w
    pad_h = rh - h
    pad_l = pad_r = pad_u = pad_d = 0
    if pad_w > 0:
        pad_l = int(pad_w / 2)
        pad_r = pad_w - pad_l
    if pad_h > 0:
        pad_u = int(pad_h / 2)
        pad_d = pad_h - pad_u
    if im.ndim == 3:
        im = np.pad(im, ((pad_u, pad_d), (pad_l, pad_r), (0, 0)),
                    mode='constant', constant_values=pad_value)
    elif im.ndim == 2:
        im = np.pad(im, ((pad_u, pad_d), (pad_l, pad_r)),
                    mode='constant', constant_values=pad_value)
    assert (im.shape[1], im.shape[0]) == resize_size, \
        "Resize size doesn't match: resize_size->{} resized->{}" \
        " filename: {}".format(resize_size, (im.shape[1], im.shape[0]),
                               inpath)
    return im


def rgb2illumination_invariant(img, alpha, hist_eq=False):
    """
    An implementation of the illumination-invariant color space published
    by Maddern et al. 2014
    http://www.robots.ox.ac.uk/~mobile/Papers/2014ICRA_maddern.pdf

    :param img: float RGB image
    :param alpha: camera parameter, derived from the sensor's peak
        spectral responses
    :param hist_eq: if True, histogram-equalize the output
    :return: the single-channel illumination-invariant image
    """
    ii_img = 0.5 + np.log(img[:, :, 1] + 1e-8) - \
        alpha * np.log(img[:, :, 2] + 1e-8) - \
        (1 - alpha) * np.log(img[:, :, 0] + 1e-8)

    # ii_img = exposure.rescale_intensity(ii_img, out_range=(0, 1))
    if hist_eq:
        ii_img = exposure.equalize_hist(ii_img)

    # debug
    print np.max(ii_img)
    print np.min(ii_img)

    return ii_img
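
# Sketch of typical usage (not from the original code): the transform expects
# a float RGB image and a camera-specific alpha. Maddern et al. derive alpha
# from the sensor's peak spectral responses; 0.48 below is only an
# illustrative value, not a calibrated one.
#
#   >>> from skimage import data
#   >>> img = img_as_float(data.astronaut())          # H x W x 3 in [0, 1]
#   >>> ii = rgb2illumination_invariant(img, alpha=0.48, hist_eq=True)
#   >>> ii.shape == img.shape[:2]                     # single-channel output
#   True
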
def save_image(outpath, img):
    import errno
    try:
        os.makedirs(os.path.dirname(outpath))
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    imsave(outpath, img)


def save_RGB_mask(outpath, mask):
    # TODO: not implemented
    return


def preprocess_dataset(train, valid, test, input_to_float, preprocess_type,
                       patch_size, max_patches):
    if preprocess_type is None:
        if input_to_float:
            train = (train[0].astype(floatX) / 255., train[1])
            valid = (valid[0].astype(floatX) / 255., valid[1])
            test = (test[0].astype(floatX) / 255., test[1])
        return train, valid, test

    # whiten, LCN, GCN, local mean subtraction, ...
    out = []
    for name, (x, y) in zip(('train', 'valid', 'test'),
                            (train, valid, test)):
        if len(x) == 0:
            out.append((x, y))
            continue
        print ""
        print "Preprocessing {} images of the {} set with {} {}".format(
            len(x), name, preprocess_type, patch_size)
        x_pre = []
        for i, im in enumerate(x):
            im = np.expand_dims(im, axis=0)
            im_pre = preprocess(im, preprocess_type, patch_size, max_patches)
            x_pre.append(im_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100. / len(x)),
            sys.stdout.flush()
        x_pre = np.array(x_pre)
        if input_to_float:
            x_pre = x_pre.astype(floatX) / 255.
        out.append((x_pre, np.array(y)))
    train, valid, test = out
    return train, valid, test
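
# Minimal sketch of how preprocess_dataset is meant to be driven (shapes and
# dtypes here are assumptions, not from the original code): each split is an
# (images, labels) pair with images as an [n, h, w, c] array.
#
#   >>> x = np.random.randint(0, 256, (4, 32, 32, 3)).astype(np.uint8)
#   >>> y = np.zeros(4, dtype=np.int32)
#   >>> train, valid, test = preprocess_dataset(
#   ...     (x, y), (x, y), (x, y), input_to_float=True,
#   ...     preprocess_type='gcn', patch_size=9, max_patches=int(1e4))
#   >>> train[0].shape
#   (4, 32, 32, 3)
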
def preprocess(x, mode=None, patch_size=9, max_patches=int(1e5)):
    """
    :param x: 4D array of images, [batch, rows, cols, channels]
    :param mode: one of 'conv-zca', 'sub-lcn', 'subdiv-lcn', 'gcn' or
        'local_mean_sub'; None leaves the data untouched
    :param patch_size: kernel/patch size used by the selected method
    :param max_patches: max number of patches sampled for conv-zca
    :return: the preprocessed images
    """
    if mode == 'conv-zca':
        x = convolutional_zca(x, patch_size=patch_size,
                              max_patches=max_patches)
    elif mode == 'sub-lcn':
        for d in range(x.shape[-1]):
            x[:, :, :, d] = lecun_lcn(x[:, :, :, d], kernel_size=patch_size)
    elif mode == 'subdiv-lcn':
        for d in range(x.shape[-1]):
            x[:, :, :, d] = lecun_lcn(x[:, :, :, d], kernel_size=patch_size,
                                      use_divisor=True)
    elif mode == 'gcn':
        for d in range(x.shape[-1]):
            x[:, :, :, d] = global_contrast_normalization(x[:, :, :, d])
    elif mode == 'local_mean_sub':
        for d in range(x.shape[-1]):
            x[:, :, :, d] = local_mean_subtraction(x[:, :, :, d],
                                                   kernel_size=patch_size)
    return x


def lecun_lcn(input, kernel_size=9, threshold=1e-4, use_divisor=False):
    """
    Yann LeCun's local contrast normalization.
    Original code in Theano by: Guillaume Desjardins

    :param input: 3D array of images, [batch, rows, cols]
    :param kernel_size: size of the Gaussian kernel
    :param threshold: lower bound on the divisor
    :param use_divisor: if True, also divide by the local norm
    :return: the normalized images, [batch, rows, cols]
    """
    input_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(input_shape).astype(floatX)

    X = T.tensor4(dtype=floatX)
    filter_shape = (1, 1, kernel_size, kernel_size)
    filters = gaussian_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(filters, dtype=floatX), borrow=True)

    # subtract the local (Gaussian-weighted) mean
    convout = conv2d(input=X,
                     filters=filters,
                     input_shape=input.shape,
                     filter_shape=filter_shape,
                     border_mode='half')
    new_X = X - convout

    if use_divisor:
        # scale down by the norm of each kernel_size x kernel_size patch
        sum_sqr_XX = conv2d(input=T.sqr(new_X),
                            filters=filters,
                            input_shape=input.shape,
                            filter_shape=filter_shape,
                            border_mode='half')
        denom = T.sqrt(sum_sqr_XX)
        per_img_mean = denom.mean(axis=[2, 3])
        divisor = T.largest(per_img_mean.dimshuffle(0, 1, 'x', 'x'), denom)
        divisor = T.maximum(divisor, threshold)
        new_X = new_X / divisor

    # back to the caller's [batch, rows, cols] layout
    new_X = new_X.dimshuffle(0, 2, 3, 1)
    new_X = new_X.flatten(ndim=3)

    f = function([X], new_X)
    return f(input)


def local_mean_subtraction(input, kernel_size=5):
    input_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(input_shape).astype(floatX)

    X = T.tensor4(dtype=floatX)
    filter_shape = (1, 1, kernel_size, kernel_size)
    filters = mean_filter(kernel_size).reshape(filter_shape)
    filters = shared(_asarray(filters, dtype=floatX), borrow=True)

    mean = conv2d(input=X,
                  filters=filters,
                  input_shape=input.shape,
                  filter_shape=filter_shape,
                  border_mode='half')
    new_X = X - mean

    # back to the caller's [batch, rows, cols] layout, as in lecun_lcn
    # (without this, the result stays [batch, 1, rows, cols] and the
    # per-channel assignment in preprocess() fails)
    new_X = new_X.dimshuffle(0, 2, 3, 1).flatten(ndim=3)

    f = function([X], new_X)
    return f(input)
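
# Quick sanity check for local_mean_subtraction (a sketch, not from the
# original code): after subtracting the local mean, a constant image should
# map to zero everywhere except near the borders, where the zero padding of
# border_mode='half' biases the local mean.
#
#   >>> imgs = np.ones((2, 16, 16), dtype=floatX)
#   >>> out = local_mean_subtraction(imgs, kernel_size=5)
#   >>> np.allclose(out[:, 2:-2, 2:-2], 0., atol=1e-6)
#   True
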
def global_contrast_normalization(input, scale=1., subtract_mean=True,
                                  use_std=False, sqrt_bias=0.,
                                  min_divisor=1e-8):
    input_shape = (input.shape[0], 1, input.shape[1], input.shape[2])
    input = input.reshape(input_shape).astype(floatX)
    X = T.tensor4(dtype=floatX)

    ndim = X.ndim
    if ndim not in [3, 4]:
        raise NotImplementedError("X.ndim>4 or X.ndim<3")

    scale = float(scale)
    mean = X.mean(axis=ndim - 1)
    new_X = X.copy()

    if subtract_mean:
        if ndim == 3:
            new_X = X - mean[:, :, None]
        else:
            new_X = X - mean[:, :, :, None]

    if use_std:
        normalizers = T.sqrt(sqrt_bias + X.var(axis=ndim - 1)) / scale
    else:
        normalizers = T.sqrt(sqrt_bias +
                             (new_X ** 2).sum(axis=ndim - 1)) / scale

    # Don't normalize by anything too small. Note: set_subtensor is not
    # in-place in Theano, so its result must be assigned back.
    normalizers = T.set_subtensor(
        normalizers[(normalizers < min_divisor).nonzero()], 1.)

    if ndim == 3:
        new_X /= normalizers[:, :, None]
    else:
        new_X /= normalizers[:, :, :, None]

    # back to the caller's [batch, rows, cols] layout, as in lecun_lcn
    new_X = new_X.dimshuffle(0, 2, 3, 1).flatten(ndim=3)

    f = function([X], new_X)
    return f(input)


def gaussian_filter(kernel_shape):
    x = zeros((kernel_shape, kernel_shape), dtype='float32')

    def gauss(x, y, sigma=2.0):
        Z = 2 * pi * sigma ** 2
        return 1. / Z * exp(-(x ** 2 + y ** 2) / (2. * sigma ** 2))

    mid = floor(kernel_shape / 2.)
    for i in xrange(kernel_shape):
        for j in xrange(kernel_shape):
            x[i, j] = gauss(i - mid, j - mid)

    return x / sum(x)


def mean_filter(kernel_size):
    s = kernel_size ** 2
    x = repeat(1. / s, s).reshape((kernel_size, kernel_size))
    return x
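
# Sanity checks (a sketch, not from the original code): both kernels should
# be proper averaging kernels, i.e. non-negative and summing to 1, so the
# convolutions above compute (weighted) local means.
#
#   >>> g = gaussian_filter(9)
#   >>> abs(g.sum() - 1.) < 1e-6
#   True
#   >>> m = mean_filter(5)
#   >>> (m == 1. / 25).all()
#   True
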
def convolutional_zca(input, patch_size=9, max_patches=int(1e5)):
    """
    An implementation of the convolutional ZCA whitening presented by
    David Eigen in his PhD thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf
    "Predicting Images using Convolutional Networks: Visual Scene
    Understanding with Pixel Maps"

    From paragraph 8.4:
    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger
    image. We then use the center pixel of each ZCA patch to create the
    conv-ZCA output image. The operations of applying local ZCA and
    selecting the center pixel can be combined into a single convolution
    kernel, resulting in the following algorithm (explained using RGB
    inputs and a 9x9 kernel):
    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D
    3. Optionally remove small eigenvalues, so V has shape [npca x 3 x 9 x 9]
    4. Construct the whitening kernel k: for each pair of colors (ci, cj),
       set k[j, i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :],
       where (x0, y0) is the center pixel location (e.g. (5, 5) for a
       9x9 kernel)

    :param input: 4D tensor of shape [batch_size, rows, cols, channels]
    :param patch_size: side of the patches extracted from the dataset
    :param max_patches: max number of patches extracted from the dataset
    :return: the conv-ZCA whitened dataset
    """
    # Note by the original author: "I don't know if it's correct or not..
    # but it seems to work"
    mean = np.mean(input, axis=(0, 1, 2))
    input -= mean  # center the data

    n_imgs, h, w, n_channels = input.shape
    patch_size = (patch_size, patch_size)
    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # transpose the components into Theano convolution filter layout
    dim = (-1,) + patch_size + (n_channels,)
    V = shared(pca.components_.reshape(dim)
               .transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    x_0 = int(np.floor(patch_size[0] / 2))
    y_0 = int(np.floor(patch_size[1] / 2))

    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)

    # V  : npca x n_channels x patch_rows x patch_cols (e.g. 243 x 3 x 9 x 9)
    # VT : patch_rows x patch_cols x n_channels x npca (e.g. 9 x 9 x 3 x 243)

    # build the whitening kernel
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')
    # crop the 'full' convolution back to the input's spatial size
    s_crop = [(patch_size[0] - 1) // 2, (patch_size[1] - 1) // 2]
    conv_whitening = conv_whitening[:, :, s_crop[0]:-s_crop[0],
                                    s_crop[1]:-s_crop[1]]
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)

    return f_convZCA(input)
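

if __name__ == '__main__':
    # Smoke test (a sketch, not part of the original module): whiten a small
    # random batch and check that the output keeps the input's shape. Real
    # usage would pass actual images and a much larger max_patches.
    rng = np.random.RandomState(0)
    batch = rng.rand(2, 32, 32, 3).astype(floatX)
    white = convolutional_zca(batch, patch_size=9, max_patches=500)
    print(white.shape)  # expected: (2, 32, 32, 3)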