# [the original paper](https://arxiv.org/abs/1404.3606) import itertools from chainer.cuda import to_gpu, to_cpu from chainer.functions import convolution_2d import numpy as np from sklearn.decomposition import IncrementalPCA # from utils import gpu_enabled # if gpu_enabled(): # try: # import cupy as xp # except ImportError: # import numpy as xp # else: import numpy as xp def steps(image_shape, filter_shape, step_shape): h, w = image_shape fh, fw = filter_shape sh, sw = step_shape ys = range(0, h-fh+1, sh) xs = range(0, w-fw+1, sw) return ys, xs def components_to_filters(components, n_channels, filter_shape): n_filters = components.shape[0] return components.reshape(n_filters, n_channels, *filter_shape) def output_shape(ys, xs): return len(ys), len(xs) class Patches(object): def __init__(self, image, filter_shape, step_shape): assert(image.ndim == 2) # should be either numpy.ndarray or cupy.ndarray self.ndarray = type(image) self.image = image self.filter_shape = filter_shape self.ys, self.xs = steps(image.shape[0:2], filter_shape, step_shape) @property def patches(self): """ Return image patches of shape (n_patches, filter_height, filter_width) """ fh, fw = self.filter_shape it = list(itertools.product(self.ys, self.xs)) patches = self.ndarray((len(it), fh, fw), dtype=self.image.dtype) for i, (y, x) in enumerate(it): patches[i, :, :] = self.image[y:y+fh, x:x+fw] return patches @property def output_shape(self): return output_shape(self.ys, self.xs) def atleast_4d(images): """Regard gray-scale images as 1-channel images""" assert(np.ndim(images) == 3) n, h, w = images.shape return images.reshape(n, h, w, 1) def to_channels_first(images): # images.shape == (n_images, y, x, n_channels) images = np.swapaxes(images, 1, 3) images = np.swapaxes(images, 2, 3) # images.shape == (n_images, n_channels, y, x) return images def image_to_patch_vectors(image, filter_shape, step_shape): """ Parameters ---------- image: np.array Image to extract patch vectors filter_shape: tuple of ints The shape of a filter step_shape: tuple of ints Step height/width of a filter Returns ------- X: np.array A set of normalized and flattened patches """ X = Patches(image, filter_shape, step_shape).patches # X.shape == (n_patches, filter_height, filter_width) X = X.reshape(X.shape[0], -1) # flatten each patch X = X - X.mean(axis=1, keepdims=True) # Remove mean from each patch. return X # \overline{X}_i in the original paper def binarize(X): X[X > 0] = 1 X[X <= 0] = 0 return X def binary_to_decimal(X): """ Parameters ---------- X: xp.ndarray Feature maps """ # This function expects X of shape (n_images, L2, y, x) # as an argument. # Let's say that X[k] (0 <= k < n_images) can be represented like # X[k] = [map_k[0], map_k[1], ..., map_k[L2-1]] # where the shape of each map_k is (y, x). # Then we calculate # a[0] * map_k[0] + a[1] * map_k[1] + ... + a[L2-1] * map_k[L2-1] # for each X[k], where a = [2^(L2-1), 2^(L2-2), ..., 2^0] # Therefore, the output shape must be (n_images, y, x) a = xp.arange(X.shape[1])[::-1] a = xp.power(2, a) return xp.tensordot(X, a, axes=([1], [0])) def to_tuple_if_int(value): """ If int is given, duplicate it and return as a 2 element tuple. """ if isinstance(value, int): return (value, value) return value class PCANet(object): def __init__(self, image_shape, filter_shape_l1, step_shape_l1, n_l1_output, filter_shape_l2, step_shape_l2, n_l2_output, filter_shape_pooling, step_shape_pooling): """ Parameters ---------- image_shape: int or sequence of ints Input image shape. filter_shape_l1: int or sequence of ints The shape of the kernel in the first convolution layer. If the value is int, a filter of the square shape is applied. If you want to apply a filter of a different aspect ratio, just pass a tuple of shape (height, width). step_shape_l1: int or sequence of ints The shape of kernel step in the first convolution layer. If the value is int, a step of the square shape is applied. If you want to apply a step of a different aspect ratio, just pass a tuple of shape (height, width). n_l1_output: L1 in the original paper. The number of outputs obtained from a set of input images. filter_shape_l2: int or sequence of ints The shape of the kernel in the second convolution layer. If the value is int, a filter of the square shape is applied. If you want to apply a filter of a different aspect ratio, just pass a tuple of shape (height, width). step_shape_l2: int or sequence of ints The shape of kernel step in the second convolution layer. If the value is int, a step of the square shape is applied. If you want to apply a step of a different aspect ratio, just pass a tuple of shape (height, width). n_l2_output: L2 in the original paper. The number of outputs obtained from each L1 output. filter_shape_pooling: int or sequence of ints The shape of the filter in the pooling layer. step_shape_pooling: int or sequence of ints The shape of the filter step in the pooling layer. """ self.image_shape = to_tuple_if_int(image_shape) self.filter_shape_l1 = to_tuple_if_int(filter_shape_l1) self.step_shape_l1 = to_tuple_if_int(step_shape_l1) self.n_l1_output = n_l1_output self.filter_shape_l2 = to_tuple_if_int(filter_shape_l2) self.step_shape_l2 = to_tuple_if_int(step_shape_l2) self.n_l2_output = n_l2_output self.filter_shape_pooling = to_tuple_if_int(filter_shape_pooling) self.step_shape_pooling = to_tuple_if_int(step_shape_pooling) self.n_bins = None # TODO make n_bins specifiable self.pca_l1 = IncrementalPCA(n_l1_output) self.pca_l2 = IncrementalPCA(n_l2_output) def histogram(self, binary_images): """ Separate a given image into blocks and calculate a histogram in each block. Supporse data in a block is in range [0, 3] and the acutual values are :: [0 0 1] [2 2 2] [2 3 3] If default bins ``[-0.5 0.5 1.5 2.5 3.5]`` applied, then the histogram will be ``[2 1 4 2]``. If ``n_bins`` is specified, the range of data divided equally. For example, if the data is in range ``[0, 3]`` and ``n_bins = 2``, bins will be ``[-0.5 1.5 3.5]`` and the histogram will be ``[3 6]``. """ k = pow(2, self.n_l2_output) if self.n_bins is None: self.n_bins = k + 1 bins = xp.linspace(-0.5, k - 0.5, self.n_bins) def bhist(image): # calculate Bhist(T) in the original paper ps = Patches( image, self.filter_shape_pooling, self.step_shape_pooling).patches H = [xp.histogram(p.flatten(), bins)[0] for p in ps] return xp.concatenate(H) return xp.vstack([bhist(image) for image in binary_images]) def process_input(self, images): assert(np.ndim(images) >= 3) assert(images.shape[1:3] == self.image_shape) if np.ndim(images) == 3: # forcibly convert to multi-channel images images = atleast_4d(images) images = to_channels_first(images) return images def fit(self, images): images = self.process_input(images) # images.shape == (n_images, n_channels, y, x) for image in images: X = [] for channel in image: patches = image_to_patch_vectors( channel, self.filter_shape_l1, self.step_shape_l1 ) X.append(patches) patches = np.hstack(X) # patches.shape = (n_patches, n_patches * vector length) self.pca_l1.partial_fit(patches) filters_l1 = components_to_filters( self.pca_l1.components_, n_channels=images.shape[1], filter_shape=self.filter_shape_l1, ) # if gpu_enabled(): # images = to_gpu(images) # filters_l1 = to_gpu(filters_l1) images = convolution_2d( images, filters_l1, stride=self.step_shape_l1 ).data # if gpu_enabled(): # images = to_cpu(images) # filters_l1 = to_cpu(filters_l1) # images.shape == (n_images, L1, y, x) images = images.reshape(-1, *images.shape[2:4]) for image in images: patches = image_to_patch_vectors( image, self.filter_shape_l2, self.step_shape_l2 ) self.pca_l2.partial_fit(patches) return self def transform(self, images): images = self.process_input(images) # images.shape == (n_images, n_channels, y, x) filters_l1 = components_to_filters( self.pca_l1.components_, n_channels=images.shape[1], filter_shape=self.filter_shape_l1, ) filters_l2 = components_to_filters( self.pca_l2.components_, n_channels=1, filter_shape=self.filter_shape_l2 ) # if gpu_enabled(): # images = to_gpu(images) # filters_l1 = to_gpu(filters_l1) # filters_l2 = to_gpu(filters_l2) images = convolution_2d( images, filters_l1, stride=self.step_shape_l1 ).data images = xp.swapaxes(images, 0, 1) # L1.shape == (L1, n_images, y, x) # iterate over each L1 output X = [] for maps in images: n_images, h, w = maps.shape maps = convolution_2d( maps.reshape(n_images, 1, h, w), # 1 channel images filters_l2, stride=self.step_shape_l2 ).data # maps.shape == (n_images, L2, y, x) right here maps = binarize(maps) maps = binary_to_decimal(maps) # maps.shape == (n_images, y, x) x = self.histogram(maps) # x is a set of feature vectors. # The shape of x is (n_images, vector length) X.append(x) # concatenate over L1 X = xp.hstack(X) # if gpu_enabled(): # X = to_cpu(X) X = X.astype(np.float64) # The shape of X is (n_images, L1 * vector length) return X def validate_structure(self): """ Check that the filter visits all pixels of input images without dropping any information. Raise ValueError if the network structure does not satisfy the above constraint. """ def is_valid_(input_shape, filter_shape, step_shape): ys, xs = steps(input_shape, filter_shape, step_shape) fh, fw = filter_shape h, w = input_shape if ys[-1]+fh != h or xs[-1]+fw != w: raise ValueError("Invalid network structure.") return output_shape(ys, xs) output_shape_l1 = is_valid_(self.image_shape, self.filter_shape_l1, self.step_shape_l1) output_shape_l2 = is_valid_(output_shape_l1, self.filter_shape_l2, self.step_shape_l2) is_valid_( output_shape_l2, self.filter_shape_pooling, self.filter_shape_pooling )