""" Implementation of normalized cut segmentation methods. Reference --------- Jianbo Shi and Jitendra Malik. Normalized Cuts and Image Segmentation. IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE, VOL. 22, NO. 8, AUGUST 2000. """ from __future__ import division from builtins import str from builtins import range from builtins import object from past.utils import old_div import os from distutils.version import LooseVersion import itertools as it import abc import numpy as np from scipy.sparse.linalg import eigsh from scipy.sparse import diags from scipy import sparse, ndimage import sima.misc from sima.ROI import ROI, ROIList from .segment import SegmentationStrategy, _check_single_plane from . import oPCA from . import _opca from future.utils import with_metaclass try: import cv2 except ImportError: cv2_available = False else: cv2_available = LooseVersion(cv2.__version__) >= LooseVersion('2.4.8') def normcut_vectors(affinity_matrix, k): """Return the normalized cut vectors. These vectors satisfy are the largest eigenvalues of $D^{-1/2} W D^{-1/2}$, or equivalently the smallest of $D^{-1/2} (D - W) D^{-1/2}$. The first eigenvalue should be constant in all entries, but the second eigenvalue can be used to determine the normalized cut. See Shi & Malik, 2000. Parameters ---------- A : array The affinity matrix containing the weight between graph nodes. k : int The number of cut eigenvectors to return. Returns ------- array The normcut vectors. Shape (num_nodes, k). """ node_degrees = np.array(affinity_matrix.sum(axis=0)).flatten() transformation_matrix = diags(np.sqrt(old_div(1., node_degrees)), 0) normalized_affinity_matrix = transformation_matrix * affinity_matrix * \ transformation_matrix _, vects = eigsh(normalized_affinity_matrix, k + 1, sigma=1.001, which='LM') # Get the largest eigenvalues. cuts = transformation_matrix * vects return cuts class CutRegion(object): """A subgraph of an affinity matrix used for iteratively cutting with the normalized cut procedure. Parameters ---------- affinity_matrix : (sparse) array The affinities between the nodes of the graph. indices : array-like Indices of the nodes in the original graph that are contained in the CutRegion. shape : tuple The shape of the image represented by the graph. """ def __init__(self, affinity_matrix, indices, shape): self.affinity_matrix = affinity_matrix self.indices = indices self.shape = shape assert self.affinity_matrix.shape[0] == self.affinity_matrix.shape[1] assert self.affinity_matrix.shape[0] == len(self.indices) self._discard_isolated_nodes() assert self.affinity_matrix.shape[0] == self.affinity_matrix.shape[1] assert self.affinity_matrix.shape[0] == len(self.indices) def _discard_isolated_nodes(self): """Removes any nodes with degree 0 from the region.""" node_degrees = np.array(self.affinity_matrix.sum(axis=0)).flatten() ix = np.nonzero(node_degrees)[0] if len(ix): self.affinity_matrix = self.affinity_matrix[ix, :][:, ix] self.indices = self.indices[ix] else: self.affinity_matrix = np.zeros((0, 0)) self.indices = self.indices[ix] def _normalized_cut_cost(self, cut): """Return the normalized cut cost for a given cut. Parameters ---------- cut : ndarray of bool True/False indicating one of the segments. """ node_degrees = self.affinity_matrix.sum(axis=0) k = old_div(node_degrees[:, cut].sum(), node_degrees.sum()) node_degrees = diags(np.array(node_degrees).flatten(), 0) b = old_div(k, (1 - k)) y = np.matrix(cut - b * np.logical_not(cut)).T return float(y.T * (node_degrees - self.affinity_matrix) * y) / ( y.T * node_degrees * y) def split(self): """Split the region according to the normalized cut criterion. Returns ------- list of CutRegion The regions created by splitting. float The normalized cut cost. """ if not cv2_available: raise ImportError('OpenCV >= 2.4.8 required') tmp_im = np.zeros(self.shape[0] * self.shape[1]) tmp_im[self.indices] = 1 labeled_array, num_features = ndimage.label(tmp_im.reshape(self.shape)) if num_features > 1: labeled_array = labeled_array.reshape(-1)[self.indices] segments = [] for i in range(1, num_features + 1): idx = np.nonzero(labeled_array == i)[0] segments.append(CutRegion(self.affinity_matrix[idx, :][:, idx], self.indices[idx], self.shape)) return segments, 0.0 C = normcut_vectors(self.affinity_matrix, 1) im = C[:, -2] im -= im.min() im /= im.max() markers = -np.ones(self.shape[0] * self.shape[1]).astype('uint16') markers[self.indices] = 0 markers[self.indices[im < 0.02]] = 1 markers[self.indices[im > 0.98]] = 2 markers = markers.reshape(self.shape) vis2 = 0.5 * np.ones(self.shape[0] * self.shape[1]) vis2[self.indices] = im vis2 *= (2 ** 8 - 1) vis2 = cv2.cvtColor(np.uint8(vis2.reshape(self.shape)), cv2.COLOR_GRAY2BGR) markers = np.int32(markers) cv2.watershed(vis2, markers) cut = ndimage.morphology.binary_dilation(markers == 2).reshape(-1)[ self.indices] cut[im > 0.98] = True cost = self._normalized_cut_cost(cut) for thresh in np.linspace(0, 1, 20)[1:-1]: tmp_cut = im > thresh tmp_cost = self._normalized_cut_cost(tmp_cut) if tmp_cost < cost: cost = tmp_cost cut = tmp_cut a = cut.nonzero()[0] a = cut.nonzero()[0] b = np.logical_not(cut).nonzero()[0] return ( [CutRegion(self.affinity_matrix[x, :][:, x], self.indices[x], self.shape) for x in [a, b] if len(x)], self._normalized_cut_cost(cut) ) def itercut(affinity_matrix, shape, max_pen=0.01, min_size=40, max_size=200): """Iteratively cut the graph represented by the affinity matrix. Parameters ---------- affinity_matrix : (sparse) array The affinities between the nodes of the graph. shape : tuple The shape of the image represented by the graph. max_pen : float Iterative cutting will continue as long as the cut cost is less than max_pen. min_size, max_size : int Regardless of the cut cost, iterative cutting will not be performed on regions with fewer pixels than min_size and will always be performed on regions larger than max_size. Returns ------- list of CutRegion The regions produced by the iterative cutting procedure. """ cut_cue = [CutRegion(affinity_matrix, np.arange(affinity_matrix.shape[0]), shape)] region_list = [] while len(cut_cue): cut = cut_cue.pop() if len(cut.indices) < min_size: region_list.append(cut) else: cuts, penalty = cut.split() # assert set(cut.indices) == set.union( # *[set(c.indices) for c in cuts]) if penalty < max_pen or len(cut.indices) > max_size: cut_cue.extend(cuts) else: region_list.append(cut) return region_list class AffinityMatrixMethod(with_metaclass(abc.ABCMeta, object)): """Method for calculating the affinity matrix""" @abc.abstractmethod def calculate(self, dataset): """Calculate the affinity matrix for a dataset. Parameters ---------- dataset : sima.ImagingDataset The dataset for which the affinity matrix is to be calculated. Returns ------- affinities : scipy.sparse.coo_matrix The affinities between the image pixels. """ return def _direction(vects, weights=None): if weights is None: vects_ = vects else: vects_ = vects * weights return (vects_.T / np.sqrt((vects_ ** 2).sum(axis=-1).T)).T def _offset_corrs(dataset, pixel_pairs, channel=0, method='EM', num_pcs=75, verbose=False): """ Calculate the offset correlation for specified pixel pairs. Parameters ----------- dataset : sima.ImagingDataset The dataset to be used. pixel_pairs : ndarray of int The pairs of pixels, indexed ((y0, x0), (y1, x1)) for which the correlation is to be calculated. channel : string or int, optional Channel to be used for estimating the pixel correlations, either an integer index or a channel name. Default: 0. method : {'EM', 'fast'}, optional The method for estimating the correlations. EM uses the EM algorithm to perform OPCA. Fast calculates the offset correlations directly, but is more noisy since all PCs are used. Default: EM. num_pcs : int, optional The number of principal components to be used in the estimated correlations with the EM method. Default: 75. verbose : bool, optional Whether to print progress status. Default: False. Returns ------- correlations: dict A dictionary whose keys are the elements of the pixel_pairs input list, and whose values are the calculated offset correlations. """ channel = sima.misc.resolve_channels(channel, dataset.channel_names) if method == 'EM': if dataset.savedir is not None: path = os.path.join( dataset.savedir, 'opca_' + str(channel) + '.npz') else: path = None oPC_vars, oPCs, _ = oPCA.dataset_opca(dataset, channel, num_pcs, path, verbose=verbose) weights = np.sqrt(np.maximum(0, oPC_vars)) D = _direction(oPCs[0], weights) # TODO: Generalize to 3D datasets return { ((u, v), (w, x)): np.dot(D[u, v, :], D[w, x, :]) for u, v, w, x in pixel_pairs } elif method == 'fast': ostdevs, correlations, pixels = _opca._fast_ocorr( dataset, pixel_pairs, channel) ostdevs /= dataset.num_frames - 1. correlations /= 2. * (dataset.num_frames - 1) ostdevs = np.sqrt(np.maximum(0., ostdevs)) for pair_idx, pair in enumerate(pixel_pairs): denom = ostdevs[pair[0], pair[1]] * ostdevs[pair[2], pair[3]] if denom <= 0: correlations[pair_idx] = 0. else: correlations[pair_idx] = max( -1., min(1., correlations[pair_idx] / denom)) return { ((PAIR[0], PAIR[1]), (PAIR[2], PAIR[3])): correlations[pair_idx] for pair_idx, PAIR in enumerate(pixel_pairs)} class BasicAffinityMatrix(AffinityMatrixMethod): """Return a sparse affinity matrix for use with normalized cuts. The affinity :math:`A_{ij}` between each pair of pixels :math:`i,j` is a function of the correlation :math:`c_{i,j}` of the pixel-intensity time series, and the relative locations (:math:`\\mathbf X_i,\mathbf X_j`) of the pixels: .. math:: A_{ij} = e^{k_cc_{ij}} \cdot e^{-\\frac{|\mathbf X_i-\mathbf X_j|_2^2}{\\sigma_{\\mathbf X}^2}}, with :math:`k_c` and :math:`\\sigma_{\mathbf X}^2` being automatically determined constants. Parameters ---------- channel : string or int, optional Channel containing the signal to be processed, either an integer index or a channel name. Default: 0. max_dist : tuple of int, optional Defaults to (2, 2). spatial_decay : tuple of int, optional Defaults to (2, 2). num_pcs : int, optional The number of principal component to use. Default: 75. verbose : bool, optional Whether to print progress status. Default: False. """ def __init__(self, channel=0, max_dist=None, spatial_decay=None, num_pcs=75, verbose=False): if max_dist is None: max_dist = (2, 2) if spatial_decay is None: spatial_decay = (2, 2) self._params = dict(locals()) del self._params['self'] def _calculate_correlations(self, dataset): shape = dataset.frame_shape[1:3] max_dist = self._params['max_dist'] pairs = [] for y, x in it.product(range(shape[0]), range(shape[1])): for dx in range(max_dist[1] + 1): if dx == 0: yrange = list(range(1, max_dist[0] + 1)) else: yrange = list(range(-max_dist[0], max_dist[0] + 1)) for dy in yrange: if (x + dx < shape[1]) and (y + dy >= 0) and \ (y + dy < shape[0]): pairs.append( np.reshape([y, x, y + dy, x + dx], (1, 4))) channel = sima.misc.resolve_channels(self._params['channel'], dataset.channel_names) return _offset_corrs( dataset, np.concatenate(pairs, 0), channel, num_pcs=self._params['num_pcs'], verbose=self._params['verbose']) def _weight(self, r0, r1): Y, X = self._params['spatial_decay'] dy = r1[0] - r0[0] dx = r1[1] - r0[1] return np.exp(9. * self._correlations[(r0, r1)]) * np.exp( -0.5 * ((float(dx) / X) ** 2 + (float(dy) / Y) ** 2)) def _setup(self, dataset): self._correlations = self._calculate_correlations(dataset) def calculate(self, dataset): self._setup(dataset) max_dist = self._params['max_dist'] shape = dataset.frame_shape[1:3] A = sparse.dok_matrix((shape[0] * shape[1], shape[0] * shape[1])) for y, x in it.product(range(shape[0]), range(shape[1])): for dx in range(max_dist[1] + 1): if dx == 0: yrange = list(range(1, max_dist[0] + 1)) else: yrange = list(range(-max_dist[0], max_dist[0] + 1)) for dy in yrange: r0 = (y, x) r1 = (y + dy, x + dx) if (x + dx < shape[1]) and (y + dy >= 0) and \ (y + dy < shape[0]): w = self._weight(r0, r1) assert np.isfinite(w) a = x + y * shape[1] b = a + dx + dy * shape[1] A[a, b] = w A[b, a] = w # TODO: Use symmetric matrix structure return sparse.csr_matrix(sparse.coo_matrix(A), dtype=float) class PlaneNormalizedCuts(SegmentationStrategy): """Segment image by iteratively performing normalized cuts. Parameters ---------- affinity_method : AffinityMatrixMethod The method used to calculate the affinity matrix. max_pen : float Iterative cutting will continue as long as the cut cost is less than max_pen. cut_min_size, cut_max_size : int Regardless of the cut cost, iterative cutting will not be performed on regions with fewer pixels than min_size and will always be performed on regions larger than max_size. Notes ----- The normalized cut procedure [3]_ is iteratively applied, first to the entire image, and then to each cut made from the previous application of the procedure. References ---------- .. [3] Jianbo Shi and Jitendra Malik. Normalized Cuts and Image Segmentation. IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE, VOL. 22, NO. 8, AUGUST 2000. Warning ------- In version 1.0, this method currently only works on datasets with a single plane, or in conjunction with :class:`sima.segment.PlaneWiseSegmentation`. """ def __init__(self, affinity_method=None, cut_max_pen=0.01, cut_min_size=40, cut_max_size=200): super(PlaneNormalizedCuts, self).__init__() if affinity_method is None: affinity_method = BasicAffinityMatrix(channel=0, num_pcs=75) self._params = dict(locals()) del self._params['self'] @classmethod def _rois_from_cuts(cls, cuts): """Return ROI structures each containing the full extent of a cut. Parameters ---------- cuts : list of sima.normcut.CutRegion The segmented regions identified by normalized cuts. Returns ------- sima.ROI.ROIList ROI structures corresponding to each cut. """ ROIs = ROIList([]) for cut in cuts: if len(cut.indices): mask = np.zeros(cut.shape) for x in cut.indices: mask[np.unravel_index(x, cut.shape)] = 1 ROIs.append(ROI(mask=mask)) return ROIs @_check_single_plane def _segment(self, dataset): params = self._params affinity = params['affinity_method'].calculate(dataset) shape = dataset.frame_shape[1:3] cuts = itercut(affinity, shape, params['cut_max_pen'], params['cut_min_size'], params['cut_max_size']) return self._rois_from_cuts(cuts)