Python scipy.spatial.distance() Examples

The following are 30 code examples of scipy.spatial.distance(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.spatial , or try the search function .
Example #1
Source File: utils.py    From nolitsa with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def dist(x, y, metric='chebyshev'):
    """Compute the distance between all sequential pairs of points.

    Computes the distance between all sequential pairs of points from
    two arrays using scipy.spatial.distance.

    Paramters
    ---------
    x : ndarray
        Input array.
    y : ndarray
        Input array.
    metric : string, optional (default = 'chebyshev')
        Metric to use while computing distances.

    Returns
    -------
    d : ndarray
        Array containing distances.
    """
    func = getattr(distance, metric)
    return np.asarray([func(i, j) for i, j in zip(x, y)]) 
Example #2
Source File: seistomopy_gui.py    From SeisTomoPy_V3 with GNU General Public License v3.0 6 votes vote down vote up
def _on_map_mouse_click_event(self, event):
        if None in (event.xdata, event.ydata):
            return
        # Get map coordinates by the inverse transform.
        lng, lat = self.map(event.xdata, event.ydata, inverse=True)
        # Left click: set mid point
        if event.button == 1:
            self.ui.midpt_longitude.setValue(lng)
            self.ui.midpt_latitude.setValue(lat)
            rng = float(self.ui.width_cross.value())/2. # distance in degrees
            r = self.center_pt
            lon, lat = r.longitude, r.latitude
            az = float(self.ui.azimuth.value())
            end_lat,end_lon,end_lat2,end_lon2 = SeisTomoPy.path_tracer(lon,lat,rng,az)
            self.ui.label_4.setText(str(int(end_lat2)))
            self.ui.label_9.setText(str(int(end_lon2)))
            self._plot_path() 
Example #3
Source File: util.py    From multisensory with Apache License 2.0 6 votes vote down vote up
def knnsearch(N, X, k = 1, method = 'brute', p = 2.):
  #if p != 2: assert method == 'kd'

  if method == 'kd':
    kd_ = kd(N)
    return kd_query(kd_, X, k = k, p = p)
  elif method == 'brute':
    import scipy.spatial.distance
    if p == 2:
      D = scipy.spatial.distance.cdist(X, N)
    else:
      D = scipy.spatial.distance.cdist(X, N, p)

    if k == 1:
      I = np.argmin(D, 1)[:, np.newaxis]
    else:
      I = np.argsort(D)[:, :k]
    return D[np.arange(D.shape[0])[:, np.newaxis], I], I 
  else:
    fail('Unknown search method: %s' % method) 
Example #4
Source File: main.py    From scTDA with GNU General Public License v3.0 6 votes vote down vote up
def get_gene(self, genin, ignore_log=False, con=True):
        """
        Returns a dictionary that asigns to each node id the average value of the column 'genin' in the raw table.
        'genin' can be also a list of columns, on which case the average of all columns. The output is normalized
        such that the sum over all nodes is equal to 1. It also provides as an output the normalization factor, to
        convert the dictionary to log_2(1+TPM) units (or TPM units). When 'ignore_log' is True it treat entries as
        being in natural scale, even if self.log2 is True (used internally). When 'con' is False, it uses all
        nodes, not only the ones in the first connected component of the topological representation (used internally).
        Argument 'genin' may also be equal to the special keyword '_dist_root', on which case it returns the graph
        distance funtion to the root node. It can be also equal to 'timepoint_xxx', on which case it returns a
        dictionary with the fraction of cells belonging to timepoint xxx in each node.
        """
        if genin == '_dist_root':
            return self.get_distroot(self.root), 1.0
        elif genin is not None and 'timepoint_' in genin:
            return self.count_gene(self.rootlane, float(genin[genin.index('_')+1:]))
        else:
            return UnrootedGraph.get_gene(self, genin, ignore_log, con) 
Example #5
Source File: similarity.py    From lexpredict-contraxsuite with GNU Affero General Public License v3.0 6 votes vote down vote up
def get_similarity_matrix(self, observation_matrix: numpy.array):
        """
        Calculate the pairwise similarity of a set of records in an MxN `observation_matrix` with M records and N features.
        :param observation_matrix: numpy.array - feature matrix
        :return: numpy.array - distance_matrix
        # TODO: Implement boolean conversion for relevant distance metrics, e.g., Jaccard.
        """
        # Toggle TF-IDF
        if self.use_tfidf:
            observation_matrix = sklearn.feature_extraction.text.TfidfTransformer().fit_transform(
                observation_matrix).toarray()
            distance_matrix = scipy.spatial.distance.squareform(
                scipy.spatial.distance.pdist(observation_matrix, metric=self.distance_type))
        else:
            distance_matrix = scipy.spatial.distance.squareform(
                scipy.spatial.distance.pdist(observation_matrix, self.distance_type))

        return distance_matrix 
Example #6
Source File: main.py    From scTDA with GNU General Public License v3.0 6 votes vote down vote up
def cellular_subpopulations(self, threshold=0.05, min_cells=5, clus_thres=0.65):
        """
        Identifies potential transient cellular subpopulations. The parameter
        'threshold' sets an upper bound of the q-value of the genes that are considered in the analysis.
        The parameter 'min_cells' sets the minimum number of cells on which each of the genes considered in the
        analysis is expressed. Cellular subpopulations are determined by clustering the Jensen-Shannon distance
        matrix of the genes that pass all the constraints. The number of clusters is controlled in this case by
        the parameter 'clus_thres'. In both cases a list with the genes associated to each cluster is returned.
        It requires the presence of the file 'name.genes.tsv', produced by the method RotedGraph.save().
        """
        con = []
        dis = []
        nam = []
        f = open(self.name + '.genes.tsv', 'r')
        for n, line in enumerate(f):
            if n > 0:
                sp = line[:-1].split('\t')
                if float(sp[7]) < threshold and float(sp[1]) > min_cells:
                    nam.append(sp[0])
        f.close()
        mat2 = self.JSD_matrix(nam)
        return [map(lambda xx: nam[xx], m)
                for m in find_clusters(hierarchical_clustering(mat2, labels=nam,
                                                               cluster_distance=True, thres=clus_thres)).values()] 
Example #7
Source File: main.py    From scTDA with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, table, lens='mds', metric='correlation', precomputed=False, **kwargs):
        """
        Initializes the class by providing the mapper input table generated by Preprocess.save(). The parameter 'metric'
        specifies the metric distance to be used ('correlation', 'euclidean' or 'neighbor'). The parameter 'lens'
        specifies the dimensional reduction algorithm to be used ('mds' or 'pca'). The rest of the arguments are
        passed directly to sklearn.manifold.MDS or sklearn.decomposition.PCA. It plots the low-dimensional projection
        of the data.
        """
        self.df = pandas.read_table(table + '.mapper.tsv')
        if lens == 'neighbor':
            self.lens_data_mds = sakmapper.apply_lens(self.df, lens=lens, **kwargs)
        elif lens == 'mds':
            if precomputed:
                self.lens_data_mds = sakmapper.apply_lens(self.df, lens=lens, metric=metric,
                                                          dissimilarity='precomputed', **kwargs)
            else:
                self.lens_data_mds = sakmapper.apply_lens(self.df, lens=lens, metric=metric, **kwargs)
        else:
            self.lens_data_mds = sakmapper.apply_lens(self.df, lens=lens, **kwargs)
        pylab.figure()
        pylab.scatter(numpy.array(self.lens_data_mds)[:, 0], numpy.array(self.lens_data_mds)[:, 1], s=10, alpha=0.7)
        pylab.show() 
Example #8
Source File: rbf.py    From pwtools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rbf_multi(rsq, p):
    r"""Multiquadric RBF :math:`\sqrt{r^2 + p^2}`

    Parameters
    ----------
    rsq : float
        squared distance :math:`r^2`
    p : float
        width
    """
    return np.sqrt(rsq + p**2.0) 
Example #9
Source File: TableRecognition.py    From OTR with GNU General Public License v3.0 5 votes vote down vote up
def find_corner_clusters(self, distance_threshold=20.):
        # Find all bounding box corners
        corners = []
        for i in range(len(self.contours)):
            if self.contours[i] is None: continue
            bbox = self.contours_bbox[i]
            for coord in bbox:
                corners.append((coord[0], coord[1]))

        # Simpler algorithm, still superfast (<40 ms for 2k corners): Compute all distances using cdist
        corners = np.asarray(corners)
        distmat = scipy.spatial.distance.cdist(corners, corners, 'euclidean')
        ignore = np.zeros(corners.shape[0], np.bool) # Set to true if we found a cluster for this node already

        # Find cluster in the distance matrix, i.e. node groups which are close together
        cluster_coords = []  # For each cluster, a (x,y coordinate pair)
        cluster_num_nodes = []  # For each cluster, the number of nodes it consists of
        cluster_coords_to_node_id = {}  # (x,y) tuple => cluster ID
        for i in range(corners.shape[0]):
            if ignore[i]: continue
            # Which nodes are close to this node, including itself
            below_thresh = distmat[i, :] < distance_threshold # Rather set this large, we can correct non-convexity later
            allnodes = np.nonzero(below_thresh)[0] # index list
            # Get a new ID
            clusterid = len(cluster_coords)
            allcorners = corners[allnodes]
            cluster_coords.append(tuple(cv_algorithms.meanCenter(allcorners)))
            cluster_num_nodes.append(allnodes.size)
            # Also create a map from each position to the current cluster ID
            # This works only because these coordinates are discrete integer pixel indices
            for coord in allcorners:
                cluster_coords_to_node_id[tuple(coord)] = clusterid
            # Ignore all nodes in the cluster (i.e. don't assign them to a new cluster)
            ignore[allnodes] = True
        # Now that the size is known, we can convert to numpy arrays
        self.cluster_coords = np.asarray(cluster_coords)
        self.cluster_num_nodes = np.asarray(cluster_num_nodes)
        self.cluster_coords_to_node_id = cluster_coords_to_node_id 
Example #10
Source File: util.py    From multisensory with Apache License 2.0 5 votes vote down vote up
def center_point(pts):
  # """ Choose the point closest to the center of the point set, as
  # measured by the median distance to every other point. O(n^2)."""
  best_err = np.inf
  best_pt = pts[0]
  for p1 in pts:
    err = np.median([pylab.dist(p1, p2) for p2 in pts])
    #err = np.sum([np.sum((p1 - p2)**2)**0.5 for p2 in pts])
    if err < best_err:
      best_err = err
      best_pt = p1
  return best_pt 
Example #11
Source File: clusterer.py    From yelp with GNU Lesser General Public License v2.1 5 votes vote down vote up
def k_means_scikit(matrix):
        k_means = skcluster.KMeans(n_clusters=50, init='k-means++',
                                   n_init=1, verbose=1)
        k_means.fit(matrix)

        return k_means.labels_

    # OK
    # With cosine distance the algorithm doesn't converge 
Example #12
Source File: LeniaND.py    From Lenia with MIT License 5 votes vote down vote up
def draw_recurrence(self, e=0.1, steps=10):
        ''' https://stackoverflow.com/questions/33650371/recurrence-plot-in-python '''
        if self.analyzer.series == [] or len(self.analyzer.series[-1]) < 2:
            return

        size = min(SIZEX*PIXEL, SIZEY*PIXEL)
        segment = np.asarray(self.analyzer.series[-1])[-size:, self.analyzer.RECURRENCE_RANGE]
        vmin, vmax = segment.min(axis=0), segment.max(axis=0)
        # vmean = (vmax + vmin) / 2
        # d = vmax - vmin < 0.01
        # vmin[d], vmax[d] = vmean[d] - 0.01, vmean[d] + 0.01
        segment = self.normalize(segment, vmin, vmax)
        D = scipy.spatial.distance.squareform(np.log(scipy.spatial.distance.pdist(segment))) + np.eye(segment.shape[0]) * -100
        buffer = np.uint8(np.clip(-D/2, 0, 1) * 253)
        self.img = PIL.Image.frombuffer('L', buffer.shape, buffer, 'raw', 'L', 0, 1) 
Example #13
Source File: symmetry.py    From molecular-design-toolkit with Apache License 2.0 5 votes vote down vote up
def get_symmetrized_coords(self, elem):
        """
        Symmetrize the molecule based on the symmetry operation
        This will work as long as the symmetry operation brings each atom closest to a symmetry relation.
        """
        import scipy.spatial.distance

        # First, apply the transformation
        oriented_coords = self.orientation
        transformed_coords = self.orientation.T.ldot(elem.matrix).T

        # Next, calculate the correspondence between the untransformed and transformed atoms
        align_to_transform = {}  # map between the original positions and their transformed positions
        transform_to_align = {}  # inverse
        byelement = mdt.utils.Categorizer(lambda x: x.element, self.mol.atoms)
        for elemname, atoms in byelement.items():
            indices = np.array([atom.index for atom in atoms])
            atoms_aligned = oriented_coords[indices].defunits_value()
            atoms_transformed = transformed_coords[indices].defunits_value()
            distances = scipy.spatial.distance.cdist(atoms_aligned, atoms_transformed)
            for a_idx, t_idx in enumerate(distances.argmin(axis=0)):
                align_to_transform[indices[a_idx]] = indices[t_idx]
            for t_idx, a_idx in enumerate(distances.argmin(axis=1)):
                transform_to_align[indices[t_idx]] = indices[a_idx]

        # Make the positions exactly symmetric by averaging them
        pos = np.zeros(transformed_coords.shape) * u.default.length
        for align_atom, transform_atom in align_to_transform.items():
            assert transform_to_align[transform_atom] == align_atom, \
                'Molecule is too far from this symmetry to symmetrize'
            pos[transform_atom] = (oriented_coords[transform_atom] +
                                   transformed_coords[align_atom]) / 2.0
        return pos 
Example #14
Source File: main.py    From scTDA with GNU General Public License v3.0 5 votes vote down vote up
def plot_rootlane_correlation(self):
        """
        Displays correlation between sampling time points and graph distance to root node. It returns the two
        parameters of the linear fit, Pearson's r, p-value and standard error.
        """
        pylab.scatter(self.pel, self.dr, s=9.0, alpha=0.7, c='r')
        pylab.xlim(min(self.pel), max(self.pel))
        pylab.ylim(0, max(self.dr)+1)
        pylab.xlabel(self.rootlane)
        pylab.ylabel('Distance to root node')
        xk = pylab.linspace(min(self.pel), max(self.pel), 50)
        pylab.plot(xk, self.po[1]+self.po[0]*xk, 'k--', linewidth=2.0)
        pylab.show()
        return self.po 
Example #15
Source File: main.py    From scTDA with GNU General Public License v3.0 5 votes vote down vote up
def cor_matrix(self, lista, c=1):
        """
        Returns correlation distance matrix of the list of genes specified by 'lista'. It uses 'c' cores for the
        computation.
        """
        geys = numpy.array([self.dicgenes[mju] for mju in lista])
        return sklearn.metrics.pairwise.pairwise_distances(geys, metric='correlation', n_jobs=c) 
Example #16
Source File: similarity.py    From lexpredict-contraxsuite with GNU Affero General Public License v3.0 5 votes vote down vote up
def _get_similarity(self):
        """
        Central method to calculate and store similarity -
        Non optimized memory usage - may fail on large term frequency matrix
        """
        # get Feature object, see real signature in features.py module
        features = self.get_features()

        # Get distance matrix
        distance_matrix = self.get_similarity_matrix(features.term_frequency_matrix)

        counter = 0
        # Create records based on threshold
        for i in range(distance_matrix.shape[0]):
            sim_obj_list = []
            for j in range(i):
                if distance_matrix[i, j] <= (1.0 - self.threshold):
                    sim_obj = self.similarity_model()
                    setattr(sim_obj, self.similarity_model_field_a, features.item_index[i])
                    setattr(sim_obj, self.similarity_model_field_b, features.item_index[j])
                    sim_obj.similarity = 100 * (1.0 - distance_matrix[i, j])
                    sim_obj.similarity_type = self.distance_type
                    sim_obj_list.append(sim_obj)

            # Bulk create
            self.similarity_model.objects.bulk_create(sim_obj_list)
            counter += len(sim_obj_list)
        return counter 
Example #17
Source File: kps.py    From imgaug with MIT License 5 votes vote down vote up
def coords_almost_equals(self, other, max_distance=1e-4):
        """Estimate if this and another KP have almost identical coordinates.

        Added in 0.4.0.

        Parameters
        ----------
        other : imgaug.augmentables.kps.Keypoint or iterable
            The other keypoint with which to compare this one.
            If this is an ``iterable``, it is assumed to contain the
            xy-coordinates of a keypoint.

        max_distance : number, optional
            The maximum euclidean distance between a this keypoint and the
            other one. If the distance is exceeded, the two keypoints are not
            viewed as equal.

        Returns
        -------
        bool
            Whether the two keypoints have almost identical coordinates.

        """
        if ia.is_np_array(other):
            # we use flat here in case other is (N,2) instead of (4,)
            coords_b = other.flat
        elif ia.is_iterable(other):
            coords_b = list(ia.flatten(other))
        else:
            assert isinstance(other, Keypoint), (
                "Expected 'other' to be an iterable containing one "
                "(x,y)-coordinate pair or a Keypoint. "
                "Got type %s." % (type(other),))
            coords_b = other.coords.flat

        coords_a = self.coords

        return np.allclose(coords_a.flat, coords_b, atol=max_distance, rtol=0) 
Example #18
Source File: polys.py    From imgaug with MIT License 5 votes vote down vote up
def find_closest_point_index(self, x, y, return_distance=False):
        """Find the index of the exterior point closest to given coordinates.

        "Closeness" is here defined based on euclidean distance.
        This method will raise an ``AssertionError`` if the exterior contains
        no points.

        Parameters
        ----------
        x : number
            X-coordinate around which to search for close points.

        y : number
            Y-coordinate around which to search for close points.

        return_distance : bool, optional
            Whether to also return the distance of the closest point.

        Returns
        -------
        int
            Index of the closest point.

        number
            Euclidean distance to the closest point.
            This value is only returned if `return_distance` was set
            to ``True``.

        """
        assert len(self.exterior) > 0, (
            "Cannot find the closest point on a polygon which's exterior "
            "contains no points.")
        distances = []
        for x2, y2 in self.exterior:
            dist = (x2 - x) ** 2 + (y2 - y) ** 2
            distances.append(dist)
        distances = np.sqrt(distances)
        closest_idx = np.argmin(distances)
        if return_distance:
            return closest_idx, distances[closest_idx]
        return closest_idx 
Example #19
Source File: compute_class_embedding.py    From semantic-embeddings with MIT License 5 votes vote down vote up
def mds(class_dist, num_dim = None):
    """
    Finds an embedding of `n` classes in a `d`-dimensional space, so that their Euclidean distances corresponds
    to pre-defined ones, using classical multidimensional scaling (MDS).
    
    class_dist - `n-by-n` matrix specifying the desired distance between each pair of classes.
                 The distances in this matrix *must* define a proper metric that fulfills the triangle inequality.
                 Otherwise, a `RuntimeError` will be raised.
    num_dim - Optionally, the maximum target dimensionality `d` for the embeddings. If not given, it will be determined
              automatically based on the eigenvalues, but this might not be accurate due to limited machine precision.
    
    Returns: `n-by-d` matrix with rows being the locations of the corresponding classes in the embedding space.
    """

    H = np.eye(class_dist.shape[0], dtype=class_dist.dtype) - np.ones(class_dist.shape, dtype=class_dist.dtype) / class_dist.shape[0]
    B = np.dot(H, np.dot(class_dist ** 2, H)) / -2

    eigval, eigvec = np.linalg.eigh(B)
    nonzero_eigvals = (eigval > np.finfo(class_dist.dtype).eps)
    eigval = eigval[nonzero_eigvals]
    eigvec = eigvec[:,nonzero_eigvals]
    
    if num_dim is not None:
        sort_ind = np.argsort(eigval)[::-1]
        eigval = eigval[sort_ind[:num_dim]]
        eigvec = eigvec[:,sort_ind[:num_dim]]

    embedding = eigvec * np.sqrt(eigval[None,:])
    return embedding 
Example #20
Source File: rbf.py    From pwtools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def interpolate(self, points):
        """Actually do interpolation. Return interpolated values at each
        point in points.

        Parameters
        ----------
        points : see :meth:`__call__`

        Returns
        -------
        vals : 1d array (points.shape[0],)

        Notes
        -----
        Calculates
            distsq = (points - centers)**2  # squared distance matrix
            G = rbf(distsq)                 # RBF values
            zi = dot(G, w)                  # interpolation z_i = Sum_j g_ij w_j
        """
        self._assert_ndim_points(points)
        distsq = self.get_distsq(points=points)
        G = self.rbf(distsq, self.p)
        assert G.shape[1] == len(self.w), \
               "shape mismatch between g_ij: %s and w_j: %s, 2nd dim of "\
               "g_ij must match length of w_j" %(str(G.shape),
                                                 str(self.w.shape))
        # normalize w
        ww = self.w
        maxw = np.abs(ww).max()*1.0
        return np.dot(G, ww / maxw) * maxw 
Example #21
Source File: rbf.py    From pwtools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_distsq(self, points=None):
        """Matrix of distance values :math:`R_{ij} = |\mathbf x_i - \mathbf
        c_j|`.

            | :math:`\mathbf x_i` : points[i,:]
            | :math:`\mathbf c_i` : centers[i,:]

        Parameters
        ----------
        points : array (M,N) with N-dim points, optional
            If None then ``self.points`` is used (training points).

        Returns
        -------
        distsq : (M,K), where K = M usually for training
        """
        # pure numpy:
        #     dist = points[:,None,...] - centers[None,...]
        #     distsq = (dist**2.0).sum(axis=-1)
        # where
        #     points:  (M,N)
        #     centers: (K,N)
        #     dist:    (M,K,N) "matrix" of distance vectors (only for numpy case)
        #     distsq:  (M,K)    matrix of squared distance values
        # Creates *big* temporary arrays if points is big (~1e4 points).
        #
        # training:
        #     If points == centers, we could also use pdist(points), which
        #     would give us a 1d array of all distances. But we need the
        #     redundant square matrix form for G=rbf(distsq) anyway, so there
        #     is no real point in special-casing that. These two are the same:
        #      >>> R = spatial.squareform(spatial.distances.pdist(points))
        #      >>> R = spatial.distances.cdist(points,points)
        #      >>> distsq = R**2
        if points is None:
            if self.distsq is None:
                return num.distsq(self.points, self.centers)
            else:
                return self.distsq
        else:
            return num.distsq(points, self.centers) 
Example #22
Source File: rbf.py    From pwtools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rbf_inv_multi(rsq, p):
    r"""Inverse Multiquadric RBF :math:`\frac{1}{\sqrt{r^2 + p^2}}`

    Parameters
    ----------
    rsq : float
        squared distance :math:`r^2`
    p : float
        width
    """
    return 1/rbf_multi(rsq, p) 
Example #23
Source File: rbf.py    From pwtools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rbf_gauss(rsq, p):
    r"""Gaussian RBF :math:`\exp\left(-\frac{r^2}{2\,p^2}\right)`

    Parameters
    ----------
    rsq : float
        squared distance :math:`r^2`
    p : float
        width
    """
    return np.exp(-0.5*rsq / p**2.0) 
Example #24
Source File: main.py    From scTDA with GNU General Public License v3.0 4 votes vote down vote up
def JSD_matrix(self, lista, maximum_matrix_entries=5*10**7, verbose=False):
        """
        Returns the Jensen-Shannon distance matrix of the list of genes specified by 'lista'. If 'verbose' is set to
        True, it prints progress on the screen. 'maximum_matrix_entries' limits memory usage. If the largest matrix constructed
        as part of the calculation exceeds 'maximum_matrix_entries', the job is divided into multiple jobs, performed in series.
        The largest matrix has dimension (# nodes)*(# genes tested)^2.
        """
        ge = numpy.array([self.get_gene(genis)[0].values() for genis in lista])
        ge2 = numpy.copy(ge)
        ge2[ge2 == 0] = 1
        plogp = numpy.sum(ge2*numpy.log2(ge2), axis=1)
        plogptile = numpy.tile(plogp, (len(lista), 1))
        if len(ge)*len(ge[0])**2 <= maximum_matrix_entries:
            if verbose:
                print 'within limits: %s' % (len(ge)*len(ge[0])**2)
            ge_tile = numpy.tile(ge,(len(lista), 1, 1))
            ge_tile_T = numpy.transpose(ge_tile, [1, 0, 2])
            pq = 0.5*(ge_tile + ge_tile_T)
            pq[pq == 0] = 1
            pqlogpq = numpy.sum(pq*numpy.log2(pq), axis=2)
        else:
            group_number = len(ge)*len(ge[0])**2 / maximum_matrix_entries + 1
            group_length = len(ge) / group_number
            dd = range(0, len(ge), group_length)
            if dd[-1] != len(ge):
                dd += [len(ge)]
            if verbose:
                print 'outside limits: %s' % (len(ge)*len(ge[0])**2)
                print 'group_number = %s' % group_number
                print 'group_length = %s' % group_length
                print 'dd = %s'%dd
            for i in range(len(dd)-1):
                if verbose:
                    print 'i = %s' % i
                ge_tile = numpy.tile(ge, (dd[i+1] - dd[i], 1, 1))
                ge_tile_T = numpy.transpose(numpy.tile(ge[range(dd[i], dd[i+1])], (len(ge), 1, 1)), [1, 0, 2])
                pq = 0.5*(ge_tile + ge_tile_T)
                pq[pq == 0] = 1
                sliver = numpy.sum(pq*numpy.log2(pq), axis=2)
                if i == 0:
                    pqlogpq = sliver
                else:
                    pqlogpq = numpy.vstack((pqlogpq, sliver))
        jsdiv = 0.5 * (plogptile + plogptile.T - 2*pqlogpq)
        return numpy.sqrt(jsdiv) 
Example #25
Source File: main.py    From scTDA with GNU General Public License v3.0 4 votes vote down vote up
def hierarchical_clustering(mat, method='average', cluster_distance=True, labels=None, thres=0.65):
    """
    Performs hierarchical clustering based on distance matrix 'mat' using the method specified by 'method'.
    Optional argument 'labels' may specify a list of labels. If cluster_distance is True, the clustering is
    performed on the distance matrix using euclidean distance. Otherwise, mat specifies the distance matrix for
    clustering. Adapted from
    http://stackoverflow.com/questions/7664826/how-to-get-flat-clustering-corresponding-to-color-clusters-in-the-dendrogram-cre
    Not subjected to copyright.
    """
    D = numpy.array(mat)
    if not cluster_distance:
        Dtriangle = scipy.spatial.distance.squareform(D)
    else:
        Dtriangle = scipy.spatial.distance.pdist(D, metric='euclidean')
    fig = pylab.figure(figsize=(8, 8))
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Y = sch.linkage(Dtriangle, method=method)
    Z1 = sch.dendrogram(Y, orientation='right', color_threshold=thres*max(Y[:, 2]))
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Y = sch.linkage(Dtriangle, method=method)
    Z2 = sch.dendrogram(Y, color_threshold=thres*max(Y[:, 2]))
    ax2.set_xticks([])
    ax2.set_yticks([])
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=pylab.get_cmap('jet_r'))
    if labels is None:
        axmatrix.set_xticks([])
        axmatrix.set_yticks([])
    else:
        axmatrix.set_xticks(range(len(labels)))
        lab = [labels[idx1[m]] for m in range(len(labels))]
        axmatrix.set_xticklabels(lab)
        axmatrix.set_yticks(range(len(labels)))
        axmatrix.set_yticklabels(lab)
        for tick in pylab.gca().xaxis.iter_ticks():
            tick[0].label2On = False
            tick[0].label1On = True
            tick[0].label1.set_rotation('vertical')
        for tick in pylab.gca().yaxis.iter_ticks():
            tick[0].label2On = True
            tick[0].label1On = False
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    pylab.show()
    return Z1 
Example #26
Source File: kps.py    From imgaug with MIT License 4 votes vote down vote up
def compute_geometric_median(points=None, eps=1e-5, X=None):
    """Estimate the geometric median of points in 2D.

    Code from https://stackoverflow.com/a/30305181

    Parameters
    ----------
    points : (N,2) ndarray
        Points in 2D. Second axis must be given in xy-form.

    eps : float, optional
        Distance threshold when to return the median.

    X : None or (N,2) ndarray, optional
        Deprecated.

    Returns
    -------
    (2,) ndarray
        Geometric median as xy-coordinate.

    """
    # pylint: disable=invalid-name
    if X is not None:
        assert points is None
        ia.warn_deprecated("Using 'X' is deprecated, use 'points' instead.")
        points = X

    y = np.mean(points, 0)

    while True:
        dist = scipy.spatial.distance.cdist(points, [y])
        nonzeros = (dist != 0)[:, 0]

        dist_inv = 1 / dist[nonzeros]
        dist_inv_sum = np.sum(dist_inv)
        dist_inv_norm = dist_inv / dist_inv_sum
        T = np.sum(dist_inv_norm * points[nonzeros], 0)

        num_zeros = len(points) - np.sum(nonzeros)
        if num_zeros == 0:
            y1 = T
        elif num_zeros == len(points):
            return y
        else:
            R = (T - y) * dist_inv_sum
            r = np.linalg.norm(R)
            rinv = 0 if r == 0 else num_zeros/r
            y1 = max(0, 1-rinv)*T + min(1, rinv)*y

        if scipy.spatial.distance.euclidean(y, y1) < eps:
            return y1

        y = y1 
Example #27
Source File: polys.py    From imgaug with MIT License 4 votes vote down vote up
def exterior_almost_equals(self, other, max_distance=1e-4,
                               points_per_edge=8):
        """Estimate if this and another polygon's exterior are almost identical.

        The two exteriors can have different numbers of points, but any point
        randomly sampled on the exterior of one polygon should be close to the
        closest point on the exterior of the other polygon.

        .. note::

            This method works in an approximative way. One can come up with
            polygons with fairly different shapes that will still be estimated
            as equal by this method. In practice however this should be
            unlikely to be the case. The probability for something like that
            goes down as the interpolation parameter is increased.

        Parameters
        ----------
        other : imgaug.augmentables.polys.Polygon or (N,2) ndarray or list of tuple
            The other polygon with which to compare the exterior.
            If this is an ``ndarray``, it is assumed to represent an exterior.
            It must then have dtype ``float32`` and shape ``(N,2)`` with the
            second dimension denoting xy-coordinates.
            If this is a ``list`` of ``tuple`` s, it is assumed to represent
            an exterior. Each tuple then must contain exactly two ``number`` s,
            denoting xy-coordinates.

        max_distance : number, optional
            The maximum euclidean distance between a point on one polygon and
            the closest point on the other polygon. If the distance is exceeded
            for any such pair, the two exteriors are not viewed as equal. The
            points are either the points contained in the polygon's exterior
            ndarray or interpolated points between these.

        points_per_edge : int, optional
            How many points to interpolate on each edge.

        Returns
        -------
        bool
            Whether the two polygon's exteriors can be viewed as equal
            (approximate test).

        """
        if isinstance(other, list):
            other = Polygon(np.float32(other))
        elif ia.is_np_array(other):
            other = Polygon(other)
        else:
            assert isinstance(other, Polygon), (
                "Expected 'other' to be a list of coordinates, a coordinate "
                "array or a single Polygon. Got type %s." % (type(other),))

        return self.to_line_string(closed=True).coords_almost_equals(
            other.to_line_string(closed=True),
            max_distance=max_distance,
            points_per_edge=points_per_edge
        ) 
Example #28
Source File: polys.py    From imgaug with MIT License 4 votes vote down vote up
def change_first_point_by_coords(self, x, y, max_distance=1e-4,
                                     raise_if_too_far_away=True):
        """
        Reorder exterior points so that the point closest to given x/y is first.

        This method takes a given ``(x,y)`` coordinate, finds the closest
        corner point on the exterior and reorders all exterior corner points
        so that the found point becomes the first one in the array.

        If no matching points are found, an exception is raised.

        Parameters
        ----------
        x : number
            X-coordinate of the point.

        y : number
            Y-coordinate of the point.

        max_distance : None or number, optional
            Maximum distance past which possible matches are ignored.
            If ``None`` the distance limit is deactivated.

        raise_if_too_far_away : bool, optional
            Whether to raise an exception if the closest found point is too
            far away (``True``) or simply return an unchanged copy if this
            object (``False``).

        Returns
        -------
        imgaug.augmentables.polys.Polygon
            Copy of this polygon with the new point order.

        """
        if len(self.exterior) == 0:
            raise Exception("Cannot reorder polygon points, because it "
                            "contains no points.")

        closest_idx, closest_dist = self.find_closest_point_index(
            x=x, y=y, return_distance=True)
        if max_distance is not None and closest_dist > max_distance:
            if not raise_if_too_far_away:
                return self.deepcopy()

            closest_point = self.exterior[closest_idx, :]
            raise Exception(
                "Closest found point (%.9f, %.9f) exceeds max_distance of "
                "%.9f exceeded" % (
                    closest_point[0], closest_point[1], closest_dist))
        return self.change_first_point_by_index(closest_idx) 
Example #29
Source File: seistomopy_gui.py    From SeisTomoPy_V3 with GNU General Public License v3.0 4 votes vote down vote up
def _plot_path(self):
        PC=np.loadtxt(DIR + '/hotspots.xy')

        rng = float(self.ui.width_cross.value())/2. # distance in degrees
        r = self.center_pt
        lon, lat = r.longitude, r.latitude
        az = float(self.ui.azimuth.value())

        try:
            if self.__midpt_map_obj.longitude == lon and \
                    self.__midpt_map_obj.latitude == lat and\
                    self.__azimuth_map_obj == az and\
                    self.__width_map_obj == rng*2.:
                return
        except AttributeError:
            pass

        try:
            self.__midpt_map_obj.remove()
            self.__startpoint_map_obj.remove()
            self.__endpoint_map_obj.remove()
            self.__great_circle_obj.remove()            
            self.__great_circle_obj1.remove()
            self.__PC_map_obj4.remove()
            self.__PC_map_obj5.remove()
            self.__great_circle_obj_path.remove()
            self.__great_circle_obj_path1.remove()
        except AttributeError:
            pass

        x1, y1 = self.map(lon, lat)
        self.__midpt_map_obj = self.map.scatter(x1, y1, s=40, zorder=10,
                                                   color="yellow", marker="o",
                                                   edgecolor="k")
        self.__midpt_map_obj.longitude = lon
        self.__midpt_map_obj.latitude = lat
        self.__azimuth_map_obj = az
        self.__width_map_obj = rng*2.

        #  plot great circle
        end_lat,end_lon,end_lat2,end_lon2 = SeisTomoPy.path_tracer(lon,lat,rng,az)
        x2, y2 = self.map(end_lon,end_lat)
        x3, y3 = self.map(end_lon2,end_lat2)
        self.__great_circle_obj, = self.map.drawgreatcircle(lon,lat,end_lon,end_lat,ls='None',marker='o',markersize=2,color='k')
        self.__great_circle_obj1, = self.map.drawgreatcircle(lon,lat,end_lon2,end_lat2,ls='None',marker='o',markersize=2,color='k')

        # plot starting and end points
        self.__endpoint_map_obj = self.map.scatter(x2, y2, s=40, zorder=10,
                                                   color="red", marker="o",
                                                   edgecolor="k")
        self.__startpoint_map_obj = self.map.scatter(x3, y3, s=40, zorder=10,
                                                   color="lime", marker="o",
                                                   edgecolor="k")
        self.fig2_cross.canvas.draw()

        self.ui.label_4.setText(str(int(end_lat2)))
        self.ui.label_9.setText(str(int(end_lon2))) 
Example #30
Source File: seistomopy_gui.py    From SeisTomoPy_V3 with GNU General Public License v3.0 4 votes vote down vote up
def update_cross(self, force=False):

        rng = float(self.ui.width_cross.value())/2. # distance in degrees
        r = self.center_pt
        lon, lat = r.longitude, r.latitude
        az = float(self.ui.azimuth.value())

        try:
            end_latf,end_lonf,end_lati,end_loni = SeisTomoPy.path_tracer(lon,lat,rng,az)
            self._plot_path()
            self.ui.label_4.setText(str(int(end_lati)))
            self.ui.label_9.setText(str(int(end_loni)))
            tomo_choice_cross = int(self.ui.model_cross.currentIndex())
            para_cross = int(self.ui.parameter_cross.currentIndex())
            if tomo_choice_cross == 0:
                model_cross = 'SEISGLOB2'
            elif tomo_choice_cross == 1:
                model_cross = 'S40RTS'
            elif tomo_choice_cross == 2:
                model_cross = 'SEMUCB-WM1'
            elif tomo_choice_cross == 3:
                model_cross = 'S362WMANI+M'
            elif tomo_choice_cross == 4:
                model_cross = 'SEISGLOB1'
            elif tomo_choice_cross == 5:
                model_cross = 'SP12RTS'
            elif tomo_choice_cross == 6:
                model_cross = 'SGLOBE-rani'
            elif tomo_choice_cross == 7:
                model_cross = '3D2016-09S'
            elif tomo_choice_cross == 8:
                model_cross = 'YOUR MODEL'


            if para_cross == 0:
                para = 'Vs'
            elif para_cross == 1:
                para = 'Vp'
            elif para_cross == 2:
                para = 'Density'

            self.ui.label_16.setText('Model: ' + str(model_cross) + ' and Parameter: ' + str(para))

        except AttributeError:
            return