Python scipy.spatial.distance.pdist() Examples

The following are 30 code examples of scipy.spatial.distance.pdist(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.spatial.distance, or try the search function.
Example #1
Source File: pdist.py    From mars with Apache License 2.0 6 votes vote down vote up
def _execute_single(cls, ctx, op):
        """Compute ``pdist`` for one operand and store the result in ``ctx``.

        The operand's inputs are looked up in ``ctx`` by key.  The first one
        is the observation matrix; the following ones are consumed, in
        order, for whichever of ``op.w``, ``op.v`` and ``op.vi`` is set.

        Raises
        ------
        NotImplementedError
            If the array module resolved for the inputs is ``cp``.
        """
        from scipy.spatial.distance import pdist

        fetched = [ctx[inp.key] for inp in op.inputs]
        inputs, device_id, xp = as_same_device(
            fetched, device=op.device, ret_extra=True)

        if xp is cp:  # pragma: no cover
            raise NotImplementedError('`pdist` does not support running on GPU yet')

        with device(device_id):
            remaining = iter(inputs)
            x = next(remaining)
            kw = {} if op.p is None else {'p': op.p}
            # Optional array arguments arrive as extra inputs, always in the
            # order w, V, VI; each one present takes the next input.
            for attr, keyword in (('w', 'w'), ('v', 'V'), ('vi', 'VI')):
                if getattr(op, attr) is not None:
                    kw[keyword] = next(remaining)

        ctx[op.outputs[0].key] = pdist(x, metric=op.metric, **kw)
Example #2
Source File: hierarchy.py    From Computable with MIT License 6 votes vote down vote up
def average(y):
    """
    Run average (UPGMA) linkage clustering on a condensed distance matrix.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The hierarchical clustering encoded as a linkage matrix. Its
        layout is described in the ``linkage`` function documentation.

    See Also
    --------
    linkage: for advanced creation of hierarchical clusterings.

    """
    Z = linkage(y, method='average', metric='euclidean')
    return Z
Example #3
Source File: dataset.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License 6 votes vote down vote up
def k_nearest_neighbor(self, sequence):
        """Build a randomised nearest-neighbour tour over ``sequence``.

        Starting from the first city, each step orders the unvisited cities
        by distance from the current one and picks one at random among the
        closest ``self.kNN`` (or among all that remain, if fewer are left).

        Parameters
        ----------
        sequence : array-like
            City coordinates, one row per city, as accepted by ``pdist``.

        Returns
        -------
        ndarray
            The rows of ``sequence`` reordered along the constructed tour.
        """
        # Calculate dist_matrix
        dist_matrix = squareform(pdist(sequence))
        n_cities = len(sequence)
        # A boolean mask replaces the former list of visited cities: the
        # unvisited candidates are filtered in one vectorised step instead of
        # scanning the list once per candidate.
        visited = np.zeros(n_cities, dtype=bool)
        visited[0] = True
        # Construct tour
        new_sequence = [sequence[0]]
        current_city = 0
        for i in range(1, n_cities):
            # n_cities - i cities are still unvisited at this point.
            j = np.random.randint(0, min(n_cities - i, self.kNN))
            by_distance = dist_matrix[current_city].argsort()
            next_city = by_distance[~visited[by_distance]][j]
            visited[next_city] = True
            new_sequence.append(sequence[next_city])
            current_city = next_city
        return np.asarray(new_sequence)


    # Generate random TSP-TW instance 
Example #4
Source File: test_pairwise.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_euclidean_distances_sym(dtype, x_array_constr):
    """Compare ``euclidean_distances(X)`` against scipy's ``pdist``.

    Only ``X`` is passed, so the result is checked against the square form
    of ``pdist(X)`` and must keep the requested dtype.
    """
    random_state = np.random.RandomState(0)
    dense = random_state.random_sample((100, 10)).astype(dtype, copy=False)
    # Zero out every entry below 0.8.
    dense[dense < 0.8] = 0

    reference = squareform(pdist(dense))

    actual = euclidean_distances(x_array_constr(dense))

    # rtol is loosened from the default 1e-7, which sits too close to the
    # float32 precision and makes the check fail due to rounding errors.
    assert_allclose(actual, reference, rtol=1e-6)
    assert actual.dtype == dtype
Example #5
Source File: hierarchy.py    From Computable with MIT License 6 votes vote down vote up
def complete(y):
    """
    Run complete (max / farthest point) linkage clustering on a condensed
    distance matrix.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The hierarchical clustering encoded as a linkage matrix. Its
        layout is described in the ``linkage`` function documentation.

    See Also
    --------
    linkage

    """
    Z = linkage(y, method='complete', metric='euclidean')
    return Z
Example #6
Source File: coords2sort_order.py    From pyscf with Apache License 2.0 6 votes vote down vote up
def coords2sort_order(a2c):
  """ Delivers a list of atom indices which generates a near-diagonal overlap for a given set of atom coordinates

  The order is built greedily: start at atom 0 and repeatedly step to the
  closest atom that has not been listed yet.

  Args:
    a2c: array of atom coordinates, one row per atom.

  Returns:
    Integer array holding every atom index exactly once, in visiting order.
  """
  na = a2c.shape[0]
  if na < 2:
    # Nothing to order. A single atom used to fail here because the
    # neighbour search never bound a "next" atom.
    return np.arange(na)
  aa2d = squareform(pdist(a2c))
  mxd = np.amax(aa2d)+1.0
  seen = np.zeros(na, dtype=bool)
  a = 0
  lsa = [a]
  seen[a] = True
  for _ in range(na-1):
    asrt = np.argsort(aa2d[a])
    # Filter the whole ordering instead of blindly skipping position 0:
    # with coincident atoms position 0 is not guaranteed to be `a` itself,
    # and skipping it could drop an atom and repeat another one.
    b = asrt[~seen[asrt]][0]
    # Mark the used link with a value larger than any real distance.
    aa2d[a,b] = aa2d[b,a] = mxd
    lsa.append(b)
    seen[b] = True
    a = b
  return np.array(lsa)
Example #7
Source File: post_proc.py    From HorizonNet with MIT License 6 votes vote down vote up
def vote(vec, tol):
    """Pick a representative value for ``vec`` by voting over sorted runs.

    After sorting, every slice ``i..j`` that holds at least 40% of the
    samples and whose end points differ by no more than ``tol`` is a
    candidate; the longest candidate wins and its mean is the result.  When
    no candidate exists, or ``len(vec) < tol``, the median is used instead.

    Returns
    -------
    best_fit
        Mean of the winning slice, or the median of ``vec`` as fallback.
    p_score
        Fraction of the samples inside the winning slice (0 for fallback).
    l1_score
        Mean absolute deviation of ``vec`` from ``best_fit``.
    """
    vec = np.sort(vec)
    size = len(vec)
    rank = np.arange(size)[::-1]
    # run_len[i, j] == j - i + 1, the sample count of the sorted slice i..j.
    run_len = rank[:, None] - rank[None, :] + 1.0
    spread = squareform(pdist(vec[:, None], 'minkowski', p=1) + 1e-9)

    invalid = (run_len < size * 0.4) | (spread > tol)
    if invalid.all() or size < tol:
        best_fit = np.median(vec)
        p_score = 0
    else:
        spread[invalid] = 1e5
        run_len[invalid] = -1
        # Flat argmax -> (row, col) of the longest admissible slice.
        max_row, max_col = divmod(run_len.argmax(), size)
        assert max_col > max_row
        best_fit = vec[max_row:max_col+1].mean()
        p_score = (max_col - max_row + 1) / size

    l1_score = np.abs(vec - best_fit).mean()

    return best_fit, p_score, l1_score
Example #8
Source File: hierarchy.py    From lambda-packs with MIT License 6 votes vote down vote up
def single(y):
    """
    Run single (min / nearest point) linkage clustering on the condensed
    distance matrix ``y``.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The resulting linkage matrix.

    See Also
    --------
    linkage: for advanced creation of hierarchical clusterings.
    scipy.spatial.distance.pdist : pairwise distance metrics

    """
    Z = linkage(y, method='single', metric='euclidean')
    return Z
Example #9
Source File: hierarchy.py    From lambda-packs with MIT License 6 votes vote down vote up
def complete(y):
    """
    Run complete (max / farthest point) linkage clustering on a condensed
    distance matrix.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The hierarchical clustering encoded as a linkage matrix. Its
        layout is described in the `linkage` function documentation.

    See Also
    --------
    linkage: for advanced creation of hierarchical clusterings.
    scipy.spatial.distance.pdist : pairwise distance metrics

    """
    Z = linkage(y, method='complete', metric='euclidean')
    return Z
Example #10
Source File: decisionboundaryplot.py    From highdimensional-decision-boundary-plot with MIT License 6 votes vote down vote up
def _get_sorted_db_keypoint_distances(self, N=None):
        """Use a minimum spanning tree heuristic to find the N largest gaps in the
        line constituted by the current decision boundary keypoints.

        Parameters
        ----------
        N : int, optional
            Number of gaps to return. Defaults to
            ``self.n_interpolated_keypoints``.

        Returns
        -------
        edges
            The selected spanning-tree edges, longest first.
        gap_distances
            Squared Euclidean length of each selected edge.
        gap_probability_scores
            ``gap_distances`` divided by their sum.
        """
        # Identity test: ``N == None`` would dispatch to ``N.__eq__`` and is
        # not a reliable "argument omitted" check.
        if N is None:
            N = self.n_interpolated_keypoints
        edges = minimum_spanning_tree(
            squareform(pdist(self.decision_boundary_points_2d))
        )
        # NOTE(review): assumes `edges` is an array of (u, v) index pairs into
        # decision_boundary_points_2d - confirm against minimum_spanning_tree.
        edged = np.array(
            [
                euclidean(
                    self.decision_boundary_points_2d[u],
                    self.decision_boundary_points_2d[v],
                )
                for u, v in edges
            ]
        )
        # Positions of the N longest edges, in descending order of length.
        gap_edge_idx = np.argsort(edged)[::-1][: int(N)]
        edges = edges[gap_edge_idx]
        gap_distances = np.square(edged[gap_edge_idx])
        gap_probability_scores = gap_distances / np.sum(gap_distances)
        return edges, gap_distances, gap_probability_scores
Example #11
Source File: kernels.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _build_kernel(x, kernel, gamma=None):
    """Build a row-by-row affinity matrix for ``x`` with the given kernel.

    Parameters
    ----------
    x : ndarray
        Input data; affinities are computed between its rows.
    kernel : str or callable
        - 'pearson': ``np.corrcoef`` of the rows.
        - 'spearman': ``np.corrcoef`` of the row-wise ranked data.
        - 'cosine': one minus the pairwise cosine distance.
        - 'normalized_angle': ``1 - arccos(cosine similarity) / pi``.
        - 'gaussian': ``rbf_kernel(x, gamma=gamma)``.
        - callable: the result of ``kernel(x)`` is returned as is.
    gamma : float, optional
        Only used by the 'gaussian' kernel; defaults to ``1 / x.shape[1]``.

    Returns
    -------
    The affinity matrix, or whatever a callable ``kernel`` returns.

    Raises
    ------
    ValueError
        If ``kernel`` is neither a known name nor callable.
    """

    if kernel in {'pearson', 'spearman'}:
        if kernel == 'spearman':
            x = np.apply_along_axis(rankdata, 1, x)
        return np.corrcoef(x)

    if kernel in {'cosine', 'normalized_angle'}:
        x = 1 - squareform(pdist(x, metric='cosine'))
        if kernel == 'normalized_angle':
            # Rounding can push a similarity marginally outside [-1, 1]
            # (e.g. for identical rows), where arccos returns NaN. Values
            # already inside the interval are left untouched.
            np.clip(x, -1.0, 1.0, out=x)
            x = 1 - np.arccos(x, x)/np.pi
        return x

    if kernel == 'gaussian':
        if gamma is None:
            gamma = 1 / x.shape[1]
        return rbf_kernel(x, gamma=gamma)

    if callable(kernel):
        return kernel(x)

    raise ValueError("Unknown kernel '{0}'.".format(kernel))
Example #12
Source File: hierarchy.py    From Computable with MIT License 6 votes vote down vote up
def single(y):
    """
    Run single (min / nearest point) linkage clustering on the condensed
    distance matrix ``y``.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The resulting linkage matrix.

    See Also
    --------
    linkage: for advanced creation of hierarchical clusterings.

    """
    Z = linkage(y, method='single', metric='euclidean')
    return Z
Example #13
Source File: hierarchy.py    From lambda-packs with MIT License 6 votes vote down vote up
def average(y):
    """
    Run average (UPGMA) linkage clustering on a condensed distance matrix.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix, i.e. the flat upper-triangular form
        in which ``pdist`` returns its result.

    Returns
    -------
    Z : ndarray
        The hierarchical clustering encoded as a linkage matrix. Its
        structure is described under `linkage`.

    See Also
    --------
    linkage: for advanced creation of hierarchical clusterings.
    scipy.spatial.distance.pdist : pairwise distance metrics

    """
    Z = linkage(y, method='average', metric='euclidean')
    return Z
Example #14
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxRstat_0_Q_linkage_complete(self):
        "Tests maxRstat(Z, R, 0) on the Q data set using complete linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'complete')
        R = inconsistent(Z)
        MD = maxRstat(Z, R, 0)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #15
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxinconsts_Q_linkage_ward(self):
        "Tests maxinconsts(Z, R) on the Q data set using Ward linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'ward')
        R = inconsistent(Z)
        MD = maxinconsts(Z, R)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #16
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxRstat_0_Q_linkage_ward(self):
        "Tests maxRstat(Z, R, 0) on the Q data set using Ward linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'ward')
        R = inconsistent(Z)
        MD = maxRstat(Z, R, 0)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #17
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxRstat_0_Q_linkage_centroid(self):
        "Tests maxRstat(Z, R, 0) on the Q data set using centroid linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'centroid')
        R = inconsistent(Z)
        MD = maxRstat(Z, R, 0)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R, 0)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #18
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxRstat_3_Q_linkage_ward(self):
        "Tests maxRstat(Z, R, 3) on the Q data set using Ward linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'ward')
        R = inconsistent(Z)
        MD = maxRstat(Z, R, 3)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R, 3)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #19
Source File: loci.py    From pyod with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _calculate_decision_score(self, X):
        """Compute one outlier score per row of ``X``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input data points.

        Returns
        -------
        outlier_scores : list
            One score per input sample; entries stay 0 for samples whose
            score is never updated.
        """
        n_samples = X.shape[0]
        scores = [0] * n_samples
        distances = squareform(pdist(X, metric="euclidean"))
        r_max = distances.max() / self.alpha

        for point in range(n_samples):
            radii = _get_critical_values(distances, self.alpha, point, r_max)
            for radius in radii:
                neighbours = _get_sampling_N(distances, point, radius)
                counts = self._get_alpha_n(distances, neighbours, radius)
                own_count = self._get_alpha_n(distances, point, radius)
                n_hat = np.mean(counts)
                mdef = 1 - (own_count / n_hat)
                sigma_mdef = np.std(counts) / n_hat
                if n_hat < 20:
                    # Score is only updated once n_hat reaches 20.
                    continue
                scores[point] = mdef / sigma_mdef
                # Stop scanning radii once the deviation exceeds the
                # threshold.
                if mdef > (self.threshold_ * sigma_mdef):
                    break
        return scores
Example #20
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxinconsts_Q_linkage_median(self):
        "Tests maxinconsts(Z, R) on the Q data set using median linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'median')
        R = inconsistent(Z)
        MD = maxinconsts(Z, R)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #21
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxinconsts_Q_linkage_centroid(self):
        "Tests maxinconsts(Z, R) on the Q data set using centroid linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'centroid')
        R = inconsistent(Z)
        MD = maxinconsts(Z, R)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #22
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_leaves_list_iris_average(self):
        "Tests leaves_list(Z) on the Iris data set using average linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['iris']
        Z = linkage(X, 'average')
        node = to_tree(Z)
        # The pre-order traversal of the tree must match leaves_list(Z)
        # element for element.
        self.assertTrue((node.pre_order() == leaves_list(Z)).all())
Example #23
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxinconsts_Q_linkage_complete(self):
        "Tests maxinconsts(Z, R) on the Q data set using complete linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'complete')
        R = inconsistent(Z)
        MD = maxinconsts(Z, R)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #24
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxinconsts_Q_linkage_single(self):
        "Tests maxinconsts(Z, R) on the Q data set using single linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'single')
        R = inconsistent(Z)
        MD = maxinconsts(Z, R)
        eps = 1e-15
        # Compare against the reference computation on the same Z and R.
        expectedMD = calculate_maximum_inconsistencies(Z, R)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #25
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxdists_Q_linkage_centroid(self):
        "Tests maxdists(Z) on the Q data set using centroid linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'centroid')
        MD = maxdists(Z)
        eps = 1e-15
        # Compare against the reference computation on the same Z.
        expectedMD = calculate_maximum_distances(Z)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #26
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxdists_Q_linkage_ward(self):
        "Tests maxdists(Z) on the Q data set using Ward linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'ward')
        MD = maxdists(Z)
        eps = 1e-15
        # Compare against the reference computation on the same Z.
        expectedMD = calculate_maximum_distances(Z)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #27
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxdists_Q_linkage_complete(self):
        "Tests maxdists(Z) on the Q data set using complete linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'complete')
        MD = maxdists(Z)
        eps = 1e-15
        # Compare against the reference computation on the same Z.
        expectedMD = calculate_maximum_distances(Z)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #28
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_maxdists_Q_linkage_single(self):
        "Tests maxdists(Z) on the Q data set using single linkage."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['Q-X']
        Z = linkage(X, 'single')
        MD = maxdists(Z)
        eps = 1e-15
        # Compare against the reference computation on the same Z.
        expectedMD = calculate_maximum_distances(Z)
        self.assertTrue(within_tol(MD, expectedMD, eps))
Example #29
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_is_monotonic_iris_linkage(self):
        "Tests is_monotonic(Z) on clustering generated by single linkage on Iris data set. Expecting True."
        # linkage() receives the observations directly, so the condensed
        # distance matrix that used to be computed here was never read.
        X = eo['iris']
        Z = linkage(X, 'single')
        # assertTrue already checks truthiness; comparing with `== True`
        # first added nothing.
        self.assertTrue(is_monotonic(Z))
Example #30
Source File: test_hierarchy.py    From Computable with MIT License 5 votes vote down vote up
def test_fcluster_maxclusts_4(self):
        """Check fcluster(Z, criterion='maxclust', t=4) on the Q data set.

        The flat clustering must be isomorphic to the stored reference
        assignment ``eo['fclusterdata-maxclusts-4']``.
        """
        reference = np.int_(eo['fclusterdata-maxclusts-4'])
        observations = eo['Q-X']
        # Here linkage() is fed the condensed distances, not the raw data.
        condensed = pdist(observations)
        Z = linkage(condensed)
        flat = fcluster(Z, criterion='maxclust', t=4)
        self.assertTrue(is_isomorphic(flat, reference))