Python sklearn.metrics.pairwise.euclidean_distances() Examples

The following are 30 code examples of sklearn.metrics.pairwise.euclidean_distances(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source Project: scanorama   Author: brianhie   File: time_align.py    License: MIT License 6 votes vote down vote up
def time_dist(datasets_dimred, time):
    """Correlate pairwise time separations with mean inter-dataset distances.

    Prints Spearman and Pearson correlation between the time gap of each
    dataset pair and the mean Euclidean distance of their embeddings.
    """
    pairwise_time = euclidean_distances(time, time)

    time_dists, scores = [], []
    n_rows, n_cols = pairwise_time.shape
    # Only the strict upper triangle: each unordered pair once.
    for i in range(n_rows):
        for j in range(i + 1, n_cols):
            mean_dist = np.mean(euclidean_distances(
                datasets_dimred[i], datasets_dimred[j]
            ))
            time_dists.append(pairwise_time[i, j])
            scores.append(mean_dist)

    print('Spearman rho = {}'.format(spearmanr(time_dists, scores)))
    print('Pearson rho = {}'.format(pearsonr(time_dists, scores)))
Example #2
Source Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_k_medoids.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_kmedoids_pp():
    """Initial clusters should be well-separated for k-medoids++"""
    rng = np.random.RandomState(seed)
    model = KMedoids()
    # Three visually separated groups: two points near (10, 0), two near
    # (0, 10), and four around (10-12, 10-11).
    points = [
        [10, 0], [11, 0],
        [0, 10], [0, 11],
        [10, 10], [11, 10], [12, 10], [10, 11],
    ]
    dist_matrix = euclidean_distances(points)

    centers = model._kpp_init(dist_matrix, n_clusters=3, random_state_=rng)

    assert len(centers) == 3

    # Distinct medoids must sit farther apart than the within-group scale.
    pairwise = dist_matrix[centers][:, centers]
    assert np.all((pairwise > 5) | (pairwise == 0))
Example #3
Source Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_k_medoids.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_precomputed():
    """Test the 'precomputed' distance metric."""
    rng = np.random.RandomState(seed)
    train_points = [[1.0, 0.0], [1.1, 0.0], [0.0, 1.0], [0.0, 1.1]]
    train_dist = euclidean_distances(train_points)
    query_points = [[1.1, 0.0], [0.0, 0.9]]
    query_dist = euclidean_distances(query_points, train_points)

    model = KMedoids(metric="precomputed", n_clusters=2, random_state=rng)
    model.fit(train_dist)

    assert_allclose(model.inertia_, 0.2)
    assert_array_equal(model.medoid_indices_, [2, 0])
    assert_array_equal(model.labels_, [1, 1, 0, 0])
    # Without a raw feature space, centers cannot be materialised.
    assert model.cluster_centers_ is None

    med_1, med_2 = tuple(model.medoid_indices_)
    predictions = model.predict(query_dist)
    assert_array_equal(predictions, [med_1 // 2, med_2 // 2])

    # Transform selects the distance columns of the fitted medoids.
    transformed = model.transform(query_dist)
    assert_array_equal(transformed, query_dist[:, model.medoid_indices_])
Example #4
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 6 votes vote down vote up
def test_euclidean_distances(dtype, x_array_constr, y_array_constr):
    # euclidean_distances must agree with scipy's cdist when both X and a
    # distinct Y are supplied.
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0
    Y = rng.random_sample((10, 10)).astype(dtype, copy=False)
    Y[Y < 0.8] = 0

    expected = cdist(X, Y)

    distances = euclidean_distances(x_array_constr(X), y_array_constr(Y))

    # The default rtol=1e-7 sits too close to float32 precision and fails
    # due to rounding errors, so relax it slightly.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
Example #5
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 6 votes vote down vote up
def test_euclidean_distances_sym(dtype, x_array_constr):
    # With only X given, euclidean_distances must match scipy's pdist.
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0

    expected = squareform(pdist(X))

    distances = euclidean_distances(x_array_constr(X))

    # The default rtol=1e-7 sits too close to float32 precision and fails
    # due to rounding errors, so relax it slightly.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
Example #6
Source Project: qiskit-aqua   Author: Qiskit   File: error_correcting_code.py    License: Apache License 2.0 6 votes vote down vote up
def predict(self, x):
    """
    Applying multiple estimators for prediction.

    Args:
        x (numpy.ndarray): NxD array
    Returns:
        numpy.ndarray: predicted labels, Nx1 array
    """
    # One flattened decision-function column per estimator -> N x n_estimators.
    scores = np.array(
        [np.ravel(estimator.decision_function(x)) for estimator in self.estimators]
    ).T
    # The nearest codeword (Euclidean) determines the predicted class.
    nearest = euclidean_distances(scores, self.codebook).argmin(axis=1)
    return self.classes[nearest]
Example #7
Source Project: fsfc   Author: danilkolikov   File: Lasso.py    License: MIT License 6 votes vote down vote up
def _calc_objective_vector(x, labels):
    """Per-feature objective: overall mean pairwise distance minus the
    sum of size-normalised within-cluster pairwise distances."""
    # Group sample indices by their cluster label.
    by_label = {}
    for idx, label in enumerate(labels):
        by_label.setdefault(label, []).append(idx)

    result = np.zeros([1, x.shape[1]])
    for feat in range(x.shape[1]):
        column = x[:, feat].T.reshape([x.shape[0], 1])
        within = 0
        for members in by_label.values():
            within += np.sum(euclidean_distances(column[members])) / len(members)
        result[0, feat] = np.sum(euclidean_distances(column)) / x.shape[0] - within
    return result
Example #8
Source Project: anvio   Author: merenlab   File: clustering.py    License: GNU General Public License v3.0 6 votes vote down vote up
def get_scaled_vectors(vectors, user_seed=None, n_components=12, normalize=True, progress=progress):
    """Embed vectors into n_components dimensions via MDS on their
    pairwise Euclidean distance matrix."""
    rng = np.random.RandomState(seed=user_seed) if user_seed else np.random.RandomState()

    # FIXME: Make this optional:
    from sklearn.metrics.pairwise import euclidean_distances as d

    data = np.array(vectors)
    if normalize:
        data = get_normalized_vectors(data)

    # compute similarities based on d
    progress.update('Computing similarity matrix')
    dissimilarities = d(data)

    progress.update('Scaling using %d components' % n_components)
    mds = manifold.MDS(n_components=n_components, max_iter=300, eps=1e-10,
                       random_state=rng, dissimilarity="precomputed", n_jobs=1)

    progress.update('Fitting')
    return mds.fit(dissimilarities).embedding_
Example #9
Source Project: skl-groups   Author: djsutherland   File: test_transforms.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_rbfize():
    """RBFize turns (optionally squared / median-scaled) distances into
    exp(-gamma * d^2) kernel values."""
    X = np.random.normal(size=(20, 4))
    dists = euclidean_distances(X)
    median = np.median(dists[np.triu_indices_from(dists, k=1)])

    # Plain gamma: exp(-gamma * d^2).
    # NOTE(review): checks `res`, not the estimator — likely intended
    # `rbf`; an ndarray never has `median_`. Verify against upstream.
    res = RBFize(gamma=.25).fit_transform(dists)
    assert not hasattr(res, 'median_')
    assert np.allclose(res, np.exp(-.25 * dists ** 2))

    # squared=True: inputs are treated as already-squared distances.
    res = RBFize(gamma=.25, squared=True).fit_transform(dists)
    assert np.allclose(res, np.exp(-.25 * dists))

    rbf = RBFize(gamma=4, scale_by_median=True)
    res = rbf.fit_transform(dists)
    assert np.allclose(rbf.median_, median)
    assert np.allclose(res, np.exp((-4 * median ** 2) * dists ** 2))

    rbf = RBFize(gamma=4, scale_by_median=True, squared=True)
    res = rbf.fit_transform(dists)
    assert np.allclose(rbf.median_, median)
    assert np.allclose(res, np.exp((-4 * median) * dists))
Example #10
Source Project: CIKM-AnalytiCup-2018   Author: zake7749   File: feature_engineering.py    License: Apache License 2.0 6 votes vote down vote up
def _get_similarity_values(self, q1_csc, q2_csc):
    """
    Compute pairwise similarity/distance features for aligned question pairs.

    :param q1_csc: iterable of sparse row vectors for the first questions
    :param q2_csc: iterable of sparse row vectors for the second questions
    :return: tuple of five per-pair lists: (cosine similarity, manhattan
             distance, euclidean distance, jaccard score, minkowski distance)
    """
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []

    for i, j in zip(q1_csc, q2_csc):
        cosine_sim.append(cs(i, j)[0][0])
        manhattan_dis.append(md(i, j)[0][0])
        eucledian_dis.append(ed(i, j)[0][0])
        # Jaccard and minkowski need dense arrays.
        i_ = i.toarray()
        j_ = j.toarray()
        # Jaccard can fail on degenerate inputs; fall back to 0 rather than
        # aborting the whole feature pass.  Catch Exception instead of a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        try:
            jaccard_dis.append(jsc(i_, j_))
        except Exception:
            jaccard_dis.append(0)

        minkowsk_dis.append(minkowski_dis.pairwise(i_, j_)[0][0])
    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
Example #11
Source Project: abu   Author: bbfamily   File: ABuStatsUtil.py    License: GNU General Public License v3.0 6 votes vote down vote up
def euclidean_distance_xy(x, y, to_similar=False):
    """
    Euclidean (L2-norm) distance between two sequences.  The module flag
    g_euclidean_safe selects euclidean_distances versus la.norm; measured
    timings:
        euclidean_distances: 10000 loops, best of 3: 128 µs per loop
        la.norm            : 10000 loops, best of 3: 89.6 µs per loop
    Only switch modes for heavy batch workloads.
    :param x: iterable sequence
    :param y: iterable sequence
    :param to_similar: whether to convert the distance into a similarity value
    :return: float

    """
    if g_euclidean_safe:
        def euclidean(a, b):
            return euclidean_distances(a, b)
    else:
        def euclidean(a, b):
            return la.norm(a - b)
    distance = _distance_xy(euclidean, x, y)
    if to_similar:
        # L1/L2-derived similarity values are not intuitive on their own;
        # they are only meaningful for relative comparison.
        distance = 1.0 / (1.0 + distance)
    return distance
Example #12
Source Project: region   Author: pysal   File: test_skater.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_init():
    """Constructor wiring: metric/center/reduction defaults and overrides."""
    default = Spanning_Forest()
    assert default.metric == skm.manhattan_distances
    assert default.center == np.mean
    assert default.reduction == np.sum

    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median, reduction=np.max)
    assert change.metric == skm.euclidean_distances
    assert change.center == np.median
    assert change.reduction == np.max

    # An affinity is wrapped into a -log(...) lambda dissimilarity.
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    expected = -np.log(skm.cosine_similarity(data[:2,]))
    np.testing.assert_allclose(expected, sym.metric(data[:2,]))
Example #13
Source Project: mvlearn   Author: neurodata   File: kcca.py    License: Apache License 2.0 6 votes vote down vote up
def _make_kernel(X, Y, ktype, constant=0.1, degree=2.0, sigma=1.0):
    # Linear kernel
    if ktype == "linear":
        return (X @ Y.T)

    # Polynomial kernel
    elif ktype == "poly":
        return (X @ Y.T + constant) ** degree

    # Gaussian kernel
    elif ktype == "gaussian":
        distmat = euclidean_distances(X, Y, squared=True)
        return np.exp(-distmat / (2 * sigma ** 2))

    # Linear diagonal kernel
    elif ktype == "linear-diag":
        return (X @ Y.T).diagonal()

    # Polynomial diagonal kernel
    elif ktype == "poly-diag":
        return ((X @ Y.T + constant) ** degree).diagonal()

    # Gaussian diagonal kernel
    elif ktype == "gaussian-diag":
        return np.exp(-np.sum(np.power((X-Y), 2), axis=1)/(2*sigma**2)) 
Example #14
Source Project: scanorama   Author: brianhie   File: time_align.py    License: MIT License 5 votes vote down vote up
def time_align_correlate(alignments, time):
    """Correlate pairwise time separations with alignment scores and
    print Spearman/Pearson statistics."""
    pairwise_time = euclidean_distances(time, time)

    assert(pairwise_time.shape == alignments.shape)

    time_dists, scores = [], []
    n_rows, n_cols = pairwise_time.shape
    # Only the strict upper triangle: each unordered pair once.
    for i in range(n_rows):
        for j in range(i + 1, n_cols):
            time_dists.append(pairwise_time[i, j])
            scores.append(alignments[i, j])

    print('Spearman rho = {}'.format(spearmanr(time_dists, scores)))
    print('Pearson rho = {}'.format(pearsonr(time_dists, scores)))
Example #15
Source Project: Talking-Face-Generation-DAVS   Author: Hangz-nju-cuhk   File: embedding_utils.py    License: MIT License 5 votes vote down vote up
def L2retrieval(clips_embed, captions_embed, return_ranks = False):
    """
    Caption-to-clip retrieval scored by Euclidean (L2) distance.

    Args:
        clips_embed: (n_clips, d) embedding matrix of the clips.
        captions_embed: (n_captions, d) embedding matrix; caption i is
            assumed to correspond to clip i.
        return_ranks: also return the per-caption rank array and the
            top-1 retrieved index per caption.

    Returns:
        (r1, r5, r10, r50, medr, meanr) recall percentages at 1/5/10/50
        plus median and mean rank, optionally followed by (ranks, top1).
    """
    captions_num = captions_embed.shape[0]
    import time
    t1 = time.time()
    # Row i holds distances from caption i to every clip; argsort gives the
    # retrieval order, and the position of the matching clip index is its rank.
    d = euclidean_distances(captions_embed, clips_embed)
    inds = np.argsort(d)
    num = np.arange(captions_num).reshape(captions_num, 1)
    ranks = np.where(inds == num)[1]
    top1 = inds[:, 0]
    t2 = time.time()
    print((t2 - t1))
    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    r50 = 100.0 * len(np.where(ranks < 50)[0]) / len(ranks)
    # Ranks are 0-based, so add 1 for the conventional 1-based statistics.
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1

    if return_ranks:
        return (r1, r5, r10, r50, medr, meanr), (ranks, top1)
    else:
        return (r1, r5, r10, r50, medr, meanr)
Example #16
Source Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_k_medoids.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_random_deterministic():
    """Random_state should determine 'random' init output."""
    rng = np.random.RandomState(seed)
    distances = euclidean_distances(load_iris()["data"])
    medoids = KMedoids(init="random")._initialize_medoids(distances, 4, rng)
    assert_array_equal(medoids, [47, 117, 67, 103])
Example #17
Source Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_k_medoids.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_heuristic_deterministic():
    """Result of heuristic init method should not depend on random state."""
    rng1, rng2 = np.random.RandomState(1), np.random.RandomState(2)
    D = euclidean_distances(load_iris()["data"])

    first = KMedoids(init="heuristic")._initialize_medoids(D, 10, rng1)
    second = KMedoids(init="heuristic")._initialize_medoids(D, 10, rng2)

    assert_array_equal(first, second)
Example #18
Source Project: scikit-multiflow   Author: scikit-multiflow   File: online_smote_bagging.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def online_smote(self, k=5):
    """Synthesise a positive sample by interpolating the newest positive
    toward one of its k nearest positive neighbours (online SMOTE).

    :param k: neighbourhood size (clipped to the available neighbours)
    :return: the synthetic sample, or the newest positive if it is the
             only one seen so far
    """
    newest = self.pos_samples[-1]
    if len(self.pos_samples) <= 1:
        # Nothing to interpolate with yet.
        return newest
    distance_vector = euclidean_distances(self.pos_samples[:-1], [newest])[0]
    neighbors = np.argsort(distance_vector)
    k = min(k, len(neighbors))
    chosen = self._random_state.randint(0, k)
    gamma = self._random_state.rand()
    return newest + gamma * (newest - self.pos_samples[neighbors[chosen]])
Example #19
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_euclidean_distances_known_result(x_array_constr, y_array_constr):
    """Pairwise Euclidean distances against a hand-computed result."""
    X = x_array_constr([[0]])
    Y = y_array_constr([[1], [2]])
    assert_allclose(euclidean_distances(X, Y), [[1., 2.]])
Example #20
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_euclidean_distances_with_norms(dtype, y_array_constr):
    # Correct {X,Y}_norm_squared must reproduce the plain result, and wrong
    # norms must produce a detectably wrong answer.
    rng = np.random.RandomState(0)
    X = rng.random_sample((10, 10)).astype(dtype, copy=False)
    Y = rng.random_sample((20, 10)).astype(dtype, copy=False)

    # Precomputed norms are only honoured when their dtype is float64.
    X_norm_sq = (X.astype(np.float64) ** 2).sum(axis=1).reshape(1, -1)
    Y_norm_sq = (Y.astype(np.float64) ** 2).sum(axis=1).reshape(1, -1)

    Y = y_array_constr(Y)

    baseline = euclidean_distances(X, Y)
    for kwargs in ({'X_norm_squared': X_norm_sq},
                   {'Y_norm_squared': Y_norm_sq},
                   {'X_norm_squared': X_norm_sq, 'Y_norm_squared': Y_norm_sq}):
        assert_allclose(euclidean_distances(X, Y, **kwargs), baseline)

    # Zeroed norms are wrong on purpose and must not match the baseline.
    wrong_D = euclidean_distances(X, Y,
                                  X_norm_squared=np.zeros_like(X_norm_sq),
                                  Y_norm_squared=np.zeros_like(Y_norm_sq))
    with pytest.raises(AssertionError):
        assert_allclose(wrong_D, baseline)
Example #21
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_euclidean_distances_extreme_values(dtype, eps, rtol, dim):
    # With float32 input, internal upcasting keeps euclidean_distances
    # accurate; float64 still has residual precision issues.
    X = np.full((1, dim), 1., dtype=dtype)
    Y = np.full((1, dim), 1. + eps, dtype=dtype)

    assert_allclose(euclidean_distances(X, Y), cdist(X, Y), rtol=1e-5)
Example #22
Source Project: nlp_research   Author: zhufz   File: test_match.py    License: MIT License 5 votes vote down vote up
def __call__(self, text):
    """
    Run similarity inference for *text*.

    In 'point' tfrecords mode the input must be two sentences separated by
    '||' and the pair's (label, score) is returned directly.  Otherwise the
    best match from the (possibly user-extended) corpus is returned as
    (label, score, index, matched_text).
    """
    if self.tfrecords_mode == 'point':
        assert text.find('||') != -1, "input should contain two sentences separated by ||"
        text_a = text.split('||')[0]
        text_b = text.split('||')[-1]
        pred, score = self._get_label([text_a], [text_b], need_preprocess=True)
        return pred[0][0], score[0][0]

    # Load user-defined questions (custom entries take priority).
    if self.sim_mode == 'cross':
        text_list = self.text_list
        label_list = self.label_list
        if self.zdy != {}:
            text_list = self.zdy['text_list'] + text_list
            label_list = self.zdy['label_list'] + label_list
        # BUG FIX: score against the merged text_list (not self.text_list)
        # so user-defined entries are actually considered and indices stay
        # aligned with label_list.
        pred, score = self._get_label([text], text_list, need_preprocess=True)
        selected_id = np.argmax(score)
        out_score = score[selected_id]
    elif self.sim_mode == 'represent':
        text_list = self.text_list
        vec_list = self.vec_list
        label_list = self.label_list
        if self.zdy != {}:
            text_list = self.zdy['text_list'] + text_list
            vec_list = np.concatenate([self.zdy['vec_list'], self.vec_list], axis=0)
            label_list = self.zdy['label_list'] + label_list
        vec = self._get_vecs([text], need_preprocess=True)
        if self.is_distance:
            scores = euclidean_distances(vec, vec_list)[0]
            selected_id = np.argmin(scores)
            out_score = 1 - scores[selected_id]
        else:
            scores = cosine_similarity(vec, vec_list)[0]
            selected_id = np.argmax(scores)
            out_score = scores[selected_id]
    else:
        raise ValueError('unknown sim mode, represent or cross?')
    # BUG FIX: index the merged text_list so selected_id refers to the same
    # list the score was computed over (self.text_list misaligns when zdy
    # entries were prepended).
    ret = (label_list[selected_id], out_score, selected_id,
           text_list[selected_id])
    return ret
Example #23
Source Project: nlp_research   Author: zhufz   File: similarity.py    License: MIT License 5 votes vote down vote up
def similarity(self, query, type):
    """
    Score *query* against every document in the corpus.

    :param query: query string
    :param type: one of 'cosine', 'manhattan', 'euclidean', 'bm25'
    :return: list of scores aligned with self.corpus
    :raises ValueError: for an unsupported similarity type
    """
    # Identity check: `!= None` invokes __ne__ and can misbehave on
    # objects that overload comparisons.
    assert self.corpus is not None, "self.corpus can't be None"
    ret = []
    if type == 'cosine':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = cosine_similarity(item, query)
            ret.append(sim[0][0])
    elif type == 'manhattan':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = manhattan_distances(item, query)
            ret.append(sim[0][0])
    elif type == 'euclidean':
        query = self.get_vector(query)
        for item in self.corpus_vec:
            sim = euclidean_distances(item, query)
            ret.append(sim[0][0])
    elif type == 'bm25':
        # BM25 operates on tokens, not vectors.
        query = query.split()
        ret = self.bm25_model.get_scores(query)
    else:
        raise ValueError('similarity type error:%s'%type)
    return ret
Example #24
Source Project: DHGNN   Author: iMoonLab   File: construct_hypergraph.py    License: MIT License 5 votes vote down vote up
def _construct_edge_list_from_cluster(X, clusters, adjacent_clusters, k_neighbors) -> np.array:
    """
    construct edge list (numpy array) from cluster for single modality
    :param X: feature
    :param clusters: number of clusters for k-means
    :param adjacent_clusters: a node's adjacent clusters
    :param k_neighbors: number of a node's neighbors
    :return:
    """
    n_points = X.shape[0]
    kmeans = KMeans(n_clusters=clusters, random_state=0).fit(X)
    # Each point's `adjacent_clusters` nearest cluster centers (by distance).
    dis = euclidean_distances(X, kmeans.cluster_centers_)
    _, nearest_clusters = torch.topk(torch.Tensor(dis), adjacent_clusters, largest=False)
    nearest_clusters = nearest_clusters.numpy()
    labels = kmeans.labels_
    members_of = [np.where(labels == c)[0] for c in range(clusters)]

    def _flatten(arrays):
        """example: [[0,1],[3,5,6],[-1]] -> [0,1,3,5,6,-1]"""
        flat = list()
        for arr in arrays:
            flat += arr.tolist()
        return flat

    # Candidate neighbour pool per point: members of its nearest clusters,
    # plus the point itself.
    neighbor_pool = [
        _flatten([members_of[nearest_clusters[p][i]] for i in range(adjacent_clusters)])
        for p in range(n_points)
    ]
    for p, pool in enumerate(neighbor_pool):
        pool.append(p)
    return np.array([sample_ids(neighbor_pool[p], k_neighbors) for p in range(n_points)])
Example #25
Source Project: DHGNN   Author: iMoonLab   File: layers.py    License: MIT License 5 votes vote down vote up
def _cluster_select(self, ids, feats):
        """
        compute k-means centers and cluster labels of each node
        return top #n_cluster nearest cluster transformed features
        :param ids: indices selected during train/valid/test, torch.LongTensor
        :param feats: feature tensor for ALL nodes; rows are indexed by `ids`
        :return: top #n_cluster nearest cluster mapped features
        """
        # Lazily build the sampled-neighbour index tensor on first call and
        # cache it; `self.kmeans` doubles as the cache slot (it stores the
        # index tensor, not the fitted KMeans object).
        if self.kmeans is None:
            _N = feats.size(0)
            np_feats = feats.detach().cpu().numpy()
            kmeans = KMeans(n_clusters=self.n_cluster, random_state=0, n_jobs=-1).fit(np_feats)
            centers = kmeans.cluster_centers_
            # Each node's self.n_center nearest cluster centers (smallest
            # distances via largest=False).
            dis = euclidean_distances(np_feats, centers)
            _, cluster_center_dict = torch.topk(torch.Tensor(dis), self.n_center, largest=False)
            cluster_center_dict = cluster_center_dict.numpy()
            point_labels = kmeans.labels_
            # point_in_which_cluster[c] = indices of the nodes in cluster c.
            point_in_which_cluster = [np.where(point_labels == i)[0] for i in range(self.n_cluster)]
            # For every node, sample self.kc member ids from each of its
            # nearest clusters.
            idx = torch.LongTensor([[sample_ids_v2(point_in_which_cluster[cluster_center_dict[point][i]], self.kc)   
                        for i in range(self.n_center)] for point in range(_N)])    # (_N, n_center, kc)
            self.kmeans = idx
        else:
            idx = self.kmeans
        
        # Restrict to the requested nodes and gather their neighbours' features.
        idx = idx[ids]
        N = idx.size(0)
        d = feats.size(1)
        cluster_feats = feats[idx.view(-1)].view(N, self.n_center, self.kc, d)

        return cluster_feats                    # (N, n_center, kc, d)
Example #26
Source Project: anvio   Author: merenlab   File: clustering.py    License: GNU General Public License v3.0 5 votes vote down vote up
def get_newick_tree_data_for_dict(d, transpose=False, linkage=constants.linkage_method_default, distance=constants.distance_metric_default, norm='l1'):
    """Build a newick tree string for a dict of vectors (rows, or columns
    when transpose=True)."""
    is_distance_and_linkage_compatible(distance, linkage)

    vectors = pd.DataFrame.from_dict(d, orient='index')

    # Map matrix row ids back to sample names (columns when transposed).
    labels = vectors.columns if transpose else vectors.index
    id_to_sample_dict = {i: labels[i] for i in range(len(labels))}

    return get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict, transpose=transpose)
Example #27
Source Project: tslearn   Author: tslearn-team   File: metrics.py    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
def compute(self):
    """Compute distance matrix.

    Returns
    -------
    D: array, shape = [m, n]
        Distance matrix.
    """
    # Squared Euclidean distances between every row of X and every row of Y.
    dist_matrix = euclidean_distances(self.X, self.Y, squared=True)
    return dist_matrix
Example #28
Source Project: perfect_match   Author: d909b   File: news_benchmark.py    License: MIT License 5 votes vote down vote up
def get_centroid_weights(self, x):
    """
    Euclidean distance from the standardised entry *x* to each centroid.

    Returns a squeezed array with one distance per centroid.
    """
    entry = self.data_access.standardise_entry(x).reshape((1, -1))
    # BUG FIX: under Python 3 the original nested ``map`` objects were lazy
    # iterators, so ``np.squeeze`` received a map object (a 0-d object
    # array) instead of the distances.  Materialise them with a list
    # comprehension.
    similarities = [
        euclidean_distances(entry, centroid[0].reshape((1, -1)))
        for centroid in self.centroids
    ]
    return np.squeeze(similarities)
Example #29
Source Project: Same-Size-K-Means   Author: ndanielsen   File: equal_groups.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _transform(self, X):
    """guts of transform method; no input validation"""
    # Distance of every sample to every fitted cluster center.
    distances = euclidean_distances(X, self.cluster_centers_)
    return distances
Example #30
Source Project: soft-dtw   Author: mblondel   File: distance.py    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
def compute(self):
    """
    Compute distance matrix.

    Returns
    -------
    D: array, shape = [m, n]
        Distance matrix.
    """
    # Pairwise SQUARED Euclidean distances between rows of X and rows of Y.
    return euclidean_distances(self.X, self.Y, squared=True)