Python sklearn.metrics.pairwise.euclidean_distances() Examples
The following are 30
code examples of sklearn.metrics.pairwise.euclidean_distances().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
sklearn.metrics.pairwise
, or try the search function
.

Example #1
Source Project: scanorama Author: brianhie File: time_align.py License: MIT License | 6 votes |
def time_dist(datasets_dimred, time):
    """Correlate temporal separation with embedding-space separation.

    For every pair of datasets, compares the distance between their time
    labels with the mean pairwise euclidean distance between their
    dimensionality-reduced points, then prints Spearman and Pearson
    correlations over all pairs.
    """
    pairwise_times = euclidean_distances(time, time)
    n_rows, n_cols = pairwise_times.shape
    gaps, dists = [], []
    # Upper triangle only: each unordered pair (i, j) is counted once.
    for i in range(n_rows):
        for j in range(i + 1, n_cols):
            mean_dist = np.mean(euclidean_distances(
                datasets_dimred[i], datasets_dimred[j]
            ))
            gaps.append(pairwise_times[i, j])
            dists.append(mean_dist)
    print('Spearman rho = {}'.format(spearmanr(gaps, dists)))
    print('Pearson rho = {}'.format(pearsonr(gaps, dists)))
Example #2
Source Project: scikit-learn-extra Author: scikit-learn-contrib File: test_k_medoids.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_kmedoids_pp():
    """k-medoids++ initialization should pick well-separated medoids."""
    rng = np.random.RandomState(seed)
    model = KMedoids()
    points = [
        [10, 0],
        [11, 0],
        [0, 10],
        [0, 11],
        [10, 10],
        [11, 10],
        [12, 10],
        [10, 11],
    ]
    dist_matrix = euclidean_distances(points)
    chosen = model._kpp_init(dist_matrix, n_clusters=3, random_state_=rng)
    assert len(chosen) == 3
    # Distances between distinct chosen medoids must exceed 5 (the
    # diagonal of the submatrix is 0 and is allowed through).
    between = dist_matrix[chosen][:, chosen]
    assert np.all((between > 5) | (between == 0))
Example #3
Source Project: scikit-learn-extra Author: scikit-learn-contrib File: test_k_medoids.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_precomputed():
    """KMedoids(metric='precomputed') should accept raw distance matrices."""
    rng = np.random.RandomState(seed)
    train = [[1.0, 0.0], [1.1, 0.0], [0.0, 1.0], [0.0, 1.1]]
    train_dist = euclidean_distances(train)
    query = [[1.1, 0.0], [0.0, 0.9]]
    query_dist = euclidean_distances(query, train)

    model = KMedoids(metric="precomputed", n_clusters=2, random_state=rng)
    model.fit(train_dist)

    assert_allclose(model.inertia_, 0.2)
    assert_array_equal(model.medoid_indices_, [2, 0])
    assert_array_equal(model.labels_, [1, 1, 0, 0])
    # With a precomputed metric there are no coordinate-space centers.
    assert model.cluster_centers_ is None

    med_1, med_2 = tuple(model.medoid_indices_)
    predictions = model.predict(query_dist)
    assert_array_equal(predictions, [med_1 // 2, med_2 // 2])

    transformed = model.transform(query_dist)
    assert_array_equal(transformed, query_dist[:, model.medoid_indices_])
Example #4
Source Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_pairwise.py License: MIT License | 6 votes |
def test_euclidean_distances(dtype, x_array_constr, y_array_constr):
    """euclidean_distances(X, Y) must agree with scipy's cdist for Y != X."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0
    Y = rng.random_sample((10, 10)).astype(dtype, copy=False)
    Y[Y < 0.8] = 0
    expected = cdist(X, Y)

    X = x_array_constr(X)
    Y = y_array_constr(Y)
    actual = euclidean_distances(X, Y)

    # The default rtol=1e-7 sits too close to float32 precision and
    # fails due to rounding errors, so relax it slightly.
    assert_allclose(actual, expected, rtol=1e-6)
    assert actual.dtype == dtype
Example #5
Source Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_pairwise.py License: MIT License | 6 votes |
def test_euclidean_distances_sym(dtype, x_array_constr):
    """euclidean_distances(X) must agree with scipy's pdist on X alone."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    X[X < 0.8] = 0
    expected = squareform(pdist(X))

    X = x_array_constr(X)
    actual = euclidean_distances(X)

    # The default rtol=1e-7 sits too close to float32 precision and
    # fails due to rounding errors, so relax it slightly.
    assert_allclose(actual, expected, rtol=1e-6)
    assert actual.dtype == dtype
Example #6
Source Project: qiskit-aqua Author: Qiskit File: error_correcting_code.py License: Apache License 2.0 | 6 votes |
def predict(self, x):
    """Predict labels by decoding the error-correcting output code.

    Each estimator contributes one column of decision-function margins;
    the row of margins for a sample is matched against the codebook by
    euclidean distance and the nearest codeword's class is returned.

    Args:
        x (numpy.ndarray): NxD array

    Returns:
        numpy.ndarray: predicted labels, Nx1 array
    """
    margins = [np.ravel(est.decision_function(x)) for est in self.estimators]
    code_outputs = np.array(margins).T
    nearest = euclidean_distances(code_outputs, self.codebook).argmin(axis=1)
    return self.classes[nearest]
Example #7
Source Project: fsfc Author: danilkolikov File: Lasso.py License: MIT License | 6 votes |
def _calc_objective_vector(x, labels):
    """Per-feature clustering objective: total spread minus within-cluster spread.

    For each feature column, computes the sum of pairwise euclidean
    distances over all samples (normalized by sample count) minus the
    per-cluster sums of pairwise distances (each normalized by cluster
    size). Returns a 1 x n_features array of these values.
    """
    members = {}
    for idx, lbl in enumerate(labels):
        members.setdefault(lbl, []).append(idx)

    n_samples = x.shape[0]
    scores = np.zeros([1, x.shape[1]])
    for col in range(x.shape[1]):
        column = x[:, col].T.reshape([n_samples, 1])
        within = 0
        for lbl, idx_list in members.items():
            pairwise = euclidean_distances(column[idx_list])
            within += np.sum(pairwise) / len(idx_list)
        scores[0, col] = np.sum(euclidean_distances(column)) / n_samples - within
    return scores
Example #8
Source Project: anvio Author: merenlab File: clustering.py License: GNU General Public License v3.0 | 6 votes |
def get_scaled_vectors(vectors, user_seed=None, n_components=12, normalize=True, progress=progress):
    """Embed vectors into `n_components` dimensions with metric MDS.

    Builds a pairwise euclidean distance matrix (optionally after
    normalization) and feeds it to MDS as a precomputed dissimilarity.
    """
    rng = np.random.RandomState(seed=user_seed) if user_seed else np.random.RandomState()

    # FIXME: Make this optional:
    from sklearn.metrics.pairwise import euclidean_distances as d

    if normalize:
        vectors = get_normalized_vectors(np.array(vectors))
    else:
        vectors = np.array(vectors)

    progress.update('Computing similarity matrix')
    similarities = d(vectors)

    progress.update('Scaling using %d components' % n_components)
    mds = manifold.MDS(n_components=n_components, max_iter=300, eps=1e-10,
                       random_state=rng, dissimilarity="precomputed", n_jobs=1)

    progress.update('Fitting')
    return mds.fit(similarities).embedding_
Example #9
Source Project: skl-groups Author: djsutherland File: test_transforms.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_rbfize():
    """RBFize should exponentiate (squared) distances with the given gamma."""
    X = np.random.normal(size=(20, 4))
    dists = euclidean_distances(X)
    # Median of the strictly-upper-triangular distances (no diagonal).
    median = np.median(dists[np.triu_indices_from(dists, k=1)])

    transformer = RBFize(gamma=.25)
    out = transformer.fit_transform(dists)
    assert not hasattr(out, 'median_')
    assert np.allclose(out, np.exp(-.25 * dists ** 2))

    transformer = RBFize(gamma=.25, squared=True)
    out = transformer.fit_transform(dists)
    assert np.allclose(out, np.exp(-.25 * dists))

    transformer = RBFize(gamma=4, scale_by_median=True)
    out = transformer.fit_transform(dists)
    assert np.allclose(transformer.median_, median)
    assert np.allclose(out, np.exp((-4 * median**2) * dists ** 2))

    transformer = RBFize(gamma=4, scale_by_median=True, squared=True)
    out = transformer.fit_transform(dists)
    assert np.allclose(transformer.median_, median)
    assert np.allclose(out, np.exp((-4 * median) * dists))
Example #10
Source Project: CIKM-AnalytiCup-2018 Author: zake7749 File: feature_engineering.py License: Apache License 2.0 | 6 votes |
def _get_similarity_values(self, q1_csc, q2_csc):
    """Compute five per-pair similarity/distance series for two question sets.

    Iterates the sparse rows of q1_csc and q2_csc in lockstep and collects
    cosine similarity, manhattan, euclidean, jaccard, and minkowski values.
    """
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []
    for row_a, row_b in zip(q1_csc, q2_csc):
        cosine_sim.append(cs(row_a, row_b)[0][0])
        manhattan_dis.append(md(row_a, row_b)[0][0])
        eucledian_dis.append(ed(row_a, row_b)[0][0])
        dense_a = row_a.toarray()
        dense_b = row_b.toarray()
        # Jaccard can fail on some inputs; fall back to 0 rather than abort.
        try:
            jaccard_dis.append(jsc(dense_a, dense_b))
        except:
            jaccard_dis.append(0)
        minkowsk_dis.append(minkowski_dis.pairwise(dense_a, dense_b)[0][0])
    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
Example #11
Source Project: abu Author: bbfamily File: ABuStatsUtil.py License: GNU General Public License v3.0 | 6 votes |
def euclidean_distance_xy(x, y, to_similar=False):
    """Euclidean (L2-norm) distance between two sequences.

    ``g_euclidean_safe`` selects the backend: sklearn's
    ``euclidean_distances`` when set, otherwise ``la.norm``. Rough
    timings from the original author:

        euclidean_distances: 10000 loops, best of 3: 128 µs per loop
        la.norm            : 10000 loops, best of 3: 89.6 µs per loop

    Pick the mode up front for batch/expensive workloads; otherwise do
    not bother switching.

    :param x: iterable sequence
    :param y: iterable sequence
    :param to_similar: whether to convert the output distance to a similarity
    :return: float value
    """
    if g_euclidean_safe:
        metric = lambda a, b: euclidean_distances(a, b)
    else:
        metric = lambda a, b: la.norm(a - b)
    result = _distance_xy(metric, x, y)
    if to_similar:
        # The L1/L2-derived similarity is not intuitive on its own;
        # it is only meaningful for relative comparisons.
        result = 1.0 / (1.0 + result)
    return result
Example #12
Source Project: region Author: pysal File: test_skater.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_init():
    """Spanning_Forest defaults and keyword overrides should be honored."""
    default = Spanning_Forest()
    assert default.metric == skm.manhattan_distances
    assert default.center == np.mean
    assert default.reduction == np.sum

    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median,
                             reduction=np.max)
    assert change.metric == skm.euclidean_distances
    assert change.center == np.median
    assert change.reduction == np.max

    # An affinity is wrapped into a lambda that converts it to a distance.
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    expected_distance = -np.log(skm.cosine_similarity(data[:2,]))
    np.testing.assert_allclose(expected_distance, sym.metric(data[:2,]))
Example #13
Source Project: mvlearn Author: neurodata File: kcca.py License: Apache License 2.0 | 6 votes |
def _make_kernel(X, Y, ktype, constant=0.1, degree=2.0, sigma=1.0): # Linear kernel if ktype == "linear": return (X @ Y.T) # Polynomial kernel elif ktype == "poly": return (X @ Y.T + constant) ** degree # Gaussian kernel elif ktype == "gaussian": distmat = euclidean_distances(X, Y, squared=True) return np.exp(-distmat / (2 * sigma ** 2)) # Linear diagonal kernel elif ktype == "linear-diag": return (X @ Y.T).diagonal() # Polynomial diagonal kernel elif ktype == "poly-diag": return ((X @ Y.T + constant) ** degree).diagonal() # Gaussian diagonal kernel elif ktype == "gaussian-diag": return np.exp(-np.sum(np.power((X-Y), 2), axis=1)/(2*sigma**2))
Example #14
Source Project: scanorama Author: brianhie File: time_align.py License: MIT License | 5 votes |
def time_align_correlate(alignments, time):
    """Correlate pairwise time distances against alignment scores.

    Prints Spearman and Pearson correlations over all unordered pairs.
    """
    pairwise_times = euclidean_distances(time, time)
    assert(pairwise_times.shape == alignments.shape)
    n_rows, n_cols = pairwise_times.shape
    gaps, scores = [], []
    # Upper triangle only: each unordered pair (i, j) is counted once.
    for i in range(n_rows):
        for j in range(i + 1, n_cols):
            gaps.append(pairwise_times[i, j])
            scores.append(alignments[i, j])
    print('Spearman rho = {}'.format(spearmanr(gaps, scores)))
    print('Pearson rho = {}'.format(pearsonr(gaps, scores)))
Example #15
Source Project: Talking-Face-Generation-DAVS Author: Hangz-nju-cuhk File: embedding_utils.py License: MIT License | 5 votes |
def L2retrieval(clips_embed, captions_embed, return_ranks=False):
    """Caption-to-clip retrieval scored by euclidean distance.

    For each caption, ranks all clips by L2 distance and reports
    recall@{1,5,10,50}, median rank, and mean rank (ranks are 1-based
    in the reported medr/meanr). Optionally also returns the raw ranks
    and the top-1 retrieved indices.
    """
    captions_num = captions_embed.shape[0]
    import time
    t1 = time.time()
    d = euclidean_distances(captions_embed, clips_embed)
    inds = np.argsort(d)
    # Rank of the matching clip: position of index i in row i's sort order.
    num = np.arange(captions_num).reshape(captions_num, 1)
    ranks = np.where(inds == num)[1]
    top1 = inds[:, 0]
    t2 = time.time()
    print((t2 - t1))

    r1 = 100.0 * len(np.where(ranks < 1)[0]) / len(ranks)
    r5 = 100.0 * len(np.where(ranks < 5)[0]) / len(ranks)
    r10 = 100.0 * len(np.where(ranks < 10)[0]) / len(ranks)
    r50 = 100.0 * len(np.where(ranks < 50)[0]) / len(ranks)
    # Plus 1 because the index starts from 0.
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1

    if return_ranks:
        return (r1, r5, r10, r50, medr, meanr), (ranks, top1)
    return (r1, r5, r10, r50, medr, meanr)
Example #16
Source Project: scikit-learn-extra Author: scikit-learn-contrib File: test_k_medoids.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_random_deterministic():
    """'random' init must be fully determined by the random state."""
    rng = np.random.RandomState(seed)
    features = load_iris()["data"]
    dist = euclidean_distances(features)
    medoids = KMedoids(init="random")._initialize_medoids(dist, 4, rng)
    assert_array_equal(medoids, [47, 117, 67, 103])
Example #17
Source Project: scikit-learn-extra Author: scikit-learn-contrib File: test_k_medoids.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_heuristic_deterministic():
    """Result of heuristic init method should not depend on random state."""
    rng1 = np.random.RandomState(1)
    rng2 = np.random.RandomState(2)
    features = load_iris()["data"]
    dist = euclidean_distances(features)
    medoids_1 = KMedoids(init="heuristic")._initialize_medoids(dist, 10, rng1)
    medoids_2 = KMedoids(init="heuristic")._initialize_medoids(dist, 10, rng2)
    # Two different seeds must give the same medoids.
    assert_array_equal(medoids_1, medoids_2)
Example #18
Source Project: scikit-multiflow Author: scikit-multiflow File: online_smote_bagging.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def online_smote(self, k=5):
    """Generate a SMOTE-style synthetic positive sample online.

    Interpolates between the most recent positive sample and one of its
    k nearest earlier positives, using this object's random state. With
    fewer than two positives the latest sample is returned unchanged.
    """
    if len(self.pos_samples) <= 1:
        return self.pos_samples[-1]

    latest = self.pos_samples[-1]
    dists = euclidean_distances(self.pos_samples[:-1], [latest])[0]
    neighbors = np.argsort(dists)
    # Can't pick from more neighbors than exist.
    k = min(k, len(neighbors))
    pick = self._random_state.randint(0, k)
    gamma = self._random_state.rand()
    return latest + gamma * (latest - self.pos_samples[neighbors[pick]])
Example #19
Source Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_pairwise.py License: MIT License | 5 votes |
def test_euclidean_distances_known_result(x_array_constr, y_array_constr):
    """Pairwise euclidean distances on a hand-checked 1-D example."""
    X = x_array_constr([[0]])
    Y = y_array_constr([[1], [2]])
    result = euclidean_distances(X, Y)
    assert_allclose(result, [[1., 2.]])
Example #20
Source Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_pairwise.py License: MIT License | 5 votes |
def test_euclidean_distances_with_norms(dtype, y_array_constr):
    """Passing correct {X,Y}_norm_squared must not change the result,
    and passing wrong norms must change it."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((10, 10)).astype(dtype, copy=False)
    Y = rng.random_sample((20, 10)).astype(dtype, copy=False)

    # Norms will only be used if their dtype is float64.
    X_norm_sq = (X.astype(np.float64) ** 2).sum(axis=1).reshape(1, -1)
    Y_norm_sq = (Y.astype(np.float64) ** 2).sum(axis=1).reshape(1, -1)

    Y = y_array_constr(Y)

    baseline = euclidean_distances(X, Y)
    with_x = euclidean_distances(X, Y, X_norm_squared=X_norm_sq)
    with_y = euclidean_distances(X, Y, Y_norm_squared=Y_norm_sq)
    with_both = euclidean_distances(X, Y,
                                    X_norm_squared=X_norm_sq,
                                    Y_norm_squared=Y_norm_sq)
    assert_allclose(with_x, baseline)
    assert_allclose(with_y, baseline)
    assert_allclose(with_both, baseline)

    # Wrong precomputed norms must produce a wrong answer.
    wrong_D = euclidean_distances(X, Y,
                                  X_norm_squared=np.zeros_like(X_norm_sq),
                                  Y_norm_squared=np.zeros_like(Y_norm_sq))
    with pytest.raises(AssertionError):
        assert_allclose(wrong_D, baseline)
Example #21
Source Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_pairwise.py License: MIT License | 5 votes |
def test_euclidean_distances_extreme_values(dtype, eps, rtol, dim):
    """Near-equal points must still yield correct distances (float32 upcast).

    On float64 there are still precision issues.
    NOTE(review): the `rtol` fixture parameter is unused here — the
    assertion hard-codes rtol=1e-5, matching the original.
    """
    X = np.array([[1.] * dim], dtype=dtype)
    Y = np.array([[1. + eps] * dim], dtype=dtype)
    actual = euclidean_distances(X, Y)
    expected = cdist(X, Y)
    assert_allclose(actual, expected, rtol=1e-5)
Example #22
Source Project: nlp_research Author: zhufz File: test_match.py License: MIT License | 5 votes |
def __call__(self, text):
    """Classify/match `text` against the loaded question set.

    'point' mode: `text` is "<a>||<b>"; returns (label, score) for the pair.
    Otherwise returns (label, score, selected_id, matched_text) for the
    best-matching stored question, using either the 'cross' or the
    'represent' similarity mode.
    """
    if self.tfrecords_mode == 'point':
        # Pairwise scoring: the input must carry both sentences.
        assert text.find('||') != -1,"input should cotain two sentences seperated by ||"
        text_a = text.split('||')[0]
        text_b = text.split('||')[-1]
        pred,score = self._get_label([text_a], [text_b], need_preprocess = True)
        return pred[0][0], score[0][0]
    # Load user-defined questions (user-defined entries take priority).
    if self.sim_mode == 'cross':
        text_list = self.text_list
        label_list = self.label_list
        if self.zdy != {}:
            # Prepend custom entries so they win on argmax ties.
            text_list = self.zdy['text_list'] + text_list
            label_list = self.zdy['label_list'] + label_list
        # NOTE(review): scores are computed against self.text_list, not the
        # zdy-augmented text_list — verify this asymmetry is intentional.
        pred,score = self._get_label([text], self.text_list, need_preprocess = True)
        selected_id = np.argmax(score)
        out_score = score[selected_id]
    elif self.sim_mode == 'represent':
        text_list = self.text_list
        vec_list = self.vec_list
        label_list = self.label_list
        if self.zdy != {}:
            text_list = self.zdy['text_list'] + text_list
            vec_list = np.concatenate([self.zdy['vec_list'], self.vec_list], axis = 0)
            label_list = self.zdy['label_list'] + label_list
        vec = self._get_vecs([text], need_preprocess = True)
        if self.is_distance:
            # Distance mode: smaller is better; report 1 - distance as score.
            scores = euclidean_distances(vec, vec_list)[0]
            selected_id = np.argmin(scores)
            out_score = 1 - scores[selected_id]
        else:
            scores = cosine_similarity(vec, vec_list)[0]
            selected_id = np.argmax(scores)
            out_score = scores[selected_id]
    else:
        raise ValueError('unknown sim mode, represent or cross?')
    ret = (label_list[selected_id], out_score, selected_id, \
           self.text_list[selected_id])
    return ret
Example #23
Source Project: nlp_research Author: zhufz File: similarity.py License: MIT License | 5 votes |
def similarity(self, query, type):
    """Score `query` against every corpus document with the chosen metric.

    Supported `type` values: 'cosine', 'manhattan', 'euclidean', 'bm25'.
    Returns a list of one score per corpus entry.
    """
    assert self.corpus != None, "self.corpus can't be None"
    ret = []
    if type == 'cosine':
        query_vec = self.get_vector(query)
        for doc_vec in self.corpus_vec:
            ret.append(cosine_similarity(doc_vec, query_vec)[0][0])
    elif type == 'manhattan':
        query_vec = self.get_vector(query)
        for doc_vec in self.corpus_vec:
            ret.append(manhattan_distances(doc_vec, query_vec)[0][0])
    elif type == 'euclidean':
        query_vec = self.get_vector(query)
        for doc_vec in self.corpus_vec:
            ret.append(euclidean_distances(doc_vec, query_vec)[0][0])
    elif type == 'bm25':
        # BM25 scores all documents in one call from the tokenized query.
        ret = self.bm25_model.get_scores(query.split())
    else:
        raise ValueError('similarity type error:%s'%type)
    return ret
Example #24
Source Project: DHGNN Author: iMoonLab File: construct_hypergraph.py License: MIT License | 5 votes |
def _construct_edge_list_from_cluster(X, clusters, adjacent_clusters, k_neighbors) -> np.array:
    """
    Construct an edge list (numpy array) from k-means clusters for a single modality.

    :param X: feature matrix, one row per node
    :param clusters: number of clusters for k-means
    :param adjacent_clusters: number of nearest clusters pooled per node
    :param k_neighbors: number of neighbors sampled per node
    :return: (N, k_neighbors) array of sampled neighbor ids per node
    """
    N = X.shape[0]
    kmeans = KMeans(n_clusters=clusters, random_state=0).fit(X)
    centers = kmeans.cluster_centers_
    # Distance of every node to every cluster center.
    dis = euclidean_distances(X, centers)
    # For each node, indices of its `adjacent_clusters` nearest centers.
    _, cluster_center_dict = torch.topk(torch.Tensor(dis), adjacent_clusters, largest=False)
    cluster_center_dict = cluster_center_dict.numpy()
    point_labels = kmeans.labels_
    # Member-node indices of each cluster, by cluster id.
    point_in_which_cluster = [np.where(point_labels == i)[0] for i in range(clusters)]
    def _list_cat(list_of_array):
        """
        Flatten a list of 1-D arrays into one python list.
        example: [[0,1],[3,5,6],[-1]] -> [0,1,3,5,6,-1]
        :param list_of_array: list of np.array
        :return: list of numbers
        """
        ret = list()
        for array in list_of_array:
            ret += array.tolist()
        return ret
    # Candidate neighbor pool per node: members of its nearest clusters.
    cluster_neighbor_dict = [_list_cat([point_in_which_cluster[cluster_center_dict[point][i]]
                                        for i in range(adjacent_clusters)])
                             for point in range(N)]
    # Ensure every node is in its own candidate pool.
    for point, entry in enumerate(cluster_neighbor_dict):
        entry.append(point)
    sampled_ids = [sample_ids(cluster_neighbor_dict[point], k_neighbors) for point in range(N)]
    return np.array(sampled_ids)
Example #25
Source Project: DHGNN Author: iMoonLab File: layers.py License: MIT License | 5 votes |
def _cluster_select(self, ids, feats):
    """
    Compute k-means centers and cluster labels of each node;
    return top #n_cluster nearest cluster transformed features.

    The sampled index tensor is built once over ALL nodes and cached on
    self.kmeans; later calls reuse it and only slice by `ids`.

    :param ids: indices selected during train/valid/test, torch.LongTensor
    :param feats: node feature tensor, one row per node
    :return: top #n_cluster nearest cluster mapped features, (N, n_center, kc, d)
    """
    if self.kmeans is None:
        _N = feats.size(0)
        np_feats = feats.detach().cpu().numpy()
        kmeans = KMeans(n_clusters=self.n_cluster, random_state=0, n_jobs=-1).fit(np_feats)
        centers = kmeans.cluster_centers_
        # Distance of every node to every cluster center.
        dis = euclidean_distances(np_feats, centers)
        # For each node, indices of its `n_center` nearest centers.
        _, cluster_center_dict = torch.topk(torch.Tensor(dis), self.n_center, largest=False)
        cluster_center_dict = cluster_center_dict.numpy()
        point_labels = kmeans.labels_
        # Member-node indices of each cluster, by cluster id.
        point_in_which_cluster = [np.where(point_labels == i)[0] for i in range(self.n_cluster)]
        # Sample `kc` member ids from each of a node's nearest clusters.
        idx = torch.LongTensor([[sample_ids_v2(point_in_which_cluster[cluster_center_dict[point][i]], self.kc)
                                 for i in range(self.n_center)]
                                for point in range(_N)])  # (_N, n_center, kc)
        self.kmeans = idx  # cache: clustering runs once per instance
    else:
        idx = self.kmeans
    idx = idx[ids]
    N = idx.size(0)
    d = feats.size(1)
    # Gather sampled node features and reshape to per-cluster groups.
    cluster_feats = feats[idx.view(-1)].view(N, self.n_center, self.kc, d)
    return cluster_feats  # (N, n_center, kc, d)
Example #26
Source Project: anvio Author: merenlab File: clustering.py License: GNU General Public License v3.0 | 5 votes |
def get_newick_tree_data_for_dict(d, transpose=False, linkage=constants.linkage_method_default, distance=constants.distance_metric_default, norm='l1'):
    """Hierarchically cluster a dict-of-vectors and return a newick tree.

    Rows (or columns, when `transpose` is set) of the resulting data
    frame become the tree's leaf labels.
    """
    is_distance_and_linkage_compatible(distance, linkage)
    vectors = pd.DataFrame.from_dict(d, orient='index')

    # Leaf id -> sample name mapping; columns label the leaves when transposed.
    if transpose:
        id_to_sample_dict = {i: name for i, name in enumerate(vectors.columns)}
    else:
        id_to_sample_dict = {i: name for i, name in enumerate(vectors.index)}

    return get_newick_from_matrix(vectors, distance, linkage, norm,
                                  id_to_sample_dict, transpose=transpose)
Example #27
Source Project: tslearn Author: tslearn-team File: metrics.py License: BSD 2-Clause "Simplified" License | 5 votes |
def compute(self):
    """Compute the squared-euclidean distance matrix between X and Y.

    Returns
    -------
    D: array, shape = [m, n]
        Distance matrix.
    """
    distances = euclidean_distances(self.X, self.Y, squared=True)
    return distances
Example #28
Source Project: perfect_match Author: d909b File: news_benchmark.py License: MIT License | 5 votes |
def get_centroid_weights(self, x):
    """Distance from the standardized entry `x` to each stored centroid.

    Args:
        x: a single raw entry, standardized via self.data_access before use.

    Returns:
        numpy.ndarray of shape (n_centroids,) with one euclidean distance
        per centroid (self.centroids holds (centroid, ...) tuples).
    """
    # Standardize once, not once per centroid as the original did.
    standardized = self.data_access.standardise_entry(x).reshape((1, -1))
    # A list comprehension instead of map(): on Python 3, map() returns a
    # lazy iterator and np.squeeze(map_obj) would NOT evaluate the
    # distances (it wraps the iterator in a 0-d object array).
    similarities = [
        euclidean_distances(standardized, entry[0].reshape((1, -1)))
        for entry in self.centroids
    ]
    return np.squeeze(similarities)
Example #29
Source Project: Same-Size-K-Means Author: ndanielsen File: equal_groups.py License: BSD 3-Clause "New" or "Revised" License | 5 votes |
def _transform(self, X):
    """Guts of the transform method; no input validation.

    Maps each sample to its vector of distances from the fitted centers.
    """
    return euclidean_distances(X, self.cluster_centers_)
Example #30
Source Project: soft-dtw Author: mblondel File: distance.py License: BSD 2-Clause "Simplified" License | 5 votes |
def compute(self):
    """Compute the squared-euclidean distance matrix between X and Y.

    Returns
    -------
    D: array, shape = [m, n]
        Distance matrix.
    """
    distances = euclidean_distances(self.X, self.Y, squared=True)
    return distances