Python sklearn.metrics.pairwise.pairwise_distances() Examples
The following are 30 code examples of sklearn.metrics.pairwise.pairwise_distances().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source File: classification.py From default-credit-card-prediction with MIT License | 7 votes |
def predict(self, X):
    """
    Classify the input data assigning the label of the nearest prototype

    Keyword arguments:
    X -- The feature vectors

    Returns a float array with one entry per row of X: the entry of
    ``self.outcomes`` whose prototype in ``self.M_`` is closest to that row.
    """
    classification = np.zeros(len(X))

    # Metrics handed straight to pairwise_distances; any other value falls
    # back to euclidean, as the original if/elif/else chain did.
    supported = ("euclidean", "minkowski", "manhattan", "mahalanobis")
    if self.distance_metric in supported:
        metric = self.distance_metric
    else:
        metric = "euclidean"

    # compute distances to the prototypes (template matching) -- exactly once
    distances = pairwise_distances(X, self.M_, metric)

    # choose the class belonging to nearest prototype distance; argmin picks
    # the first minimum, like list.index(min(...)) did
    for i, nearest in enumerate(np.argmin(distances, axis=1)):
        classification[i] = self.outcomes[nearest]
    return classification
Example #2
Source File: test_t_sne.py From twitter-stock-recommendation with MIT License | 6 votes |
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    """Compare the Barnes-Hut t-SNE gradient with an expected gradient.

    Joint probabilities are derived from the pairwise distances of
    ``pos_input``, stored as a CSR matrix and handed to
    ``_barnes_hut_tsne.gradient`` together with the embedding ``pos_output``.
    The buffer passed to that call must then equal ``grad_output`` to four
    decimals.

    NOTE(review): the ``neighbors`` argument is cast and then replaced by the
    CSR column indices, and ``skip_num_points`` is not forwarded (a literal 0
    is passed) -- confirm whether that is intended.
    """
    from scipy.sparse import csr_matrix

    dist = pairwise_distances(pos_input).astype(np.float32)
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)

    joint = _joint_probabilities(dist, perplexity, verbose)
    P = csr_matrix(squareform(joint).astype(np.float32))
    gradient = np.zeros(pos_output.shape, dtype=np.float32)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              gradient, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(gradient, grad_output, decimal=4)
Example #3
Source File: test_pairwise.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_paired_distances(metric, func):
    """Check ``paired_distances`` against the dedicated function ``func``."""
    rng = np.random.RandomState(0)
    # Two independent random matrices of the same shape, so Y != X.
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((5, 4))

    expected = paired_distances(X, Y, metric=metric)

    # The dedicated function must agree on dense and on CSR input.
    dense_result = func(X, Y)
    assert_array_almost_equal(expected, dense_result)
    sparse_result = func(csr_matrix(X), csr_matrix(Y))
    assert_array_almost_equal(expected, sparse_result)

    if metric not in PAIRWISE_DISTANCE_FUNCTIONS:
        return

    # The diagonal of the full pairwise matrix holds the paired distances.
    full_matrix = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
    assert_array_almost_equal(np.diag(full_matrix), expected)
Example #4
Source File: main.py From FaceRecognition with MIT License | 6 votes |
def find_matching_ids(self, embs):
    """Match each embedding in ``embs`` to the closest stored identity.

    Returns a pair ``(matching_ids, matching_distances)`` with one entry per
    embedding. When ``self.id_names`` is empty, every id is ``None`` and
    every distance is ``np.inf``. Otherwise an embedding whose nearest stored
    embedding is closer than ``self.distance_treshold`` gets that identity's
    name and distance; all others get ``None`` for both.
    """
    if not self.id_names:
        return [None] * len(embs), [np.inf] * len(embs)

    matching_ids = []
    matching_distances = []
    for row in pairwise_distances(embs, self.embeddings):
        nearest = np.argmin(row)
        is_match = row[nearest] < self.distance_treshold
        matching_ids.append(self.id_names[nearest] if is_match else None)
        matching_distances.append(row[nearest] if is_match else None)
    return matching_ids, matching_distances
Example #5
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_trustworthiness_precomputed_deprecation():
    """Exercise the deprecated ``precomputed`` flag of ``trustworthiness``.

    FIXME: Remove this test in v0.23. The flag is deprecated but is expected
    to keep working (with a DeprecationWarning) until then.
    """
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)

    def deprecated_score(**kwargs):
        # Score a precomputed distance matrix, requiring the warning.
        return assert_warns(DeprecationWarning, trustworthiness,
                            pairwise_distances(X), X, **kwargs)

    assert_equal(deprecated_score(precomputed=True), 1.)
    assert_equal(deprecated_score(metric='precomputed', precomputed=True), 1.)
    # Raw features (not a distance matrix) with the flag set must be rejected.
    assert_raises(ValueError, assert_warns, DeprecationWarning,
                  trustworthiness, X, X, metric='euclidean', precomputed=True)
    assert_equal(deprecated_score(metric='euclidean', precomputed=True), 1.)
Example #6
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    """Compare the Barnes-Hut t-SNE gradient with an expected gradient.

    Joint probabilities are derived from the pairwise distances of
    ``pos_input``, stored as a CSR matrix and handed to
    ``_barnes_hut_tsne.gradient`` together with the embedding ``pos_output``.
    The buffer passed to that call must then equal ``grad_output`` to four
    decimals.

    NOTE(review): the ``neighbors`` argument is cast and then replaced by the
    CSR column indices, and ``skip_num_points`` is not forwarded (a literal 0
    is passed) -- confirm whether that is intended.
    """
    from scipy.sparse import csr_matrix

    dist = pairwise_distances(pos_input).astype(np.float32)
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)

    joint = _joint_probabilities(dist, perplexity, verbose)
    P = csr_matrix(squareform(joint).astype(np.float32))
    gradient = np.zeros(pos_output.shape, dtype=np.float32)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              gradient, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(gradient, grad_output, decimal=4)
Example #7
Source File: TestMyself_Multithreading.py From MTCNN-VGG-face with MIT License | 6 votes |
def Calculate_Distance_1(dist1, dist2, metric, min_predicts, Lists_Num):
    """Scan every second entry of ``dist1`` for a match against ``dist2``.

    Resets the module-level flags ``ThreadingState1`` and ``ThreadingState2``
    to 0, then compares ``dist1[0]``, ``dist1[2]``, ... with ``dist2`` for
    ``Lists_Num // 2`` steps. A first distance of at most 0.12 sets
    ``ThreadingState1`` to 1 and stops the scan. The scan also stops as soon
    as ``ThreadingState2`` is seen to be 1.

    NOTE(review): ``min_predicts`` is only rebound locally and nothing is
    returned, so the running minimum is not visible to the caller -- confirm
    whether that is intended.
    """
    global ThreadingState1
    global ThreadingState2
    ThreadingState1 = 0
    ThreadingState2 = 0
    # Floor division: range() rejects the float that `/` produces on Python 3.
    for step in range(Lists_Num // 2):
        predicts1 = pw.pairwise_distances(dist1[2 * step], dist2, metric=metric)
        score = predicts1[0][0]
        if score > 0.12:
            # Compare by value; `is 1` relies on int identity.
            if ThreadingState2 == 1:
                break
            if score < min_predicts:
                min_predicts = score
        else:
            min_predicts = score
            ThreadingState1 = 1
            break
Example #8
Source File: UDFS.py From scikit-feature with GNU General Public License v2.0 | 6 votes |
def construct_M(X, k, gamma):
    """
    This function constructs the M matrix described in the paper

    X is a 2-D array with one sample per row, k is the number of neighbours
    taken per sample (the k+1 closest rows by pairwise distance are used;
    presumably the sample itself is among them -- confirm), and gamma is the
    ridge term added before the matrix inverse. Returns
    ``X.T @ (sum_i Si H Bi H Si.T) @ X``.
    """
    n_sample, _ = X.shape
    Xt = X.T
    D = pairwise_distances(X)
    # sort the distance matrix D in ascending order
    idx = np.argsort(D, axis=1)
    # choose the k-nearest neighbors for each instance
    idx_new = idx[:, 0:k+1]
    # 1.0 forces true division; on Python 2, 1/(k+1) is 0 and H would
    # degenerate to the identity.
    H = np.eye(k+1) - 1.0/(k+1) * np.ones((k+1, k+1))
    I = np.eye(k+1)
    columns = np.arange(k+1)
    Mi = np.zeros((n_sample, n_sample))
    for i in range(n_sample):
        neighbours = idx_new[i, :]
        Xi = Xt[:, neighbours]
        Xi_tilde = np.dot(Xi, H)
        Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma*I)
        # Selection matrix of sample i: column q marks the q-th neighbour of
        # sample i. The previous code indexed idx_new[q] (the whole row of
        # sample q), which ignored i altogether.
        Si = np.zeros((n_sample, k+1))
        Si[neighbours, columns] = 1
        Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
    M = np.dot(np.dot(X.T, Mi), X)
    return M
Example #9
Source File: density.py From modAL with MIT License | 6 votes |
def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean') -> np.ndarray:
    """
    Computes the information density of each sample in the given data.

    Each pairwise distance d is turned into a similarity 1/(1+d); the
    density of a sample is the mean of its similarities to all samples.

    Args:
        X: The data for which the information density is to be calculated.
        metric: The metric to be used. Should take two 1d numpy.ndarrays for argument.

    Todo:
        Should work with all possible modALinput.
        Perhaps refactor the module to use some stuff from sklearn.metrics.pairwise

    Returns:
        The information density for each sample.
    """
    distances = pairwise_distances(X, X, metric=metric)
    similarity_mtx = 1/(1+distances)
    return similarity_mtx.mean(axis=1)
Example #10
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_precomputed_cross_validation():
    """Cross-validation must score raw features and precomputed distances alike.

    Ensures the precomputed distance array is split correctly.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(20, 2)
    D = pairwise_distances(X, metric='euclidean')
    y = rng.randint(3, size=20)

    estimators = (
        neighbors.KNeighborsClassifier,
        neighbors.RadiusNeighborsClassifier,
        neighbors.KNeighborsRegressor,
        neighbors.RadiusNeighborsRegressor,
    )
    for Est in estimators:
        on_features = Est(algorithm_params={'n_candidates': 5})
        metric_score = cross_val_score(on_features, X, y)
        on_distances = Est(metric='precomputed',
                           algorithm_params={'n_candidates': 5})
        precomp_score = cross_val_score(on_distances, D, y)
        assert_array_equal(metric_score, precomp_score)
Example #11
Source File: bidirectional_eval.py From Cross-Modal-Projection-Learning with MIT License | 6 votes |
def _eval_retrieval(PX, PY, GX, GY):
    """Rank GX against PX and report Recall@1/5/10 and mean average precision.

    The distance metric is read from the module-level ``args.method``.
    Each score is printed as a percentage and the four scores are returned
    as ``(recall_1, recall_5, recall_10, map_value)``.
    """
    # D_{i, j} is the distance between the ith array from PX and the jth array from GX.
    D = pairwise_distances(PX, GX, metric=args.method, n_jobs=-2)
    Rank = np.argsort(D, axis=1)

    # Evaluation
    # print(...) with a single argument behaves the same as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    recalls = []
    for k in (1, 5, 10):
        recall = recall_at_k(Rank, PY, GY, k=k)  # Recall @ K
        print("{:8}{:8.2%}".format('Recall@{}'.format(k), recall))
        recalls.append(recall)
    recall_1, recall_5, recall_10 = recalls

    map_value = mean_average_precision(Rank, PY, GY)  # Mean Average Precision
    print("{:8}{:8.2%}".format('MAP', map_value))

    return recall_1, recall_5, recall_10, map_value
Example #12
Source File: k_medoids.py From alphacsc with BSD 3-Clause "New" or "Revised" License | 6 votes |
def transform(self, X):
    """Map X into cluster-distance space.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape=(n_samples, n_features)
        Data to transform.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape=(n_samples, n_clusters)
        Distances from each sample to each cluster center.
    """
    X = check_array(X, accept_sparse=['csr', 'csc'])
    check_is_fitted(self, "cluster_centers_")

    metric = self.distance_metric
    if not callable(metric):
        # Named metric: let scikit-learn compute the distances.
        return pairwise_distances(X, Y=self.cluster_centers_, metric=metric)
    # User-supplied callable: it is called on the full matrices directly.
    return metric(X, Y=self.cluster_centers_)
Example #13
Source File: test_approximate.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_neighbors_accuracy_with_n_candidates():
    """Check that LSHForest accuracy does not decrease as `n_candidates` grows."""
    n_candidates_values = np.array([.1, 50, 500])
    n_samples, n_features = 100, 10
    n_iter, n_points = 10, 5
    rng = np.random.RandomState(42)
    accuracies = np.zeros(n_candidates_values.shape[0], dtype=float)
    X = rng.rand(n_samples, n_features)

    make_forest = ignore_warnings(LSHForest, category=DeprecationWarning)
    for i, n_candidates in enumerate(n_candidates_values):
        lshf = make_forest(n_candidates=n_candidates)
        ignore_warnings(lshf.fit)(X)
        for _ in range(n_iter):
            query = X[rng.randint(0, n_samples)].reshape(1, -1)
            neighbors = lshf.kneighbors(query, n_neighbors=n_points,
                                        return_distance=False)
            # Exact neighbours by cosine distance, used as ground truth.
            distances = pairwise_distances(query, X, metric='cosine')
            ranks = np.argsort(distances)[0, :n_points]
            n_common = np.intersect1d(ranks, neighbors).shape[0]
            accuracies[i] += n_common / float(n_points)
        accuracies[i] /= float(n_iter)

    # Sorted accuracies should be equal to original accuracies
    print('accuracies:', accuracies)
    assert_true(np.all(np.diff(accuracies) >= 0),
                msg="Accuracies are not non-decreasing.")
    # Highest accuracy should be strictly greater than the lowest
    assert_true(np.ptp(accuracies) > 0,
                msg="Highest accuracy is not strictly greater than lowest.")
Example #14
Source File: test_pairwise.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pairwise_callable_nonstrict_metric():
    """pairwise_distances must call a callable metric even for identical points.

    A callable with metric(x, x) != 0 is allowed, so the diagonal may not be
    skipped and set to 0 as it could be for a strict metric.
    """
    def constant_metric(x, y):
        return 5

    distances = pairwise_distances([[1.]], metric=constant_metric)
    assert_equal(distances[0, 0], 5)
Example #15
Source File: test_pairwise.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pairwise_parallel():
    """Yield one ``check_pairwise_parallel`` case per (function, metric, kwargs)."""
    wminkowski_kwds = dict(w=np.arange(1, 5).astype('double'), p=1)
    cases = (
        (pairwise_distances, 'euclidean', {}),
        (pairwise_distances, wminkowski, wminkowski_kwds),
        (pairwise_distances, 'wminkowski', wminkowski_kwds),
        (pairwise_kernels, 'polynomial', {'degree': 1}),
        (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}),
    )
    for case in cases:
        func, metric, kwds = case
        yield check_pairwise_parallel, func, metric, kwds
Example #16
Source File: cosine.py From ibench with MIT License | 5 votes |
def _compute(self):
    """Store the pairwise cosine distances of ``self._X`` in ``self._cor_dist``."""
    distances = pairwise_distances(self._X, metric='cosine', n_jobs=-1)
    self._cor_dist = distances
Example #17
Source File: test_neighbors.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_precomputed_cross_validation():
    """Cross-validation must score raw features and precomputed distances alike.

    Ensures the precomputed distance array is split correctly.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(20, 2)
    D = pairwise_distances(X, metric='euclidean')
    y = rng.randint(3, size=20)

    estimators = (
        neighbors.KNeighborsClassifier,
        neighbors.RadiusNeighborsClassifier,
        neighbors.KNeighborsRegressor,
        neighbors.RadiusNeighborsRegressor,
    )
    for Est in estimators:
        on_features = Est()
        metric_score = cross_val_score(on_features, X, y)
        on_distances = Est(metric='precomputed')
        precomp_score = cross_val_score(on_distances, D, y)
        assert_array_equal(metric_score, precomp_score)
Example #18
Source File: corr.py From ibench with MIT License | 5 votes |
def _compute(self):
    """Store the pairwise correlation distances of ``self._X`` in ``self._cor_dist``."""
    distances = pairwise_distances(self._X, metric='correlation', n_jobs=-1)
    self._cor_dist = distances
Example #19
Source File: RS-sklearn-rating.py From AiLearning with GNU General Public License v3.0 | 5 votes |
def calc_similarity(n_users, n_items, train_data, test_data):
    """Build the user-item matrices and their cosine distance matrices.

    Returns ``(train_data_matrix, test_data_matrix, user_similarity,
    item_similarity, item_popular)``. Despite their names, the two
    "similarity" matrices hold what ``pairwise_distances(metric="cosine")``
    returns. ``item_popular`` maps an item column index to the number of
    non-zero training entries in that column.
    """
    def to_matrix(frame):
        # One (n_users, n_items) matrix per data set. Fields 1-3 of every row
        # tuple are used as user id, item id and value; ids are presumably
        # 1-based -- confirm against the data.
        matrix = np.zeros((n_users, n_items))
        for row in frame.itertuples():
            matrix[row[1] - 1, row[2] - 1] = row[3]
        return matrix

    # Create the user-item matrix for the training and the test data.
    train_data_matrix = to_matrix(train_data)
    test_data_matrix = to_matrix(test_data)

    # Use sklearn's pairwise_distances to compute the cosine measure.
    print("1:", np.shape(train_data_matrix))  # rows: users, columns: movies
    print("2:", np.shape(train_data_matrix.T))  # rows: movies, columns: users
    user_similarity = pairwise_distances(train_data_matrix, metric="cosine")
    item_similarity = pairwise_distances(train_data_matrix.T, metric="cosine")

    print('开始统计流行item的数量...', file=sys.stderr)
    # Count, over all users, how often each movie occurs.
    item_popular = {}
    for i_index in range(n_items):
        column = train_data_matrix[:, i_index]
        if np.sum(column) != 0:
            item_popular[i_index] = np.sum(column != 0)

    # save the total number of items
    item_count = len(item_popular)
    print('总共流行item数量 = %d' % item_count, file=sys.stderr)

    return (train_data_matrix, test_data_matrix, user_similarity,
            item_similarity, item_popular)
Example #20
Source File: sklearn-RS-demo-cf-item-test.py From AiLearning with GNU General Public License v3.0 | 5 votes |
def calc_similarity(self):
    """Build the rating matrices and the item-item cosine distance matrix.

    Fills ``self.train_mat`` and ``self.test_mat`` (``n_users`` x ``n_items``)
    from ``self.train_data`` / ``self.test_data``, stores the result of
    ``pairwise_distances(..., metric='cosine')`` over the item rows in
    ``self.item_mat_similarity``, counts the non-zero training entries per
    item into the existing ``self.item_popular`` mapping, and stores the
    number of such items in ``self.item_count``.
    """
    def to_matrix(frame):
        # user_id / item_id are presumably 1-based -- confirm against the data.
        matrix = np.zeros((self.n_users, self.n_items))
        for line in frame.itertuples():
            matrix[int(line.user_id) - 1, int(line.item_id) - 1] = float(line.rating)
        return matrix

    # Create the user-item matrix for the training and the test data.
    self.train_mat = to_matrix(self.train_data)
    self.test_mat = to_matrix(self.test_data)

    # Use sklearn's pairwise_distances to compute the cosine measure.
    # A plain transposed ndarray replaces np.mat(...).T: np.mat no longer
    # exists in NumPy 2.0 and scikit-learn rejects np.matrix input, while
    # the shapes and distances are the same.
    item_rows = self.train_mat.T
    print("1:", np.shape(item_rows))  # rows: movies, columns: users
    # movie-movie distances
    self.item_mat_similarity = pairwise_distances(item_rows, metric='cosine')
    print('item_mat_similarity=', np.shape(self.item_mat_similarity),
          file=sys.stderr)

    print('开始统计流行item的数量...', file=sys.stderr)
    # Count, over all users, how often each movie occurs.
    for i_index in range(self.n_items):
        column = self.train_mat[:, i_index]
        if np.sum(column) != 0:
            self.item_popular[i_index] = np.sum(column != 0)

    # save the total number of items
    self.item_count = len(self.item_popular)
    print('总共流行item数量 = %d' % self.item_count, file=sys.stderr)
# @profile
Example #21
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_pairwise_boolean_distance():
    """Non-regression test for #4523.

    'brute' uses scipy.spatial.distance through pairwise_distances and
    'ball_tree' uses sklearn.neighbors.dist_metrics; both must return the
    same jaccard neighbour distances.
    """
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(6, 5))

    fitted = [
        neighbors.NearestNeighbors(metric="jaccard", algorithm=algorithm).fit(X)
        for algorithm in ('brute', 'ball_tree')
    ]
    brute, ball_tree = fitted
    assert_array_equal(brute.kneighbors(X)[0], ball_tree.kneighbors(X)[0])
Example #22
Source File: test_neighbors.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_non_euclidean_kneighbors():
    """Graph helpers and NearestNeighbors must agree for non-euclidean metrics.

    Also checks that passing a fitted estimator together with a conflicting
    ``metric`` raises ValueError.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius. np.sort returns a sorted copy, so its result
    # has to be kept; previously it was discarded and the radius was read
    # from the unsorted distances.
    dist_array = np.sort(pairwise_distances(X).flatten())
    radius = dist_array[15]

    metrics = ['manhattan', 'chebyshev']

    # Test kneighbors_graph
    for metric in metrics:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(n_neighbors=3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in metrics:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        # .toarray() instead of the removed sparse `.A` shortcut.
        assert_array_equal(nbrs_graph,
                           nbrs1.radius_neighbors_graph(X).toarray())

    # Raise error when wrong parameters are supplied,
    X_nbrs = neighbors.NearestNeighbors(n_neighbors=3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean')
Example #23
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_non_euclidean_kneighbors():
    """Graph helpers and NearestNeighbors must agree for non-euclidean metrics.

    Also checks that passing a fitted estimator together with a conflicting
    ``metric`` raises ValueError.
    """
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius. np.sort returns a sorted copy, so its result
    # has to be kept; previously it was discarded and the radius was read
    # from the unsorted distances.
    dist_array = np.sort(pairwise_distances(X).flatten())
    radius = dist_array[15]

    metrics = ['manhattan', 'chebyshev']

    # Test kneighbors_graph
    for metric in metrics:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(n_neighbors=3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in metrics:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        # .toarray() instead of the removed sparse `.A` shortcut.
        assert_array_equal(nbrs_graph,
                           nbrs1.radius_neighbors_graph(X).toarray())

    # Raise error when wrong parameters are supplied,
    X_nbrs = neighbors.NearestNeighbors(n_neighbors=3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean')
Example #24
Source File: test_neighbors.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_kneighbors_regressor_sparse(sparsemat, n_samples=40, n_features=5,
                                     n_neighbors=5, random_state=0):
    """Test KNeighborsRegressor on sparse matrices of various types.

    One regressor is fitted on ``sparsemat(X)``, a second on the precomputed
    euclidean distance matrix. Both must predict more than 95% of the
    training targets after rounding; the precomputed one must raise
    ValueError when queried with a sparse distance matrix.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    # Builtin int: the np.int alias was removed in NumPy 1.24 and always
    # meant the same type.
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                        algorithm='auto')
    knn.fit(sparsemat(X), y)

    knn_pre = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            metric='precomputed')
    knn_pre.fit(pairwise_distances(X, metric='euclidean'), y)

    for sparsev in SPARSE_OR_DENSE:
        X2 = sparsev(X)
        assert np.mean(knn.predict(X2).round() == y) > 0.95

        X2_pre = sparsev(pairwise_distances(X, metric='euclidean'))
        if issparse(sparsev(X2_pre)):
            assert_raises(ValueError, knn_pre.predict, X2_pre)
        else:
            assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
Example #25
Source File: test_neighbors.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_pairwise_boolean_distance():
    """Non-regression test for #4523.

    'brute' uses scipy.spatial.distance through pairwise_distances and
    'ball_tree' uses sklearn.neighbors.dist_metrics; both must return the
    same jaccard neighbour distances.
    """
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(6, 5))

    fitted = [
        neighbors.NearestNeighbors(metric="jaccard", algorithm=algorithm).fit(X)
        for algorithm in ('brute', 'ball_tree')
    ]
    brute, ball_tree = fitted
    assert_array_equal(brute.kneighbors(X)[0], ball_tree.kneighbors(X)[0])
Example #26
Source File: labeled_eval.py From multilabel-image-classification-tensorflow with MIT License | 5 votes |
def nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1):
    """Computes the n_neighbors nearest neighbors for every row in data.

    Args:
      data: A np.float32 array of shape [num_data, embedding size] holding
        an embedded validation / test dataset.
      tasks: A list of strings of size [num_data] holding the task or sequence
        name that each row belongs to.
      n_neighbors: The number of knn indices to return for each row.
    Returns:
      indices: an np.int32 array of size [num_data, n_neighbors] holding the
        n_neighbors nearest indices for every row in data. These are
        restricted to be from different named sequences (as defined in `tasks`).
    Raises:
      ValueError: if some row has fewer than n_neighbors rows belonging to
        other sequences.
    """
    # Compute the pairwise sequence adjacency matrix from `tasks`.
    num_data = data.shape[0]
    tasks = np.reshape(np.array(tasks), (num_data, 1))
    not_adjacent = (tasks != tasks.T)

    # Compute the symmetric pairwise distance matrix.
    pdist = pairwise_distances(data, metric='sqeuclidean')

    # For every row in the pairwise distance matrix, only consider
    # cross-sequence columns.
    indices = np.zeros((num_data, n_neighbors), dtype=np.int32)
    for idx in range(num_data):
        # Restrict to cross_sequence neighbors.
        candidates = np.flatnonzero(not_adjacent[idx])
        if candidates.size < n_neighbors:
            raise ValueError(
                'row %d has only %d cross-sequence candidates, '
                'need %d' % (idx, candidates.size, n_neighbors))
        # Stable sort: equal distances keep ascending index order, as the
        # previous sorted(..., key=distance) did.
        order = np.argsort(pdist[idx, candidates], kind='mergesort')
        indices[idx] = candidates[order[:n_neighbors]]
    return indices
Example #27
Source File: k_medoids.py From alphacsc with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(self, X):
    """Assign every sample in X to its closest cluster center.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self, "cluster_centers_")
    X = check_array(X, accept_sparse=['csr', 'csc'])

    metric = self.distance_metric
    if not callable(metric):
        distances = pairwise_distances(X, Y=self.cluster_centers_,
                                       metric=metric)
    else:
        distances = metric(X, Y=self.cluster_centers_)

    # The label of a sample is the column (cluster) with the smallest distance.
    return np.argmin(distances, axis=1)
Example #28
Source File: main.py From FaceRecognition with MIT License | 5 votes |
def print_distance_table(self, id_image_paths):
    """Prints distances between id embeddings

    The table has one row and one column per entry of ``id_image_paths``,
    labelled with the last "/"-separated component of each path; every cell
    is 20 characters wide and distances are shown with three decimals.
    """
    distance_matrix = pairwise_distances(self.embeddings, self.embeddings)
    image_names = [path.split("/")[-1] for path in id_image_paths]

    # Header: an empty corner cell followed by the column labels. Plain
    # string building replaces the list comprehension that was used only
    # for its print side effects; the printed text is unchanged.
    header_cells = "".join("{:20}".format(name) for name in image_names)
    print("Distance matrix:\n{:20}".format("") + header_cells, end="")

    for name, distance_row in zip(image_names, distance_matrix):
        cells = "".join("{:20}".format("%0.3f" % distance)
                        for distance in distance_row)
        print("\n{:20}".format(name) + cells, end="")
    print()
Example #29
Source File: rocchioclassifier.py From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict_proba(self, X):
    """
    Returns a matrix for each of the samples to belong to each of the classes.
    The matrix has shape = [n_samples, n_classes] where n_samples is the
    size of the first dimension of the input matrix X and n_classes is the
    number of classes as determined from the parameter 'y' obtained during
    training.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Prediction vector, where n_samples in the number of samples and
        n_features is the number of features.
    """
    probabilities = np.zeros((X.shape[0], self.y.shape[1]), dtype=np.float64)
    distances = pairwise_distances(X, self.centroids_, metric=self.metric)

    # in order to get probability like values, we ensure that the closer
    # the distance is to zero, the closer the probability is to 1
    if self.metric == 'cosine':
        distances = 1 - distances
    else:
        # in the case of euclidean distance metric we need to normalize by
        # the largest distance to get a value between 0 and 1
        largest = distances.max()
        if largest > 0:
            distances = 1 - (distances / largest)
        else:
            # All distances are zero: dividing by the maximum would give
            # NaN, so skip the normalisation (every value becomes 1).
            distances = 1 - distances

    # map back onto a matrix containing all labels
    probabilities[:, self._mem_original_mapping] = distances
    return probabilities
Example #30
Source File: ICGS_NMF.py From altanalyze with Apache License 2.0 | 5 votes |
def caldist(X, i, keys, keylist):
    """Return the key nearest to ``X[i]`` that is not already in ``keylist``.

    Candidates are the entries of ``keys`` other than ``i``; they are ranked
    by euclidean distance between ``X[key]`` and ``X[i]``.

    Raises ValueError when every candidate is already in ``keylist``
    (previously this surfaced as an UnboundLocalError on the return value).
    """
    newlist = [ii for ii in keys if ii != i]
    Xxd = np.array([X[ii].tolist() for ii in newlist])
    Xd = X[i].reshape(1, -1)
    D = pairwise_distances(Xd, Xxd, metric='euclidean')

    # Walk the candidates from nearest to farthest. The previous loop only
    # used the length of the argsort result and so visited the candidates in
    # their original order, ignoring the distances just computed.
    for q in np.argsort(D)[0]:
        if newlist[q] not in keylist:
            return newlist[q]
    raise ValueError('no candidate key outside keylist')