# Python sklearn.metrics.pairwise.pairwise_distances() Examples

The following are 30 code examples of sklearn.metrics.pairwise.pairwise_distances(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
def predict(self, X):
    """
    Classify the input data assigning the label of the nearest prototype.

    Keyword arguments:
    X -- The feature vectors

    Returns a numpy array with one predicted label per row of X.
    """
    # The original code had a stray `if` that broke the elif chain and
    # computed the euclidean distances twice; every branch performed the
    # same call, so validate the metric name once and call once.
    if self.distance_metric in ("euclidean", "minkowski", "manhattan", "mahalanobis"):
        metric = self.distance_metric
    else:
        # Unknown metric: fall back to euclidean (original behavior).
        metric = "euclidean"

    # Template matching: distance from every sample to every prototype.
    distances = pairwise_distances(X, self.M_, metric)

    # Assign each sample the outcome of its nearest prototype.
    # np.argmin picks the first minimum, matching the original
    # tolist().index(min(...)) scan; xrange was Python 2 only.
    classification = np.zeros(len(X))
    for i, nearest in enumerate(np.argmin(distances, axis=1)):
        classification[i] = self.outcomes[nearest]

    return classification
Example #2
def test_paired_distances(metric, func):
    """paired_distances must agree with the reference implementation."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    # A second, distinct matrix so Y != X.
    Y = rng.random_sample((5, 4))

    result = paired_distances(X, Y, metric=metric)
    reference = func(X, Y)
    assert_array_almost_equal(result, reference)

    # Sparse inputs must yield the same answer as dense ones.
    reference_sparse = func(csr_matrix(X), csr_matrix(Y))
    assert_array_almost_equal(result, reference_sparse)

    if metric in PAIRWISE_DISTANCE_FUNCTIONS:
        # The diagonal of the full pairwise matrix is exactly the
        # paired distance.
        full = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
        assert_array_almost_equal(np.diag(full), result)
Example #3
def test_trustworthiness_precomputed_deprecation():
    # FIXME: Remove this test in v0.23

    # Passing the `precomputed` flag to trustworthiness is deprecated,
    # but must keep working (with a DeprecationWarning) until v0.23.
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    D = pairwise_distances(X)

    for extra in ({}, {'metric': 'precomputed'}):
        assert_equal(assert_warns(DeprecationWarning, trustworthiness,
                                  D, X, precomputed=True, **extra), 1.)

    # precomputed=True with raw data and a real metric is an error.
    assert_raises(ValueError, assert_warns, DeprecationWarning,
                  trustworthiness, X, X, metric='euclidean', precomputed=True)

    assert_equal(assert_warns(DeprecationWarning, trustworthiness,
                              D, X, metric='euclidean',
                              precomputed=True), 1.)
Example #4
# NOTE(review): this example is truncated by the scrape — the body stops
# after building the CSR index arrays; the code consuming them (and the
# grad_output / skip_num_points parameters) is missing from this view.
```def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
verbose=False, perplexity=0.1, skip_num_points=0):
# Dense pairwise distances feed the t-SNE joint probabilities.
distances = pairwise_distances(pos_input).astype(np.float32)
args = distances, perplexity, verbose
pos_output = pos_output.astype(np.float32)
neighbors = neighbors.astype(np.int64, copy=False)
pij_input = _joint_probabilities(*args)
# squareform expands the condensed probability vector to a full matrix.
pij_input = squareform(pij_input).astype(np.float32)

from scipy.sparse import csr_matrix
P = csr_matrix(pij_input)

# CSR structure as int64 — presumably for a native (Cython) routine;
# the call itself was dropped by the truncation.
neighbors = P.indices.astype(np.int64)
indptr = P.indptr.astype(np.int64)

Example #5
def construct_M(X, k, gamma):
    """
    Construct the M matrix described in the paper.

    Parameters
    ----------
    X : ndarray of shape (n_sample, n_feature)
        Input data, one sample per row.
    k : int
        Number of nearest neighbors used for the local patches.
    gamma : float
        Regularization strength added before the matrix inversion.

    Returns
    -------
    ndarray of shape (n_feature, n_feature)
        M = X.T @ (sum_i Si (H Bi H) Si.T) @ X
    """
    n_sample, n_feature = X.shape
    Xt = X.T
    D = pairwise_distances(X)
    # Sort the distance matrix D in ascending order per row.
    idx = np.argsort(D, axis=1)
    # k nearest neighbors of each instance (including itself).
    idx_new = idx[:, 0:k+1]
    # Centering matrix; 1.0 guards against Python 2 integer division,
    # where 1/(k+1) would silently evaluate to 0.
    H = np.eye(k+1) - 1.0/(k+1) * np.ones((k+1, k+1))
    I = np.eye(k+1)
    Mi = np.zeros((n_sample, n_sample))
    for i in range(n_sample):
        Xi = Xt[:, idx_new[i, :]]
        Xi_tilde = np.dot(Xi, H)
        Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma*I)
        # Selection matrix mapping the k+1 neighbors of sample i back to
        # the full sample index space.
        Si = np.zeros((n_sample, k+1))
        for q in range(k+1):
            # BUG FIX: the original wrote Si[idx_new[q], q], i.e. the
            # neighbor list of sample q — inconsistent with Xi, which is
            # built from idx_new[i, :]. Each column must select the q-th
            # neighbor of sample i.
            Si[idx_new[i, q], q] = 1
        Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
    M = np.dot(np.dot(X.T, Mi), X)
    return M
Example #6
def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean') -> np.ndarray:
    """
    Calculates the information density metric of the given data using the given metric.

    Args:
        X: The data for which the information density is to be calculated.
        metric: The metric to be used. Should take two 1d numpy.ndarrays for argument.

    Todo:
        Should work with all possible modALinput.
        Perhaps refactor the module to use some stuff from sklearn.metrics.pairwise

    Returns:
        The information density for each sample.
    """
    # Similarity is the reciprocal-shifted distance: identical points
    # score 1, far-apart points tend to 0.
    distances = pairwise_distances(X, X, metric=metric)
    similarity_mtx = 1 / (1 + distances)

    # Average similarity of each sample to the whole data set.
    return similarity_mtx.mean(axis=1)
Example #7
# NOTE(review): Python 2 code (print statements). `args` is a module-level
# options object supplying the metric name — not visible in this file.
```def _eval_retrieval(PX, PY, GX, GY):

# D_{i, j} is the distance between the ith array from PX and the jth array from GX.
D = pairwise_distances(PX, GX, metric=args.method, n_jobs=-2)
# Rank[i] lists gallery indices sorted by increasing distance to probe i.
Rank = np.argsort(D, axis=1)

# Evaluation
recall_1 = recall_at_k(Rank, PY, GY, k=1)  # Recall @ K
print "{:8}{:8.2%}".format('Recall@1', recall_1)

recall_5 = recall_at_k(Rank, PY, GY, k=5)  # Recall @ K
print "{:8}{:8.2%}".format('Recall@5', recall_5)

recall_10 = recall_at_k(Rank, PY, GY, k=10)  # Recall @ K
print "{:8}{:8.2%}".format('Recall@10', recall_10)

map_value = mean_average_precision(Rank, PY, GY)  # Mean Average Precision
print "{:8}{:8.2%}".format('MAP', map_value)

return recall_1, recall_5, recall_10, map_value ```
Example #8
def transform(self, X):
    """Transforms X to cluster-distance space.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape=(n_samples, n_features)
        Data to transform.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape=(n_samples, n_clusters)
        X transformed in the new space of distances to cluster centers.
    """
    X = check_array(X, accept_sparse=['csr', 'csc'])
    check_is_fitted(self, "cluster_centers_")

    # A callable metric is applied directly; a metric name is delegated
    # to sklearn's pairwise_distances.
    metric = self.distance_metric
    if callable(metric):
        return metric(X, Y=self.cluster_centers_)
    return pairwise_distances(X, Y=self.cluster_centers_, metric=metric)
Example #9
# NOTE(review): garbled snippet — the `for Est in (...)` tuple is never
# closed and the loop body lost its indentation, so the code below is not
# syntactically valid as shown.
```def test_precomputed_cross_validation():
# Ensure array is split correctly
rng = np.random.RandomState(0)
X = rng.rand(20, 2)
D = pairwise_distances(X, metric='euclidean')
y = rng.randint(3, size=20)
for Est in (neighbors.KNeighborsClassifier,
neighbors.KNeighborsRegressor,
# Cross-validation scores on raw X must match the scores obtained on
# the precomputed distance matrix D.
metric_score = cross_val_score(Est(algorithm_params={'n_candidates': 5}), X, y)
precomp_score = cross_val_score(Est(metric='precomputed',
algorithm_params={'n_candidates': 5},
),
D, y)
assert_array_equal(metric_score, precomp_score) ```
Example #10
# NOTE(review): Python 2 code (`Lists_Num/2` relies on integer division),
# and the scrape stripped the indentation, so the pairing of the trailing
# `else:` with its `if` is ambiguous as shown — confirm against the
# original project before reuse.
```def Calculate_Distance_1(dist1,dist2,metric,min_predicts,Lists_Num):
i=0
# Compares every other entry of dist1 against dist2, tracking the
# smallest observed distance in min_predicts.
for sublist in range(Lists_Num/2):
predicts1 = pw.pairwise_distances(dist1[i], dist2, metric=metric)
i=i+2
# 0.12 appears to be a hand-tuned rejection threshold — TODO confirm.
if predicts1[0][0] > 0.12:
break
if predicts1[0][0] < min_predicts :
min_predicts = predicts1[0][0]

else:
min_predicts = predicts1[0][0]
break ```
Example #11
# NOTE(review): near-duplicate of the earlier _run_answer_test example and
# likewise truncated — the code that consumes the CSR arrays (and the
# grad_output / skip_num_points parameters) is missing from this view.
```def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
verbose=False, perplexity=0.1, skip_num_points=0):
# Dense pairwise distances feed the t-SNE joint probabilities.
distances = pairwise_distances(pos_input).astype(np.float32)
args = distances, perplexity, verbose
pos_output = pos_output.astype(np.float32)
neighbors = neighbors.astype(np.int64)
pij_input = _joint_probabilities(*args)
# squareform expands the condensed probability vector to a full matrix.
pij_input = squareform(pij_input).astype(np.float32)

from scipy.sparse import csr_matrix
P = csr_matrix(pij_input)

neighbors = P.indices.astype(np.int64)
indptr = P.indptr.astype(np.int64)

Example #12
def find_matching_ids(self, embs):
    """Match each embedding in embs against the enrolled gallery.

    Returns two parallel lists: the matched id name (None when no gallery
    entry is closer than self.distance_treshold) and the matching
    distance (None for rejected matches, np.inf when the gallery is
    empty).
    """
    if not self.id_names:
        # Nothing enrolled: no embedding can match anything.
        return [None] * len(embs), [np.inf] * len(embs)

    matching_ids = []
    matching_distances = []
    for distance_row in pairwise_distances(embs, self.embeddings):
        best = np.argmin(distance_row)
        if distance_row[best] < self.distance_treshold:
            matching_ids.append(self.id_names[best])
            matching_distances.append(distance_row[best])
        else:
            # Nearest gallery entry is still too far away: reject.
            matching_ids.append(None)
            matching_distances.append(None)
    return matching_ids, matching_distances
Example #13
```def dendrogram(data,
vectorizer,
method="ward",
color_threshold=1,
size=10,
filename=None):
"""dendrogram.

"median","centroid","weighted","single","ward","complete","average"
"""
data = list(data)
# get labels
labels = []
for graph in data:
label = graph.graph.get('id', None)
if label:
labels.append(label)
# transform input into sparse vectors
data_matrix = vectorizer.transform(data)

# labels
if not labels:
labels = [str(i) for i in range(data_matrix.shape[0])]

# embed high dimensional sparse vectors in 2D
from sklearn import metrics
distance_matrix = metrics.pairwise.pairwise_distances(data_matrix)
plt.figure(figsize=(size, size))
# NOTE(review): the scrape dropped the call these keyword arguments
# belong to — presumably a scipy.cluster.hierarchy dendrogram call using
# `method` on the distance matrix — so this snippet is incomplete and
# not valid as shown.
color_threshold=color_threshold,
labels=labels,
orientation='right')
if filename is not None:
plt.savefig(filename)
else:
plt.show() ```
Example #14
def gs_exact(X, N, k='auto', seed=None, replace=False,
             tol=1e-3, n_iter=300, verbose=1):
    """Refine a gs() seeding with exact k-centers iterations.

    Alternates nearest-center assignment and per-cluster 1-center
    (minimax) updates on the full pairwise distance matrix, stopping
    when the k-centers cost improves by less than tol or after n_iter
    iterations. Returns the selected sample indices.
    """
    ge_idx = gs(X, N, replace=replace)

    dist = pairwise_distances(X, n_jobs=-1)

    # Upper bound on the cost so the first improvement check passes.
    cost = dist.max()

    for iter_i in range(n_iter):
        if verbose:
            log('iter_i = {}'.format(iter_i))

        # Assign every sample to its nearest current center.
        labels = np.argmin(dist[ge_idx, :], axis=0)

        # Replace each center by the minimax point of its cluster.
        updated = []
        for cluster in range(N):
            members = np.nonzero(labels == cluster)[0]
            if len(members) == 0:
                # Empty cluster: keep the previous center.
                updated.append(ge_idx[cluster])
                continue
            within = dist[members, :][:, members]
            updated.append(members[np.argmin(within.max(0))])
        ge_idx = updated

        # New cost; k-centers updates can never make it worse.
        cost, prev_cost = dist[ge_idx, :].min(0).max(), cost
        assert(cost <= prev_cost)

        if prev_cost - cost < tol:
            break

    return ge_idx
Example #15
def fisher(yhat, y, samples=False):
    """Fisher criterion"""
    classes = np.unique(y)
    mu = np.zeros(len(classes))
    v = np.zeros(len(classes))
    # Per-class mean and variance of the predictions.
    for c in classes.astype(int):
        members = (y == c)
        mu[c] = np.mean(yhat[members])
        v[c] = np.var(yhat[members])

    if not samples:
        # Aggregate criterion: separation summed over class pairs.
        fisher = 0
        for c1, c2 in pairwise(classes.astype(int)):
            fisher += np.abs(mu[c1] - mu[c2]) / np.sqrt(v[c1] + v[c2])
        return fisher

    # Lexicase version: one score per sample, measured against the class
    # whose mean is closest to the sample's own class mean.
    mu_d = pairwise_distances(mu.reshape(-1, 1))
    min_mu = np.zeros(len(classes), dtype=int)
    for i in np.arange(len(min_mu)):
        # Index 1 of the argsort skips the class itself (distance 0).
        min_mu[i] = np.argsort(mu_d[i])[1]
    fisher = np.zeros(len(yhat))
    for i, l in enumerate(yhat.astype(int)):
        fisher[i] = np.abs(l - mu[min_mu[y[i]]]) / np.sqrt(v[y[i]] + v[min_mu[y[i]]])
    return fisher
Example #16
def compute_cos_dis(featA, featB):
    """Map pairwise distances between featA and featB into (0, 1] via exp(-d).

    NOTE(review): despite the name, this is exp(-distance) with the
    default metric of skp.pairwise_distances, not a cosine measure —
    confirm intent against callers.
    """
    distances = skp.pairwise_distances(featA, featB)
    return np.exp(-distances)
Example #17
def test_pairwise_boolean_distance(metric):
    """Boolean metrics must coerce inputs to bool, warning appropriately."""
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # With the conversion warning suppressed, distances between the
    # boolean views of X and Y (or X with itself) are all zero.
    with ignore_warnings(category=DataConversionWarning):
        for other in (Y, None):
            res = pairwise_distances(X, other, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # Converting non-boolean input must emit a DataConversionWarning.
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Boolean X with non-boolean Y still warns.
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Already-boolean X with Y=None must be silent.
    with pytest.warns(None) as records:
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0
Example #18
def test_no_data_conversion_warning():
    """A non-boolean metric must not emit any conversion warning."""
    X = np.random.RandomState(0).randn(5, 4)
    with pytest.warns(None) as records:
        pairwise_distances(X, metric="minkowski")
    assert len(records) == 0
Example #19
def test_pairwise_precomputed_non_negative():
    """A precomputed matrix containing negative values must be rejected."""
    negative_matrix = np.full((5, 5), -1)
    assert_raises_regexp(ValueError, '.* non-negative values.*',
                         pairwise_distances, negative_matrix,
                         metric='precomputed')
Example #20
def test_pairwise_callable_nonstrict_metric():
    # A callable metric may have metric(x, x) != 0, so the diagonal must
    # actually be computed, not assumed to be zero as it would be for a
    # known strict metric.
    result = pairwise_distances([[1.]], metric=lambda x, y: 5)
    assert_equal(result[0, 0], 5)

    # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
Example #21
def check_pairwise_distances_chunked(X, Y, working_memory, metric='euclidean'):
    """Chunked distances must stream budget-respecting blocks that
    concatenate to the unchunked result."""
    gen = pairwise_distances_chunked(X, Y, working_memory=working_memory,
                                     metric=metric)
    assert isinstance(gen, GeneratorType)
    chunks = list(gen)
    if Y is None:
        Y = X
    # Smallest feasible block: a single row of float64 distances, in MiB.
    min_block_mib = len(Y) * 8 * 2 ** -20
    budget_bytes = max(working_memory, min_block_mib) * 2 ** 20

    for chunk in chunks:
        assert chunk.nbytes <= budget_bytes

    stacked = np.vstack(chunks)
    assert_array_almost_equal(stacked, pairwise_distances(X, Y, metric=metric))
Example #22
def test_parallel_pairwise_distances_diagonal(metric):
    # Even with parallel computation (n_jobs=2) and a numerically nasty
    # huge-scale input, each point's distance to itself must stay ~0.
    X = np.random.RandomState(0).normal(size=(1000, 10), scale=1e10)
    distances = pairwise_distances(X, metric=metric, n_jobs=2)
    assert_allclose(np.diag(distances), 0, atol=1e-10)
Example #23
def test_pairwise_distances_chunked():
    """Exercise pairwise_distances_chunked over inputs, metrics and budgets."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((400, 4))
    Y = rng.random_sample((200, 4))

    euclidean_cases = [
        (X, None, 1),                   # Y is None
        (X.tolist(), None, 1),          # X given as a plain list
        (X, Y, 1),                      # Y != X
        (X.tolist(), Y.tolist(), 1),
        (X, Y, 10000),                  # absurdly large working_memory
    ]
    # Tiny memory budgets down to 2**-16 MiB.
    euclidean_cases.extend((X, None, 2 ** power) for power in range(-16, 0))
    for x_arg, y_arg, wm in euclidean_cases:
        check_pairwise_distances_chunked(x_arg, y_arg, working_memory=wm,
                                         metric='euclidean')

    # "cityblock" uses scikit-learn metric, cityblock (function) is
    # scipy.spatial.
    check_pairwise_distances_chunked(X, Y, working_memory=1,
                                     metric='cityblock')

    # An unknown metric raises ValueError as soon as the generator runs.
    assert_raises(ValueError, next,
                  pairwise_distances_chunked(X, Y, metric="blah"))

    # A precomputed matrix is yielded back in a single chunk, by identity.
    D = pairwise_distances(X)
    gen = pairwise_distances_chunked(D,
                                     working_memory=2 ** -16,
                                     metric='precomputed')
    assert isinstance(gen, GeneratorType)
    assert next(gen) is D
    assert_raises(StopIteration, next, gen)
Example #24
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
                                                y_is_x):
    # pairwise_distances must give identical results sequentially and in
    # parallel when the metric derives parameters (V / VI) from the data.
    with config_context(working_memory=1):  # to have more than 1 chunk
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            # Parameters are derived from X alone.
            param_data = X
        else:
            Y = rng.random_sample((1000, 10))
            expected_dist_default_params = cdist(X, Y, metric=metric)
            # Parameters are derived from the stacked X and Y.
            param_data = np.vstack([X, Y])

        # seuclidean uses a variance vector; mahalanobis the inverse
        # covariance matrix.
        if metric == "seuclidean":
            params = {'V': np.var(param_data, axis=0, ddof=1)}
        else:
            params = {'VI': np.linalg.inv(np.cov(param_data.T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(tuple(dist_function(X, Y,
                                             metric=metric, n_jobs=n_jobs)))

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
Example #25
# NOTE(review): incomplete snippet — `X` and `D_sparse` are defined
# elsewhere; D_sparse is presumably a sparsified copy of D whose
# construction was dropped by the scrape, and `X_` is computed but never
# used in the visible code (likely consumed by the dropped lines).
```def test_dbscan_sparse_precomputed(include_self):
D = pairwise_distances(X)
X_ = X if include_self else None
# Ensure it is sparse not merely on diagonals:
assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
# Sparse and dense precomputed matrices must yield identical core
# samples and labels.
core_sparse, labels_sparse = dbscan(D_sparse,
eps=.8,
min_samples=10,
metric='precomputed')
core_dense, labels_dense = dbscan(D, eps=.8, min_samples=10,
metric='precomputed')
assert_array_equal(core_dense, core_sparse)
assert_array_equal(labels_dense, labels_sparse) ```
Example #26
def test_dbscan_balltree():
    # Tests the DBSCAN algorithm with balltree for neighbor calculation.
    eps = 0.8
    min_samples = 10

    # Reference run on the precomputed distance matrix.
    D = pairwise_distances(X)
    core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
                                  min_samples=min_samples)
    # Cluster count ignores the noise label (-1) when present.
    n_clusters_1 = len(set(labels)) - int(-1 in labels)
    assert_equal(n_clusters_1, n_clusters)

    # Every tree-based configuration must find the same cluster count.
    tree_configs = [
        dict(p=2.0, algorithm='ball_tree'),
        dict(p=2.0, algorithm='kd_tree'),
        dict(p=1.0, algorithm='ball_tree'),
        dict(leaf_size=20, algorithm='ball_tree'),
    ]
    for extra in tree_configs:
        db = DBSCAN(eps=eps, min_samples=min_samples, **extra)
        labels = db.fit(X).labels_
        found = len(set(labels)) - int(-1 in labels)
        assert_equal(found, n_clusters)
Example #27
def test_precomputed_dists():
    # OPTICS fed a precomputed euclidean matrix must agree with OPTICS
    # run directly on the raw points.
    subset = X[::2]
    D = pairwise_distances(subset, metric='euclidean')
    on_matrix = OPTICS(min_samples=10, algorithm='brute',
                       metric='precomputed').fit(D)
    on_points = OPTICS(min_samples=10, algorithm='brute',
                       metric='euclidean').fit(subset)

    assert_allclose(on_matrix.reachability_, on_points.reachability_)
    assert_array_equal(on_matrix.labels_, on_points.labels_)
Example #28
def test_kneighbors_regressor_sparse(n_samples=40,
                                     n_features=5,
                                     n_test_pts=10,
                                     n_neighbors=5,
                                     random_state=0):
    """Test radius-based regression on sparse matrices.

    Like the dense variant, but exercises every sparse input type, and
    checks that a precomputed-metric regressor rejects sparse inputs.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    # FIX: np.int was deprecated (NumPy 1.20) and removed (NumPy 1.24);
    # it was a plain alias of the builtin int, so this is behaviorally
    # identical.
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            algorithm='auto')
        knn.fit(sparsemat(X), y)

        knn_pre = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                                metric='precomputed')
        knn_pre.fit(pairwise_distances(X, metric='euclidean'), y)

        for sparsev in SPARSE_OR_DENSE:
            X2 = sparsev(X)
            assert np.mean(knn.predict(X2).round() == y) > 0.95

            X2_pre = sparsev(pairwise_distances(X, metric='euclidean'))
            if issparse(sparsev(X2_pre)):
                # A precomputed-metric estimator cannot accept sparse input.
                assert_raises(ValueError, knn_pre.predict, X2_pre)
            else:
                assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
Example #29
# NOTE(review): garbled snippet — the second metric loop below lost the
# kneighbors_graph call its `include_self=True).toarray()` line belongs
# to, so it is not syntactically valid as shown.
```def test_non_euclidean_kneighbors():
rng = np.random.RandomState(0)
X = rng.rand(5, 5)

# np.sort returns a sorted copy and the result is discarded, so the
# second line below has no effect as written.
dist_array = pairwise_distances(X).flatten()
np.sort(dist_array)

# Test kneighbors_graph
for metric in ['manhattan', 'chebyshev']:
nbrs_graph = neighbors.kneighbors_graph(
X, 3, metric=metric, mode='connectivity',
include_self=True).toarray()
nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

# The call these arguments belong to was dropped by the scrape:
for metric in ['manhattan', 'chebyshev']:
include_self=True).toarray()

# Raise error when wrong parameters are supplied,
X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
X_nbrs.fit(X)
assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
metric='euclidean')
X_nbrs.fit(X)
```def test_pairwise_boolean_distance():