Python sklearn.metrics.pairwise.pairwise_distances() Examples

The following are 30 code examples of sklearn.metrics.pairwise.pairwise_distances(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source Project: default-credit-card-prediction   Author: alexpnt   File: classification.py    License: MIT License 7 votes vote down vote up
def predict(self, X):
		"""
		Classify the input data assigning the label of the nearest prototype.

		Keyword arguments:
		X -- The feature vectors

		Returns a numpy array with one predicted label per sample in X.
		"""
		# FIX: the original used two independent `if` statements, so for
		# "euclidean" the distance matrix was computed twice (once in the
		# first branch, once in the fall-through `else`). Collapse the
		# metric selection into one step; unknown metrics fall back to
		# euclidean, exactly as before.
		if self.distance_metric in ("euclidean", "minkowski", "manhattan", "mahalanobis"):
			metric = self.distance_metric
		else:
			metric = "euclidean"

		# Distances from every sample to every prototype (template matching).
		distances = pairwise_distances(X, self.M_, metric)

		# FIX: np.argmin replaces the quadratic tolist().index(min(...))
		# idiom; xrange (Python 2 only) replaced by range.
		classification = np.zeros(len(X))
		nearest = np.argmin(distances, axis=1)
		for i in range(len(X)):
			# choose the class belonging to the nearest prototype
			classification[i] = self.outcomes[nearest[i]]

		return classification
Example #2
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 6 votes vote down vote up
def test_paired_distances(metric, func):
    """paired_distances must agree with the direct distance function."""
    rng = np.random.RandomState(0)
    # Two independent 5x4 samples, so Y differs from X.
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((5, 4))

    result = paired_distances(X, Y, metric=metric)
    assert_array_almost_equal(result, func(X, Y))
    # Sparse inputs must produce the same values as dense ones.
    assert_array_almost_equal(result, func(csr_matrix(X), csr_matrix(Y)))

    if metric in PAIRWISE_DISTANCE_FUNCTIONS:
        # The diagonal of the full pairwise matrix holds exactly the
        # paired distances, so the two implementations must agree.
        full_matrix = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
        assert_array_almost_equal(np.diag(full_matrix), result)
Example #3
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_t_sne.py    License: MIT License 6 votes vote down vote up
def test_trustworthiness_precomputed_deprecation():
    # FIXME: Remove this test in v0.23

    # The `precomputed` flag of trustworthiness is deprecated but must keep
    # working (with a DeprecationWarning) until v0.23.
    rng = check_random_state(0)
    X = rng.randn(100, 2)

    # A distance matrix with precomputed=True: warns, still returns 1.
    score = assert_warns(DeprecationWarning, trustworthiness,
                         pairwise_distances(X), X, precomputed=True)
    assert_equal(score, 1.)

    # Same, with metric='precomputed' also given explicitly.
    score = assert_warns(DeprecationWarning, trustworthiness,
                         pairwise_distances(X), X, metric='precomputed',
                         precomputed=True)
    assert_equal(score, 1.)

    # Raw data plus precomputed=True is inconsistent and raises ValueError.
    assert_raises(ValueError, assert_warns, DeprecationWarning,
                  trustworthiness, X, X, metric='euclidean', precomputed=True)

    # precomputed=True wins over metric='euclidean' when given a matrix.
    score = assert_warns(DeprecationWarning, trustworthiness,
                         pairwise_distances(X), X, metric='euclidean',
                         precomputed=True)
    assert_equal(score, 1.)
Example #4
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_t_sne.py    License: MIT License 6 votes vote down vote up
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    """Check the Barnes-Hut t-SNE gradient against a precomputed answer.

    Builds joint probabilities P from the pairwise distances of
    ``pos_input``, evaluates the Barnes-Hut gradient at ``pos_output``,
    and compares it to ``grad_output`` to 4 decimal places.
    """
    # Dense pairwise distances; float32 to match the Cython kernel's dtype.
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    # _joint_probabilities returns a condensed vector; expand to square form.
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)

    # NOTE(review): `neighbors` is overwritten here with P's CSR structure,
    # so the value passed in as an argument is effectively unused.
    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    # NOTE(review): skip_num_points is hard-coded to 0 in this call,
    # ignoring the parameter — confirm this is intentional for the fixture.
    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
Example #5
Source Project: scikit-feature   Author: jundongl   File: UDFS.py    License: GNU General Public License v2.0 6 votes vote down vote up
def construct_M(X, k, gamma):
    """
    This function constructs the M matrix described in the paper.

    Input
    -----
    X: {numpy array}, shape (n_sample, n_feature) — input data
    k: {int} — number of nearest neighbors (each local patch has k+1 samples)
    gamma: {float} — regularization parameter

    Output
    ------
    M: {numpy array}, shape (n_feature, n_feature)
    """
    n_sample, n_feature = X.shape
    Xt = X.T
    D = pairwise_distances(X)
    # sort the distance matrix D in ascending order
    idx = np.argsort(D, axis=1)
    # choose the k-nearest neighbors for each instance (self included)
    idx_new = idx[:, 0:k+1]
    # centering matrix for local patches of size k+1
    # FIX: 1.0/(k+1) — under Python 2, 1/(k+1) is integer division and
    # silently yields 0, turning H into the identity.
    H = np.eye(k+1) - 1.0/(k+1) * np.ones((k+1, k+1))
    I = np.eye(k+1)
    Mi = np.zeros((n_sample, n_sample))
    for i in range(n_sample):
        # local data matrix: instance i and its k nearest neighbors
        Xi = Xt[:, idx_new[i, :]]
        Xi_tilde = np.dot(Xi, H)
        Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma*I)
        # selection matrix: Si[p, q] = 1 iff sample p is the q-th neighbor of i
        Si = np.zeros((n_sample, k+1))
        for q in range(k+1):
            # FIX: select the q-th neighbor of instance i; the original
            # `Si[idx_new[q], q]` used a whole row of neighbor indices as
            # the row selector (fancy indexing), building a wrong Si.
            Si[idx_new[i, q], q] = 1
        Mi = Mi + np.dot(np.dot(Si, np.dot(np.dot(H, Bi), H)), Si.T)
    M = np.dot(np.dot(X.T, Mi), X)
    return M
Example #6
Source Project: modAL   Author: modAL-python   File: density.py    License: MIT License 6 votes vote down vote up
def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean') -> np.ndarray:
    """
    Calculates the information density metric of the given data using the given metric.

    Args:
        X: The data for which the information density is to be calculated.
        metric: The metric to be used. Should take two 1d numpy.ndarrays for argument.

    Todo:
        Should work with all possible modALinput.
        Perhaps refactor the module to use some stuff from sklearn.metrics.pairwise

    Returns:
        The information density for each sample.
    """
    # Similarity is the reciprocal of (1 + distance): identical points score 1,
    # far-apart points approach 0.
    distance_mtx = pairwise_distances(X, X, metric=metric)
    similarity_mtx = 1/(1 + distance_mtx)

    # Each sample's density is its mean similarity to every sample
    # (including itself).
    return similarity_mtx.mean(axis=1)
Example #7
Source Project: Cross-Modal-Projection-Learning   Author: YingZhangDUT   File: bidirectional_eval.py    License: MIT License 6 votes vote down vote up
def _eval_retrieval(PX, PY, GX, GY):
    """Evaluate retrieval of probe set (PX, PY) against gallery (GX, GY).

    Ranks gallery items for every probe by distance and reports
    Recall@{1,5,10} and mean average precision.

    Returns (recall_1, recall_5, recall_10, map_value).
    """
    # D_{i, j} is the distance between the ith array from PX and the jth array from GX.
    D = pairwise_distances(PX, GX, metric=args.method, n_jobs=-2)
    Rank = np.argsort(D, axis=1)

    # FIX: Python-2-only `print "..."` statements replaced with print(...)
    # calls, which behave identically on Python 2 (single argument) and 3.
    recall_1 = recall_at_k(Rank, PY, GY, k=1)  # Recall @ K
    print("{:8}{:8.2%}".format('Recall@1', recall_1))

    recall_5 = recall_at_k(Rank, PY, GY, k=5)  # Recall @ K
    print("{:8}{:8.2%}".format('Recall@5', recall_5))

    recall_10 = recall_at_k(Rank, PY, GY, k=10)  # Recall @ K
    print("{:8}{:8.2%}".format('Recall@10', recall_10))

    map_value = mean_average_precision(Rank, PY, GY)  # Mean Average Precision
    print("{:8}{:8.2%}".format('MAP', map_value))

    return recall_1, recall_5, recall_10, map_value
Example #8
Source Project: alphacsc   Author: alphacsc   File: k_medoids.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def transform(self, X):
        """Transforms X to cluster-distance space.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape=(n_samples, n_features)
            Data to transform.

        Returns
        -------
        X_new : {array-like, sparse matrix}, shape=(n_samples, n_clusters)
            X transformed in the new space of distances to cluster centers.
        """
        X = check_array(X, accept_sparse=['csr', 'csc'])
        check_is_fitted(self, "cluster_centers_")

        # A user-supplied callable metric is applied directly; otherwise
        # delegate to sklearn's pairwise_distances with the metric name.
        metric = self.distance_metric
        if callable(metric):
            return metric(X, Y=self.cluster_centers_)
        return pairwise_distances(X, Y=self.cluster_centers_, metric=metric)
Example #9
Source Project: scikit-hubness   Author: VarIr   File: test_neighbors.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_precomputed_cross_validation():
    """Cross-validation must split a precomputed matrix the same as raw data."""
    rng = np.random.RandomState(0)
    data = rng.rand(20, 2)
    dist = pairwise_distances(data, metric='euclidean')
    labels = rng.randint(3, size=20)
    estimator_classes = (neighbors.KNeighborsClassifier,
                         neighbors.RadiusNeighborsClassifier,
                         neighbors.KNeighborsRegressor,
                         neighbors.RadiusNeighborsRegressor)
    for Est in estimator_classes:
        # Score once on raw features with the default metric ...
        metric_score = cross_val_score(
            Est(algorithm_params={'n_candidates': 5}), data, labels)
        # ... and once on the precomputed distances; scores must match.
        precomp_score = cross_val_score(
            Est(metric='precomputed', algorithm_params={'n_candidates': 5}),
            dist, labels)
        assert_array_equal(metric_score, precomp_score)
Example #10
Source Project: MTCNN-VGG-face   Author: KaiJin1995   File: TestMyself_Multithreading.py    License: MIT License 6 votes vote down vote up
def Calculate_Distance_1(dist1, dist2, metric, min_predicts, Lists_Num):
    """Scan half of the gallery features for a close match to dist2.

    Walks every second entry of dist1 (indices 0, 2, 4, ...), tracking the
    smallest pairwise distance seen in min_predicts. Sets the shared flag
    ThreadingState1 when a distance below 0.12 is found, and stops early
    when ThreadingState2 is set (presumably by a sibling worker scanning
    the other half — confirm against the caller).
    """
    global ThreadingState1
    global ThreadingState2
    ThreadingState1 = 0
    ThreadingState2 = 0
    i = 0
    # FIX: floor division — under Python 3, Lists_Num/2 is a float and
    # range() would raise TypeError.
    for sublist in range(Lists_Num // 2):
        predicts1 = pw.pairwise_distances(dist1[i], dist2, metric=metric)
        i = i + 2
        if predicts1[0][0] > 0.12:
            # FIX: compare by value, not identity. `is 1` only worked by
            # accident of CPython's small-int caching.
            if ThreadingState2 == 1:
                break
            if predicts1[0][0] < min_predicts:
                min_predicts = predicts1[0][0]

        else:
            # Match found: record it and signal the other worker.
            min_predicts = predicts1[0][0]
            ThreadingState1 = 1
            break
Example #11
Source Project: twitter-stock-recommendation   Author: alvarobartt   File: test_t_sne.py    License: MIT License 6 votes vote down vote up
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    """Compare the Barnes-Hut t-SNE gradient against a reference answer."""
    dist = pairwise_distances(pos_input).astype(np.float32)
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)

    # Joint probabilities come back in condensed form; expand to a square
    # matrix before building the sparse representation.
    pij = _joint_probabilities(dist, perplexity, verbose)
    pij = squareform(pij).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    sparse_P = csr_matrix(pij)

    # The CSR structure supplies the neighbor lists for the gradient kernel.
    neighbors = sparse_P.indices.astype(np.int64)
    indptr = sparse_P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(sparse_P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
Example #12
Source Project: FaceRecognition   Author: habrman   File: main.py    License: MIT License 6 votes vote down vote up
def find_matching_ids(self, embs):
        """Match each embedding to the closest registered identity.

        Returns two parallel lists (ids, distances). An entry is
        (None, None) when the nearest known embedding is at or beyond the
        distance threshold, and every entry is (None, inf) when no
        identities are registered at all.
        """
        if not self.id_names:
            # Nothing enrolled yet: no identity can match.
            return [None] * len(embs), [np.inf] * len(embs)

        matching_ids = []
        matching_distances = []
        distance_matrix = pairwise_distances(embs, self.embeddings)
        for row in distance_matrix:
            best = np.argmin(row)
            if row[best] < self.distance_treshold:
                matching_ids.append(self.id_names[best])
                matching_distances.append(row[best])
            else:
                matching_ids.append(None)
                matching_distances.append(None)
        return matching_ids, matching_distances
Example #13
Source Project: EDeN   Author: fabriziocosta   File: __init__.py    License: MIT License 5 votes vote down vote up
def dendrogram(data,
               vectorizer,
               method="ward",
               color_threshold=1,
               size=10,
               filename=None):
    """dendrogram.

    Plot a hierarchical-clustering dendrogram of the vectorized graphs.

    data -- iterable of graphs; consumed into a list
    vectorizer -- provides ``transform`` to turn graphs into a data matrix
    method -- scipy linkage method, one of
        "median","centroid","weighted","single","ward","complete","average"
    color_threshold -- coloring threshold forwarded to scipy's dendrogram
    size -- figure width and height in inches
    filename -- if given, save the figure to this path instead of showing it
    """
    data = list(data)
    # get labels from each graph's 'id' attribute, if present
    labels = []
    for graph in data:
        label = graph.graph.get('id', None)
        # NOTE(review): falsy ids ('', 0, None) are skipped, so if only some
        # graphs carry an 'id' the label list will not line up with the data
        # matrix rows — confirm ids are either all present or all absent.
        if label:
            labels.append(label)
    # transform input into sparse vectors
    data_matrix = vectorizer.transform(data)

    # fall back to positional labels when no graph carried an id
    if not labels:
        labels = [str(i) for i in range(data_matrix.shape[0])]

    # pairwise distances between the vectorized graphs feed the linkage
    from sklearn import metrics
    from scipy.cluster.hierarchy import linkage, dendrogram
    distance_matrix = metrics.pairwise.pairwise_distances(data_matrix)
    linkage_matrix = linkage(distance_matrix, method=method)
    plt.figure(figsize=(size, size))
    dendrogram(linkage_matrix,
               color_threshold=color_threshold,
               labels=labels,
               orientation='right')
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()
Example #14
Source Project: geosketch   Author: brianhie   File: sketch.py    License: MIT License 5 votes vote down vote up
def gs_exact(X, N, k='auto', seed=None, replace=False,
             tol=1e-3, n_iter=300, verbose=1):
    """Refine a geometric sketch with a k-centers-style local search.

    Starts from the N indices chosen by ``gs`` and iteratively reassigns
    samples to their nearest center and re-picks each cluster's best
    center, until the worst-case radius improves by less than ``tol`` or
    ``n_iter`` iterations pass.

    NOTE(review): ``k`` and ``seed`` are accepted but never used in this
    body — presumably kept for signature compatibility with ``gs``; verify.

    Returns the list of N selected sample indices.
    """
    # Initial sketch from the geometric sampler.
    ge_idx = gs(X, N, replace=replace)

    # Full dense pairwise distance matrix — O(n^2) memory.
    dist = pairwise_distances(X, n_jobs=-1)

    # Start from the worst possible cost so the first iteration improves it.
    cost = dist.max()

    iter_i = 0

    while iter_i < n_iter:

        if verbose:
            log('iter_i = {}'.format(iter_i))

        # Assign every sample to its nearest current center.
        labels = np.argmin(dist[ge_idx, :], axis=0)

        ge_idx_new = []
        for cluster in range(N):
            cluster_idx = np.nonzero(labels == cluster)[0]
            if len(cluster_idx) == 0:
                # Empty cluster: keep its previous center unchanged.
                ge_idx_new.append(ge_idx[cluster])
                continue
            # 1-center update: choose the member minimizing the cluster's
            # maximum intra-cluster distance.
            X_cluster = dist[cluster_idx, :]
            X_cluster = X_cluster[:, cluster_idx]
            within_idx = np.argmin(X_cluster.max(0))
            ge_idx_new.append(cluster_idx[within_idx])
        ge_idx = ge_idx_new

        # New cost = worst distance from any sample to its nearest center;
        # the update above must never make this worse.
        cost, prev_cost = dist[ge_idx, :].min(0).max(), cost
        assert(cost <= prev_cost)

        if prev_cost - cost < tol:
            break

        iter_i += 1

    return ge_idx
Example #15
Source Project: few   Author: lacava   File: evaluation.py    License: GNU General Public License v3.0 5 votes vote down vote up
def fisher(yhat,y,samples=False):
    """Fisher criterion.

    Class separability of the 1-d program output ``yhat`` given labels
    ``y``: distance between class means scaled by the pooled spread.

    yhat -- program output values, one per sample
    y -- class labels (used to index the per-class stats, so presumably
        integers 0..n_classes-1 — confirm against callers)
    samples -- if False, return one aggregate score; if True, return a
        per-sample score array (lexicase-style)
    """
    classes = np.unique(y)
    # Per-class mean and variance of the program output.
    mu = np.zeros(len(classes))
    v = np.zeros(len(classes))
    # pdb.set_trace()
    for c in classes.astype(int):
        mu[c] = np.mean(yhat[y==c])
        v[c] = np.var(yhat[y==c])

    if not samples:
        # Aggregate score: sum of separations over all class pairs.
        fisher = 0
        for c1,c2 in pairwise(classes.astype(int)):
            fisher += np.abs(mu[c1] - mu[c2])/np.sqrt(v[c1]+v[c2])
    else:
        # lexicase version
        fisher = np.zeros(len(yhat))
        # get closests classes to each class (min mu distance)
        mu_d = pairwise_distances(mu.reshape(-1,1))
        min_mu=np.zeros(len(classes),dtype=int)
        for i in np.arange(len(min_mu)):
            # index 1 of the argsort skips the class itself (distance 0)
            min_mu[i] = np.argsort(mu_d[i])[1]
        # for c1, pairwise(classes.astype(int)):
        #     min_mu[c1] = np.argmin()
        for i,l in enumerate(yhat.astype(int)):
            # NOTE(review): scores each sample's output against the mean of
            # the class closest to its true class — assumes yhat lives on
            # the same scale as the labels; confirm.
            fisher[i] = np.abs(l - mu[min_mu[y[i]]])/np.sqrt(v[y[i]]+v[min_mu[y[i]]])

    # pdb.set_trace()
    return fisher
Example #16
Source Project: 3d-vehicle-tracking   Author: ucbdrive   File: tracking_utils.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def compute_cos_dis(featA, featB):
    """Similarity matrix between two feature sets via exp(-distance).

    NOTE(review): despite the name, skp.pairwise_distances is called with
    its default metric (euclidean), not cosine — confirm intended metric.
    """
    distance = skp.pairwise_distances(featA, featB)
    return np.exp(-distance)
Example #17
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_pairwise_boolean_distance(metric):
    """Non-boolean inputs must be converted (with a warning) for boolean metrics."""
    rng = np.random.RandomState(0)
    data = rng.randn(5, 4)
    other = data.copy()
    other[0, 0] = 1 - other[0, 0]

    # With the conversion warning silenced, all resulting distances (NaNs
    # zeroed) must vanish after the implicit boolean conversion.
    with ignore_warnings(category=DataConversionWarning):
        for second in (other, None):
            result = pairwise_distances(data, second, metric=metric)
            result[np.isnan(result)] = 0
            assert np.sum(result != 0) == 0

    expected_msg = "Data was converted to boolean for metric %s" % metric

    # Non-boolean X alone triggers the conversion warning.
    with pytest.warns(DataConversionWarning, match=expected_msg):
        pairwise_distances(data, metric=metric)

    # Boolean X with a non-boolean Y still warns.
    with pytest.warns(DataConversionWarning, match=expected_msg):
        pairwise_distances(data.astype(bool), Y=other, metric=metric)

    # Already-boolean X with no Y must raise no warning at all.
    with pytest.warns(None) as records:
        pairwise_distances(data.astype(bool), metric=metric)
    assert len(records) == 0
Example #18
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_no_data_conversion_warning():
    """Non-boolean metrics must not emit any conversion warning."""
    data = np.random.RandomState(0).randn(5, 4)
    with pytest.warns(None) as captured:
        pairwise_distances(data, metric="minkowski")
    assert len(captured) == 0
Example #19
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_pairwise_precomputed_non_negative():
    """A precomputed distance matrix with negative entries must be rejected."""
    all_negative = np.full((5, 5), -1)
    assert_raises_regexp(ValueError, '.* non-negative values.*',
                         pairwise_distances, all_negative,
                         metric='precomputed')
Example #20
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_pairwise_callable_nonstrict_metric():
    # paired_distances should allow callable metric where metric(x, x) != 0
    # Knowing that the callable is a strict metric would allow the diagonal to
    # be left uncalculated and set to 0.
    def constant_five(x, y):
        return 5

    distances = pairwise_distances([[1.]], metric=constant_five)
    assert_equal(distances[0, 0], 5)


# Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS. 
Example #21
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def check_pairwise_distances_chunked(X, Y, working_memory, metric='euclidean'):
    """Chunked distances must respect the memory budget and match unchunked."""
    gen = pairwise_distances_chunked(X, Y, working_memory=working_memory,
                                     metric=metric)
    assert isinstance(gen, GeneratorType)
    chunks = list(gen)
    if Y is None:
        Y = X
    # Smallest legal chunk: one full row of float64 distances, in MiB.
    min_block_mib = len(Y) * 8 * 2 ** -20

    for chunk in chunks:
        assert chunk.nbytes <= max(working_memory, min_block_mib) * 2 ** 20

    stacked = np.vstack(chunks)
    expected = pairwise_distances(X, Y, metric=metric)
    assert_array_almost_equal(stacked, expected)
Example #22
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_parallel_pairwise_distances_diagonal(metric):
    """Self-distances must vanish even at large scale with n_jobs > 1."""
    data = np.random.RandomState(0).normal(size=(1000, 10), scale=1e10)
    result = pairwise_distances(data, metric=metric, n_jobs=2)
    assert_allclose(np.diag(result), 0, atol=1e-10)
Example #23
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_pairwise_distances_chunked():
    """End-to-end checks for pairwise_distances_chunked.

    Exercises the chunked helper across memory budgets, list inputs,
    X-only vs X/Y calls, metric choices, unknown-metric errors, and the
    precomputed fast path (which must yield the input matrix itself,
    exactly once).
    """
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((400, 4))
    check_pairwise_distances_chunked(X, None, working_memory=1,
                                     metric='euclidean')
    # Test small amounts of memory
    for power in range(-16, 0):
        check_pairwise_distances_chunked(X, None, working_memory=2 ** power,
                                         metric='euclidean')
    # X as list
    check_pairwise_distances_chunked(X.tolist(), None, working_memory=1,
                                     metric='euclidean')
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((200, 4))
    check_pairwise_distances_chunked(X, Y, working_memory=1,
                                     metric='euclidean')
    check_pairwise_distances_chunked(X.tolist(), Y.tolist(), working_memory=1,
                                     metric='euclidean')
    # absurdly large working_memory
    check_pairwise_distances_chunked(X, Y, working_memory=10000,
                                     metric='euclidean')
    # "cityblock" uses scikit-learn metric, cityblock (function) is
    # scipy.spatial.
    check_pairwise_distances_chunked(X, Y, working_memory=1,
                                     metric='cityblock')
    # Test that a value error is raised if the metric is unknown
    assert_raises(ValueError, next,
                  pairwise_distances_chunked(X, Y, metric="blah"))

    # Test precomputed returns all at once
    D = pairwise_distances(X)
    gen = pairwise_distances_chunked(D,
                                     working_memory=2 ** -16,
                                     metric='precomputed')
    assert isinstance(gen, GeneratorType)
    # The precomputed path must yield the very same object, not a copy.
    assert next(gen) is D
    assert_raises(StopIteration, next, gen)
Example #24
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pairwise.py    License: MIT License 5 votes vote down vote up
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
                                                y_is_x):
    """Sequential and parallel runs must agree when the metric derives
    parameters (V / VI) from the data itself."""
    # check that pairwise_distances give the same result in sequential and
    # parallel, when metric has data-derived parameters.
    with config_context(working_memory=1):  # to have more than 1 chunk
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            if metric == "seuclidean":
                # seuclidean's default V is the per-feature variance of X.
                params = {'V': np.var(X, axis=0, ddof=1)}
            else:
                # VI is the inverse covariance — presumably the mahalanobis
                # branch of the parametrization; confirm against the fixture.
                params = {'VI': np.linalg.inv(np.cov(X.T)).T}
        else:
            Y = rng.random_sample((1000, 10))
            expected_dist_default_params = cdist(X, Y, metric=metric)
            if metric == "seuclidean":
                # With distinct X and Y, defaults derive from their union.
                params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(tuple(dist_function(X, Y,
                                             metric=metric, n_jobs=n_jobs)))

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
Example #25
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_dbscan.py    License: MIT License 5 votes vote down vote up
def test_dbscan_sparse_precomputed(include_self):
    """Dense and sparse precomputed distances must yield the same clustering."""
    dense_dist = pairwise_distances(X)
    nn = NearestNeighbors(radius=.9).fit(X)
    query = X if include_self else None
    sparse_dist = nn.radius_neighbors_graph(X=query, mode='distance')

    # Ensure it is sparse not merely on diagonals:
    n = dense_dist.shape[0]
    assert sparse_dist.nnz < n * (n - 1)

    core_sparse, labels_sparse = dbscan(sparse_dist, eps=.8, min_samples=10,
                                        metric='precomputed')
    core_dense, labels_dense = dbscan(dense_dist, eps=.8, min_samples=10,
                                      metric='precomputed')
    assert_array_equal(core_dense, core_sparse)
    assert_array_equal(labels_dense, labels_sparse)
Example #26
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_dbscan.py    License: MIT License 5 votes vote down vote up
def test_dbscan_balltree():
    """DBSCAN with tree-based neighbor search must match the precomputed run."""
    eps = 0.8
    min_samples = 10

    def count_clusters(labels):
        # number of clusters, ignoring the noise label (-1) if present
        return len(set(labels)) - int(-1 in labels)

    dist = pairwise_distances(X)
    core_samples, labels = dbscan(dist, metric="precomputed", eps=eps,
                                  min_samples=min_samples)
    assert_equal(count_clusters(labels), n_clusters)

    # Every estimator configuration must recover the same cluster count.
    configurations = [
        dict(p=2.0, algorithm='ball_tree'),
        dict(p=2.0, algorithm='kd_tree'),
        dict(p=1.0, algorithm='ball_tree'),
        dict(leaf_size=20, algorithm='ball_tree'),
    ]
    for kwargs in configurations:
        db = DBSCAN(eps=eps, min_samples=min_samples, **kwargs)
        labels = db.fit(X).labels_
        assert_equal(count_clusters(labels), n_clusters)
Example #27
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_optics.py    License: MIT License 5 votes vote down vote up
def test_precomputed_dists():
    """OPTICS on precomputed distances must match OPTICS on raw features."""
    subset = X[::2]
    dists = pairwise_distances(subset, metric='euclidean')
    clust_precomputed = OPTICS(min_samples=10, algorithm='brute',
                               metric='precomputed').fit(dists)
    clust_euclidean = OPTICS(min_samples=10, algorithm='brute',
                             metric='euclidean').fit(subset)

    assert_allclose(clust_precomputed.reachability_,
                    clust_euclidean.reachability_)
    assert_array_equal(clust_precomputed.labels_, clust_euclidean.labels_)
Example #28
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_neighbors.py    License: MIT License 5 votes vote down vote up
def test_kneighbors_regressor_sparse(n_samples=40,
                                     n_features=5,
                                     n_test_pts=10,
                                     n_neighbors=5,
                                     random_state=0):
    # Test radius-based regression on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    # FIX: np.int was deprecated and removed in NumPy 1.24; the builtin
    # `int` is the documented replacement with identical behavior here.
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            algorithm='auto')
        knn.fit(sparsemat(X), y)

        # Equivalent regressor fed a precomputed euclidean distance matrix.
        knn_pre = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                                metric='precomputed')
        knn_pre.fit(pairwise_distances(X, metric='euclidean'), y)

        for sparsev in SPARSE_OR_DENSE:
            X2 = sparsev(X)
            assert np.mean(knn.predict(X2).round() == y) > 0.95

            X2_pre = sparsev(pairwise_distances(X, metric='euclidean'))
            if issparse(sparsev(X2_pre)):
                # Sparse precomputed distances are rejected at predict time.
                assert_raises(ValueError, knn_pre.predict, X2_pre)
            else:
                assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
Example #29
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_neighbors.py    License: MIT License 5 votes vote down vote up
def test_non_euclidean_kneighbors():
    """Neighbor graphs with non-euclidean metrics, plus metric-mismatch errors."""
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius.
    dist_array = pairwise_distances(X).flatten()
    # FIX: np.sort returns a sorted copy and does NOT sort in place, so the
    # original `np.sort(dist_array)` discarded its result and the radius was
    # taken from the unsorted array. Sort in place as intended.
    dist_array.sort()
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A)

    # Raise error when the graph helper's metric conflicts with the fitted one.
    X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs,
                  radius, metric='euclidean')
Example #30
Source Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_neighbors.py    License: MIT License 5 votes vote down vote up
def test_pairwise_boolean_distance():
    # Non-regression test for #4523
    # 'brute': uses scipy.spatial.distance through pairwise_distances
    # 'ball_tree': uses sklearn.neighbors.dist_metrics
    data = np.random.RandomState(0).uniform(size=(6, 5))
    NN = neighbors.NearestNeighbors

    # Both backends must report identical neighbor distances.
    brute = NN(metric="jaccard", algorithm='brute').fit(data)
    tree = NN(metric="jaccard", algorithm='ball_tree').fit(data)
    assert_array_equal(brute.kneighbors(data)[0], tree.kneighbors(data)[0])