Python sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS Examples

The following are 3 code examples that use sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source File: test_pairwise.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_paired_distances(metric, func):
    """Check ``paired_distances`` against calling ``func`` directly.

    ``metric`` is the name passed to ``paired_distances``; ``func`` is the
    callable whose output is expected to match it.
    """
    rng = np.random.RandomState(0)
    # Draw two independent 5x4 samples so that Y differs from X.
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((5, 4))

    expected = paired_distances(X, Y, metric=metric)

    # Calling the function directly must agree with the helper, both for
    # dense arrays and for the same data wrapped in CSR matrices.
    assert_array_almost_equal(expected, func(X, Y))
    assert_array_almost_equal(expected, func(csr_matrix(X), csr_matrix(Y)))

    if metric not in PAIRWISE_DISTANCE_FUNCTIONS:
        return

    # When a pairwise implementation is registered for this metric, the
    # diagonal of its result must match the paired distances.
    pairwise_matrix = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
    assert_array_almost_equal(np.diag(pairwise_matrix), expected)
Example #2
Source File: test_k_medoids.py    From scikit-learn-extra with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_kmedoids_iris():
    """Test kmedoids on the Iris dataset.

    For every supported ``init`` strategy, checks that KMedoids converges
    in a bounded number of iterations, that the average distance to the
    closest medoid is less than half the average pairwise distance, and
    that it is within 10% of the average distance to the closest centroid
    of a reference KMeans model.
    """
    rng = np.random.RandomState(seed)
    X_iris = load_iris()["data"]

    # Seed the reference model through its own generator built from
    # ``seed``: the baseline becomes reproducible, and the stream that
    # KMedoids draws from ``rng`` is left untouched.
    ref_model = KMeans(
        n_clusters=3, random_state=np.random.RandomState(seed)
    ).fit(X_iris)

    avg_dist_to_closest_centroid = (
        ref_model.transform(X_iris).min(axis=1).mean()
    )

    # None of the following depends on the init strategy, so compute it
    # once instead of once per loop iteration.
    distance_metric = "euclidean"
    distances = PAIRWISE_DISTANCE_FUNCTIONS[distance_metric](X_iris)
    avg_dist_to_random_medoid = np.mean(distances.ravel())

    for init in ["random", "heuristic", "k-medoids++"]:
        model = KMedoids(
            n_clusters=3, metric=distance_metric, init=init, random_state=rng
        )
        model.fit(X_iris)

        # test convergence in reasonable number of steps
        assert model.n_iter_ < (len(X_iris) // 10)

        avg_dist_to_closest_medoid = model.inertia_ / X_iris.shape[0]
        # We want distance-to-closest-medoid to be reduced from average
        # distance by more than 50%
        assert avg_dist_to_random_medoid > 2 * avg_dist_to_closest_medoid
        # When K-Medoids is using Euclidean distance,
        # we can compare its performance to
        # K-Means. We want the average distance to cluster centers
        # to be similar between K-Means and K-Medoids
        assert_allclose(
            avg_dist_to_closest_medoid, avg_dist_to_closest_centroid, rtol=0.1
        )
Example #3
Source File: test_pairwise.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_paired_distances():
    """Exercise ``paired_distances`` with named metrics, a callable metric,
    and inputs of mismatched length."""
    rng = np.random.RandomState(0)
    # Draw two independent 5x4 samples so that Y differs from X.
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((5, 4))

    for metric, func in iteritems(PAIRED_DISTANCES):
        expected = paired_distances(X, Y, metric=metric)

        # Calling the function directly must agree with the helper, both
        # for dense arrays and for the same data wrapped in CSR matrices.
        assert_array_almost_equal(expected, func(X, Y))
        assert_array_almost_equal(
            expected, func(csr_matrix(X), csr_matrix(Y))
        )

        if metric not in PAIRWISE_DISTANCE_FUNCTIONS:
            continue

        # When a pairwise implementation is registered for this metric,
        # the diagonal of its result must match the paired distances.
        pairwise_matrix = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
        assert_array_almost_equal(np.diag(pairwise_matrix), expected)

    # A callable metric must give the same result as the named
    # 'manhattan' metric.
    def abs_diff_sum(x, y):
        return np.abs(x - y).sum(axis=0)

    by_name = paired_distances(X, Y, metric='manhattan')
    by_callable = paired_distances(X, Y, metric=abs_diff_sum)
    assert_array_almost_equal(by_name, by_callable)

    # A ValueError must be raised when X and Y differ in length.
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y)