Python sklearn.neighbors.NearestCentroid() Examples

The following code examples show how to use sklearn.neighbors.NearestCentroid(). They are taken from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 6 votes vote down vote up
def test_classification_toy():
    """Check toy-dataset classification for dense and sparse inputs.

    Each call to ``check`` fits a fresh ``NearestCentroid`` on one
    representation of the training data and asserts that predicting on one
    representation of the test data reproduces ``true_result``.
    """

    def check(train, query):
        model = NearestCentroid()
        model.fit(train, y)
        assert_array_equal(model.predict(query), true_result)

    # Dense fit, dense predict.
    check(X, T)

    # Sparse matrix for both fitting and predicting.
    check(X_csr, T_csr)

    # Sparse fit, non-sparse predict.
    check(X_csr, T)

    # Non-sparse fit, sparse predict.
    check(X, T_csr)

    # Non-CSR sparse formats (COO for fitting, LIL for predicting).
    check(X_csr.tocoo(), T_csr.tolil())
Example 2
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 6 votes vote down vote up
def test_classification_toy():
    # Check classification on a toy dataset for every pairing of training
    # and prediction inputs listed below; a fresh estimator is used for
    # each pairing and must reproduce `true_result`.
    fit_predict_pairs = [
        (X, T),                          # non-sparse fit, non-sparse test
        (X_csr, T_csr),                  # sparse fit, sparse test
        (X_csr, T),                      # sparse fit, non-sparse test
        (X, T_csr),                      # non-sparse fit, sparse test
        (X_csr.tocoo(), T_csr.tolil()),  # non-CSR sparse matrices
    ]
    for fit_input, predict_input in fit_predict_pairs:
        estimator = NearestCentroid()
        estimator.fit(fit_input, y)
        predicted = estimator.predict(predict_input)
        assert_array_equal(predicted, true_result)
Example 3
Project: nn-toolbox   Author: nhatsmrt   File: rbf.py    Apache License 2.0 5 votes vote down vote up
def centroids_initialize(self, input: Tensor, labels: Tensor):
        """
        (Re-)initialize ``self.weight`` from the centroids of a fitted
        ``NearestCentroid`` model.

        Both tensors are moved to the CPU, detached and converted to NumPy
        before fitting; the resulting ``centroids_`` are copied in place into
        ``self.weight.data`` on that tensor's own device.

        :param input: samples handed to ``NearestCentroid.fit``
        :param labels: class labels for ``input``; flattened with ``ravel``
        """
        samples = input.cpu().detach().numpy()
        targets = labels.cpu().detach().numpy().ravel()

        classifier = NearestCentroid()
        classifier.fit(samples, targets)

        weights = self.weight.data
        centers = torch.Tensor(classifier.centroids_)
        weights.copy_(centers.to(weights.device))
Example 4
Project: EvoMSA   Author: INGEOTEC   File: test_base.py    Apache License 2.0 5 votes vote down vote up
def test_EvoMSA_evodag_class():
    """EvoMSA given a dotted ``evodag_class`` path must build that class.

    After fitting, the internal ``_evodag_model`` has to be a
    ``NearestCentroid`` instance, and ``predict`` must agree with the labels
    recovered from the arg-max of ``predict_proba``.
    """
    from sklearn.neighbors import NearestCentroid
    import numpy as np

    X, y = get_data()
    evodag_options = dict(popsize=10, early_stopping_rounds=10,
                          n_estimators=3)
    text_models = [['EvoMSA.model.Corpus', 'EvoMSA.model.Bernulli']]
    unfitted = EvoMSA(evodag_args=evodag_options,
                      models=text_models,
                      evodag_class="sklearn.neighbors.NearestCentroid",
                      TR=False,
                      n_jobs=2)
    model = unfitted.fit(X, y)
    assert isinstance(model._evodag_model, NearestCentroid)

    cl = model.predict(X)
    hy = model.predict_proba(X)
    best_columns = hy.argmax(axis=1)
    cl2 = model._le.inverse_transform(best_columns)
    print(cl, cl2)
    assert np.all(cl == cl2)
Example 5
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_precomputed():
    """Predicting from a precomputed distance matrix must give ``true_result``.

    The estimator is created with ``metric="precomputed"``, fitted on ``X``,
    and then queried with the pairwise distances between ``T`` and the
    fitted centroids.
    """
    model = NearestCentroid(metric="precomputed")
    model.fit(X, y)
    distances_to_centroids = pairwise_distances(T, model.centroids_)
    predicted = model.predict(distances_to_centroids)
    assert_array_equal(predicted, true_result)
Example 6
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_iris():
    """Training-set accuracy on iris must exceed 0.9 for each metric."""
    for metric_name in ('euclidean', 'cosine'):
        model = NearestCentroid(metric=metric_name)
        model = model.fit(iris.data, iris.target)
        hits = model.predict(iris.data) == iris.target
        score = np.mean(hits)
        assert score > 0.9, "Failed with score = " + str(score)
Example 7
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_iris_shrinkage():
    """Training-set accuracy on iris must exceed 0.8 with shrinkage.

    Every metric is combined with every shrink threshold (including
    ``None``), in the same order as a metric-outer / threshold-inner loop.
    """
    metric_names = ('euclidean', 'cosine')
    thresholds = [None, 0.1, 0.5]
    settings = [(name, thr) for name in metric_names for thr in thresholds]

    for metric_name, threshold in settings:
        model = NearestCentroid(metric=metric_name,
                                shrink_threshold=threshold)
        model = model.fit(iris.data, iris.target)
        hits = model.predict(iris.data) == iris.target
        score = np.mean(hits)
        assert score > 0.8, "Failed with score = " + str(score)
Example 8
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_pickle():
    """A fitted estimator must keep its type and score after a pickle round trip."""
    import pickle

    # classification
    original = NearestCentroid()
    original.fit(iris.data, iris.target)
    score = original.score(iris.data, iris.target)

    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    assert_equal(type(restored), original.__class__)
    score2 = restored.score(iris.data, iris.target)
    failure_message = ("Failed to generate same score"
                       " after pickling (classification).")
    assert_array_equal(score, score2, failure_message)
Example 9
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_predict_translated_data():
    """Predictions must not change when a per-feature offset is added.

    Random data is generated from a fixed seed; the estimator (with
    ``shrink_threshold=0.1``) is fitted and evaluated once on the raw data
    and once on the data shifted by ``noise``.
    """
    rng = np.random.RandomState(0)
    # The three draws below must stay in this order to keep the same data.
    X = rng.rand(50, 50)
    y = rng.randint(0, 3, 50)
    noise = rng.rand(50)

    def fit_then_predict(data):
        model = NearestCentroid(shrink_threshold=0.1)
        model.fit(data, y)
        return model.predict(data)

    y_init = fit_then_predict(X)
    y_translate = fit_then_predict(X + noise)
    assert_array_equal(y_init, y_translate)
Example 10
Project: Weiss   Author: WangWenjun559   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_manhattan_metric():
    """Manhattan-metric centroids must match for ``X`` and ``X_csr``.

    The same estimator is fitted first on ``X`` and then on ``X_csr``; both
    sets of centroids must be equal, and equal to ``[[-1, -1], [1, 1]]``.
    """
    model = NearestCentroid(metric='manhattan')

    model.fit(X, y)
    centroids_from_dense = model.centroids_

    model.fit(X_csr, y)
    centroids_from_sparse = model.centroids_

    assert_array_equal(centroids_from_sparse, centroids_from_dense)
    assert_array_equal(centroids_from_dense, [[-1, -1], [1, 1]])
Example 11
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_precomputed():
    """Fitting with ``metric='precomputed'`` must raise ``ValueError``."""
    model = NearestCentroid(metric='precomputed')
    with assert_raises(ValueError) as raised:
        model.fit(X, y)
    raised_type = type(raised.exception)
    assert_equal(ValueError, raised_type)
Example 12
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_iris():
    """Training-set accuracy on iris must exceed 0.9 for each metric."""
    for metric_name in ('euclidean', 'cosine'):
        model = NearestCentroid(metric=metric_name)
        model = model.fit(iris.data, iris.target)
        correct = model.predict(iris.data) == iris.target
        score = np.mean(correct)
        assert score > 0.9, "Failed with score = " + str(score)
Example 13
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_iris_shrinkage():
    """Training-set accuracy on iris must exceed 0.8 with shrinkage.

    Every metric is paired with every shrink threshold (including ``None``),
    iterating thresholds fastest.
    """
    metric_names = ('euclidean', 'cosine')
    thresholds = [None, 0.1, 0.5]
    grid = [(name, thr) for name in metric_names for thr in thresholds]

    for metric_name, threshold in grid:
        model = NearestCentroid(metric=metric_name,
                                shrink_threshold=threshold)
        model = model.fit(iris.data, iris.target)
        correct = model.predict(iris.data) == iris.target
        score = np.mean(correct)
        assert score > 0.8, "Failed with score = " + str(score)
Example 14
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_pickle():
    """A fitted estimator must keep its type and score after a pickle round trip."""
    import pickle

    # classification
    fitted = NearestCentroid()
    fitted.fit(iris.data, iris.target)
    score = fitted.score(iris.data, iris.target)

    serialized = pickle.dumps(fitted)
    reloaded = pickle.loads(serialized)

    assert_equal(type(reloaded), fitted.__class__)
    score2 = reloaded.score(iris.data, iris.target)
    failure_message = ("Failed to generate same score"
                       " after pickling (classification).")
    assert_array_equal(score, score2, failure_message)
Example 15
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_shrinkage_threshold_decoded_y():
    """Shrunken centroids must not depend on how the labels are encoded.

    The estimator is fitted once with the ``-1`` labels rewritten to ``0``
    and once with the labels as given; both fits must produce the same
    ``centroids_``.
    """
    clf = NearestCentroid(shrink_threshold=0.01)
    # np.array always copies. np.asarray would return `y` itself when it is
    # already an ndarray, and the in-place relabelling below would then also
    # rewrite the shared `y`, making the two fits trivially identical.
    y_ind = np.array(y)
    y_ind[y_ind == -1] = 0
    clf.fit(X, y_ind)
    centroid_encoded = clf.centroids_
    clf.fit(X, y)
    assert_array_equal(centroid_encoded, clf.centroids_)
Example 16
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_predict_translated_data():
    """Predictions must not change when a per-feature offset is added.

    Random data is generated from a fixed seed; the estimator (with
    ``shrink_threshold=0.1``) is fitted and evaluated once on the raw data
    and once on the data shifted by ``noise``.
    """
    rng = np.random.RandomState(0)
    # Keep the draw order: it determines the generated data.
    X = rng.rand(50, 50)
    y = rng.randint(0, 3, 50)
    noise = rng.rand(50)

    def predictions_for(data):
        estimator = NearestCentroid(shrink_threshold=0.1)
        estimator.fit(data, y)
        return estimator.predict(data)

    y_init = predictions_for(X)
    y_translate = predictions_for(X + noise)
    assert_array_equal(y_init, y_translate)
Example 17
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_nearest_centroid.py    Apache License 2.0 5 votes vote down vote up
def test_manhattan_metric():
    """Manhattan-metric centroids must match for ``X`` and ``X_csr``.

    The same estimator is fitted first on ``X`` and then on ``X_csr``; both
    sets of centroids must be equal, and equal to ``[[-1, -1], [1, 1]]``.
    """
    estimator = NearestCentroid(metric='manhattan')

    estimator.fit(X, y)
    first_centroids = estimator.centroids_

    estimator.fit(X_csr, y)
    second_centroids = estimator.centroids_

    assert_array_equal(second_centroids, first_centroids)
    assert_array_equal(first_centroids, [[-1, -1], [1, 1]])
Example 18
Project: navi   Author: philoez98   File: classifiers.py    MIT License 5 votes vote down vote up
def initialize(self, show=False, **kwargs):
        """
        Fit a ``NearestCentroid`` classifier, predict the test split and
        store the predictions together with their validation metrics.

        The classifier is fitted on ``self.Xtrain`` / ``self.Ytrain`` and
        evaluated on ``self.Xtest`` / ``self.Ytest``.

        :param show: when truthy, call ``sPlot`` with the test data, the
            true labels and the predictions.
        :param kwargs: optional settings; unrecognised keys are ignored.

            * ``shrink_threshold`` (or the legacy misspelling
              ``shrink_treshold``): forwarded as ``shrink_threshold``;
              defaults to ``None``.
            * ``metric``: forwarded as ``metric``; defaults to
              ``'euclidean'``.

        The attributes ``guess``, ``accu``, ``report``, ``mcc`` and ``bas``
        are set through the parent class's ``__setattr__``.
        """
        # Accept the correctly spelled key as well as the historical
        # misspelling. The legacy key wins when both are supplied so that
        # existing callers observe exactly the behaviour they had before.
        shrink = kwargs.get('shrink_treshold', kwargs.get('shrink_threshold'))
        metric = kwargs.get('metric', 'euclidean')

        near = NearestCentroid(shrink_threshold=shrink, metric=metric)
        near.fit(self.Xtrain, self.Ytrain)
        guess = near.predict(self.Xtest)

        if show:
            sPlot(self.Xtest, self.Ytest, guess)

        accuracy = validate(self.Ytest, guess, method='accu')
        report = validate(self.Ytest, guess, method='report')
        mcc = validate(self.Ytest, guess, method='mcc')
        bas = validate(self.Ytest, guess, method='bas')

        # 'accu', 'mcc' and 'bas' are star-unpacked into the value argument,
        # so `validate` is expected to return a one-element iterable for
        # those methods -- TODO confirm against `validate`.
        super().__setattr__('guess', guess)
        super().__setattr__('accu', *accuracy)
        super().__setattr__('report', report)
        super().__setattr__('mcc', *mcc)
        super().__setattr__('bas', *bas)
Example 19
Project: anthem   Author: mikekestemont   File: Order2Verifier.py    GNU General Public License v3.0 5 votes vote down vote up
def fit(self, X, y):
        """
        Memorise the training data in the form selected by ``self.base``.

        With ``self.base == 'instance'`` the arguments are stored unchanged
        as ``self.train_X`` and ``self.train_y``. With
        ``self.base == 'profile'`` a ``NearestCentroid`` model is fitted and
        its centroids become ``self.train_X``, while ``self.train_y`` is set
        to the row indices ``0 .. n_centroids - 1`` of that centroid matrix.
        Any other value of ``self.base`` leaves the object untouched.

        Parameters
        ----------
        X: floats, array-like [nb_documents, nb_features]
            The 2D matrix representing the training instances
            to be memorized.

        y, array of ints [nb_documents]
            An int-encoded representation of the correct authorship
            for each training document.

        References
        ----------
        - Daelemans, W. & van den Bosch, A. (2005). Memory-Based
          Language Processing. Cambridge University Press.
        - M. Koppel and S. Seidman (2013), Automatically
          Identifying Pseudepigraphic Texts, EMNLP-13: 1449-1454.

        """

        if self.base == 'instance':
            # Instance-based: keep every training document as-is.
            self.train_X, self.train_y = X, y
            return

        if self.base == 'profile':
            # Profile-based: one mean centroid per class.
            centroid_model = NearestCentroid().fit(X, y)
            self.train_X = centroid_model.centroids_
            nb_centroids = self.train_X.shape[0]
            self.train_y = np.array(range(nb_centroids))
Example 20
Project: anthem   Author: mikekestemont   File: Order1Verifier.py    GNU General Public License v3.0 4 votes vote down vote up
def fit(self, X, y):
        """
        Store one centroid per class and fit distance scalers on them.

        A ``NearestCentroid`` model is fitted on ``X`` and ``y``; its
        centroids are kept as ``self.train_X`` and their row indices as
        ``self.train_y``. The distance between every pair of centroids is
        then computed with ``self.metric_fn``, NaN values are dropped, and
        the remaining distances are used to fit ``self.distance_scaler1``
        (a ``StandardScaler``) followed by ``self.distance_scaler2``
        (a ``MinMaxScaler`` fitted on the standardised distances).

        Parameters
        ----------
        X: floats, array-like [nb_documents, nb_features]
            The 2D matrix representing the training instances
            to be memorized.

        y, array of ints [nb_documents]
            An int-encoded representation of the correct authorship
            for each training document.

        References
        ----------
        - Daelemans, W. & van den Bosch, A. (2005). Memory-Based
          Language Processing. Cambridge University Press.
        - M. Koppel and S. Seidman (2013), Automatically
          Identifying Pseudepigraphic Texts, EMNLP-13: 1449-1454.

        """

        self.train_X = NearestCentroid().fit(X, y).centroids_ # mean centroids
        # Labels are the centroid row indices, not the original values in `y`.
        self.train_y = np.array(range(self.train_X.shape[0]))

        nb_items = self.train_X.shape[0]

        # calculate all pairwise distances between the stored centroids:
        distances = []
        # `idxs` spans the same range as the centroid rows and is handed to
        # `metric_fn` as its third argument -- presumably a set of indices to
        # use; verify against `metric_fn`.
        idxs = range(self.train_X.shape[0])
        for i, j in combinations(range(nb_items), 2):
            distances.append(self.metric_fn(self.train_X[i],
                                            self.train_X[j],
                                            idxs))

        # standardise the distances, then fit a min-max scaler on the result:
        distances = np.array(distances, dtype='float32').transpose()
        # Boolean-mask indexing always yields a 1-D array, so the
        # `.transpose()` calls around it leave the shape unchanged.
        distances = distances[~np.isnan(distances)]
        # NOTE(review): the scalers receive a 1-D array here; recent
        # scikit-learn releases require 2-D input -- confirm which version
        # this code targets.
        self.distance_scaler1 = StandardScaler().fit(distances)
        distances = self.distance_scaler1.transform(distances.transpose())
        self.distance_scaler2 = MinMaxScaler().fit(distances)