Python sklearn.cluster.KMeans() Examples

The following are 29 code examples showing how to use sklearn.cluster.KMeans(). They are extracted from open source projects; the project, author, file, and license are listed above each example.

You may also want to check out all available functions and classes of the module sklearn.cluster.
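Before the project excerpts, here is a minimal, self-contained sketch of the typical KMeans workflow. The toy data and parameter values are illustrative only, not taken from any project below.

import numpy as np
from sklearn.cluster import KMeans

# Toy 2-D data: two well-separated blobs
X = np.array([[1, 2], [1, 4], [1, 0],
              [10, 2], [10, 4], [10, 0]])

# Fit k-means with two clusters; a fixed random_state makes the run reproducible
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)

print(kmeans.labels_)                      # cluster index assigned to each sample
print(kmeans.cluster_centers_)             # coordinates of the two centroids
print(kmeans.predict([[0, 0], [12, 3]]))   # assign new points to the nearest centroid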

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_k_means.py    License: MIT License
def test_k_means_new_centers():
    # Explore the part of the code where a new center is reassigned
    X = np.array([[0, 0, 1, 1],
                  [0, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 0, 0],
                  [0, 0, 0, 0],
                  [0, 1, 0, 0]])
    labels = [0, 1, 2, 1, 1, 2]
    bad_centers = np.array([[+0, 1, 0, 0],
                            [.2, 0, .2, .2],
                            [+0, 0, 0, 0]])

    km = KMeans(n_clusters=3, init=bad_centers, n_init=1, max_iter=10,
                random_state=1)
    for this_X in (X, sp.coo_matrix(X)):
        km.fit(this_X)
        this_labels = km.labels_
        # Reorder the labels so that the first instance is in cluster 0,
        # the second in cluster 1, ...
        this_labels = np.unique(this_labels, return_index=True)[1][this_labels]
        np.testing.assert_array_equal(this_labels, labels) 
Example 2
Project: sparse-subspace-clustering-python   Author: abhinav4192   File: SpectralClustering.py    License: MIT License
def SpectralClustering(CKSym, n):
    # This is a direct port of JHU vision lab code. Could probably use sklearn's SpectralClustering; see the sketch after this example.
    CKSym = CKSym.astype(float)
    N, _ = CKSym.shape
    MAXiter = 1000  # Maximum number of iterations for KMeans
    REPlic = 20  # Number of replications for KMeans

    DN = np.diag(np.divide(1, np.sqrt(np.sum(CKSym, axis=0) + np.finfo(float).eps)))
    LapN = identity(N).toarray().astype(float) - np.matmul(np.matmul(DN, CKSym), DN)
    _, _, vN = np.linalg.svd(LapN)
    vN = vN.T
    kerN = vN[:, N - n:N]
    normN = np.sqrt(np.sum(np.square(kerN), axis=1))
    kerNS = np.divide(kerN, normN.reshape(len(normN), 1) + np.finfo(float).eps)
    km = KMeans(n_clusters=n, n_init=REPlic, max_iter=MAXiter, n_jobs=-1).fit(kerNS)
    return km.labels_ 
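As the comment in Example 2 suggests, scikit-learn's own SpectralClustering can stand in for the hand-rolled Laplacian/SVD pipeline. A minimal sketch, assuming CKSym is a precomputed symmetric, nonnegative affinity matrix; label numbering and exact assignments may differ from the ported implementation.

import numpy as np
from sklearn.cluster import SpectralClustering

def spectral_clustering_sklearn(CKSym, n):
    # affinity='precomputed' tells sklearn to treat CKSym as an
    # affinity matrix rather than as raw feature vectors
    sc = SpectralClustering(n_clusters=n, affinity='precomputed',
                            n_init=20, random_state=0)
    return sc.fit_predict(CKSym)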
Example 3
Project: PHATE   Author: KrishnaswamyLab   File: cluster.py    License: GNU General Public License v2.0
def silhouette_score(phate_op, n_clusters, random_state=None, **kwargs):
    """Compute the Silhouette score on KMeans on the PHATE potential

    Parameters
    ----------
    phate_op : phate.PHATE
        Fitted PHATE operator
    n_clusters : int
        Number of clusters.
    random_state : int or None, optional (default: None)
        Random seed for k-means

    Returns
    -------
    score : float
    """
    cluster_labels = kmeans(phate_op, n_clusters=n_clusters, random_state=random_state, **kwargs)
    return metrics.silhouette_score(phate_op.diff_potential, cluster_labels) 
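The same pattern, a silhouette score computed on KMeans labels, works on plain arrays as well; a minimal sketch with made-up data:

import numpy as np
from sklearn.cluster import KMeans
from sklearn import metrics

X = np.random.RandomState(0).rand(100, 5)    # illustrative data
labels = KMeans(n_clusters=3, random_state=0).fit_predict(X)
score = metrics.silhouette_score(X, labels)  # in [-1, 1]; higher is better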
Example 4
Project: scanorama   Author: brianhie   File: pancreas_tests.py    License: MIT License
def entropy_test(datasets_dimred, ds_labels):
    
    ds_labels = np.array(ds_labels)
    X_dimred = np.concatenate(datasets_dimred)
    embedding = None
    
    for k in range(10, 21):
        km = KMeans(n_clusters=k, n_jobs=-1, verbose=0)
        km.fit(X_dimred)

        if False and k % 5 == 0:
            embedding = visualize(
                datasets_dimred,
                km.labels_, NAMESPACE + '_km{}'.format(k),
                [ str(x) for x in range(k) ],
                embedding=embedding
            )
        
        print('k = {}, average normalized entropy = {}'
              .format(k, avg_norm_entropy(ds_labels, km.labels_))) 
Example 5
Project: LanczosNetwork   Author: lrjconan   File: spectral_graph_partition.py    License: MIT License
def spectral_clustering(L, K, seed=1234):
  """
  Implement paper "Shi, J. and Malik, J., 2000. Normalized cuts and image 
  segmentation. IEEE Transactions on pattern analysis and machine intelligence, 
  22(8), pp.888-905."

  Args:
    L: graph Laplacian, numpy or scipy matrix
    K: int, number of clusters

  Returns:
    node_label: list

  N.B.: for simplicity, we only consider simple, undirected graphs
  """
  num_nodes = L.shape[0]
  assert (K < num_nodes - 1)

  eig, eig_vec = scipy.sparse.linalg.eigsh(
      L, k=K, which='LM', maxiter=num_nodes * 10000, tol=0, mode='normal')
  kmeans = KMeans(n_clusters=K, random_state=seed).fit(eig_vec.real)

  return kmeans.labels_ 
Example 6
Project: Keras-BiGAN   Author: manicman1999   File: guess.py    License: MIT License
def cluster(points, means = 8):
    kk = KMeans(n_clusters = means)
    kk.fit(points)

    labels = kk.predict(points)

    r = []

    for i in range(means):
        row = []
        while(len(row) < 8):
            image = random.randint(0, data.files.shape[0] - 1)
            if labels[image] == i:
                row.append(data.files[image])

        r.append(np.concatenate(row, axis=1))

    c = np.concatenate(r, axis=0)

    x = Image.fromarray(c)
    x.save('Results/clusters.png') 
Example 7
Project: mabwiser   Author: fidelity   File: test_clusters.py    License: Apache License 2.0
def test_greedy0_n2(self):

        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.Clusters(2),
                                 context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                                  [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                                                  [0, 2, 1, 0, 0]],
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertListEqual(arms, [3, 1])
        self.assertTrue(isinstance(mab._imp.kmeans, KMeans)) 
Example 8
Project: mabwiser   Author: fidelity   File: test_clusters.py    License: Apache License 2.0
def test_copy(self):
        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.EpsilonGreedy(epsilon=0),
                                 neighborhood_policy=NeighborhoodPolicy.Clusters(2),
                                 context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                                                  [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                                                  [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                                                  [0, 2, 1, 0, 0]],
                                 contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        clusters = deepcopy(mab._imp)
        self.assertIsNot(clusters, mab._imp)
        self.assertIsInstance(clusters.lp_list[0], _EpsilonGreedy)
        self.assertIsInstance(clusters.lp_list[1], _EpsilonGreedy)
        self.assertIsInstance(clusters.kmeans, KMeans)
        self.assertIsNot(clusters.kmeans, mab._imp.kmeans)
        self.assertIsNot(clusters.lp_list[0], mab._imp.lp_list[0])
        self.assertIsNot(clusters.lp_list[1], mab._imp.lp_list[1])
        self.assertEqual(clusters.lp_list[0].epsilon, mab._imp.lp_list[0].epsilon)
        self.assertEqual(clusters.lp_list[1].epsilon, mab._imp.lp_list[1].epsilon) 
Example 9
Project: pytorch_geometric   Author: rusty1s   File: argva_node_clustering.py    License: MIT License
def test():
    model.eval()
    z = model.encode(data.x, data.train_pos_edge_index)

    # Cluster embedded values using k-means.
    kmeans_input = z.cpu().numpy()
    kmeans = KMeans(n_clusters=7, random_state=0).fit(kmeans_input)
    pred = kmeans.predict(kmeans_input)

    labels = data.y.cpu().numpy()
    completeness = completeness_score(labels, pred)
    hm = homogeneity_score(labels, pred)
    nmi = v_measure_score(labels, pred)

    auc, ap = model.test(z, data.test_pos_edge_index, data.test_neg_edge_index)

    return auc, ap, completeness, hm, nmi 
Example 10
Project: scVI   Author: YosefLab   File: posterior.py    License: MIT License
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
        if self.gene_dataset.n_labels > 1:
            latent, _, labels = self.get_latent()
            if prediction_algorithm == "knn":
                labels_pred = KMeans(
                    self.gene_dataset.n_labels, n_init=200
                ).fit_predict(
                    latent
                )  # n_jobs>1 ?
            elif prediction_algorithm == "gmm":
                gmm = GMM(self.gene_dataset.n_labels)
                gmm.fit(latent)
                labels_pred = gmm.predict(latent)

            asw_score = silhouette_score(latent, labels)
            nmi_score = NMI(labels, labels_pred)
            ari_score = ARI(labels, labels_pred)
            uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
            logger.debug(
                "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
                % (asw_score, nmi_score, ari_score, uca_score)
            )
            return asw_score, nmi_score, ari_score, uca_score 
Example 11
Project: libact   Author: ntucllab   File: density_weighted_meta.py    License: BSD 2-Clause "Simplified" License
def __init__(self, dataset, base_query_strategy, similarity_metric=None,
                 clustering_method=None, beta=1.0, random_state=None):
        super(DensityWeightedMeta, self).__init__(dataset=dataset)
        if not isinstance(base_query_strategy, QueryStrategy):
            raise TypeError(
                "'base_query_strategy' has to be an instance of 'QueryStrategy'"
            )
        if base_query_strategy.dataset != self.dataset:
            raise ValueError("base_query_strategy should share the same "
                             "dataset instance with DensityWeightedMeta")

        self.base_query_strategy = base_query_strategy
        self.beta = beta
        self.random_state_ = seed_random_state(random_state)

        if clustering_method is not None:
            self.clustering_method = clustering_method
        else:
            self.clustering_method = KMeans(
                n_clusters=5, random_state=self.random_state_)
        
        if similarity_metric is not None:
            self.similarity_metric = similarity_metric
        else:
            self.similarity_metric = cosine_similarity 
Example 12
Project: msppy   Author: lingquant   File: discretize.py    License: BSD 3-Clause "New" or "Revised" License
def SAA(self):
        """Use K-means method to discretize the Markovian process."""
        from sklearn.cluster import KMeans
        if self.int_flag == 0:
            labels = numpy.zeros(self.n_samples,dtype=int)
        self._initialize_matrix()
        for t in range(1,self.T):
            kmeans = KMeans(
                n_clusters=self.n_Markov_states[t],
                random_state=0,
            ).fit(self.samples[:,t,:])
            self.Markov_states[t] = kmeans.cluster_centers_
            if self.int_flag == 0:
                labels_new = kmeans.labels_
                counts = numpy.zeros([self.n_Markov_states[t-1],1])
                for i in range(self.n_samples):
                    counts[labels[i]] += 1
                    self.transition_matrix[t][labels[i]][labels_new[i]] += 1
                self.transition_matrix[t] /= counts
                labels = labels_new
        if self.int_flag == 1:
            self.train_transition_matrix()

        return (self.Markov_states,self.transition_matrix) 
Example 13
Project: MassImageRetrieval   Author: liuguiyangnwpu   File: feature_preprocess.py    License: Apache License 2.0
def analysis_KMeans():
	mean_distortions = []
	K = len(labels_idx)
	K_range = range(320, 1000)
	for k in K_range:
		print("Cluster k is {}".format(k))
		kmeans_model = KMeans(n_clusters=k, init="k-means++", n_jobs=-1)
		kmeans_model.fit(np_features)
		t_distortions = sum(
			np.min(cdist(np_features, kmeans_model.cluster_centers_, 'euclidean'), axis=1)) / np_features.shape[0]
		mean_distortions.append(t_distortions)

	with open("./kmeans_cluster.csv", "a+") as wh:
		for idx in range(len(K_range)):
			wh.write("{},{}\n".format(K_range[idx], mean_distortions[idx]))

	# plt.plot(K_range, mean_distortions, 'bx-')
	# plt.xlabel('k')
	# plt.ylabel(u'Average distortion')
	# plt.title(u'Elbow rule to select the best K value')
	# plt.savefig("kmeans_cluster.png") 
Example 14
def findClusters_kmeans(data):
    '''
        Cluster data using k-means
    '''
    # create the classifier object
    kmeans = cl.KMeans(
        n_clusters=4,
        n_jobs=-1,
        verbose=0,
        n_init=30
    )

    # fit the data
    return kmeans.fit(data)

Example 15
Project: retentioneering-tools   Author: retentioneering   File: clustering.py    License: Mozilla Public License 2.0
def calc_mean_dist_from_center(data, km):
    """
    Calculates the mean distance from cluster centers. Note that it is computed only for KMeans and GMM, because DBSCAN clusters may have arbitrary shapes and hence no meaningful centers.

    Parameters
    --------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    km:
        Already fitted clusterer.

    Returns
    -------
    Mapping of cluster names to the mean distance from the cluster center.

    Return type
    -------
    Dict
    """
    res = {}
    cl = km.labels_
    cs = km.cluster_centers_
    for i in set(cl):
        res[i] = _cosine_dist(data[cl == i], cs[i]).mean()
    return res 
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_spectral_embedding.py    License: MIT License
def test_pipeline_spectral_clustering(seed=36):
    # Test using pipeline to do spectral clustering
    random_state = np.random.RandomState(seed)
    se_rbf = SpectralEmbedding(n_components=n_clusters,
                               affinity="rbf",
                               random_state=random_state)
    se_knn = SpectralEmbedding(n_components=n_clusters,
                               affinity="nearest_neighbors",
                               n_neighbors=5,
                               random_state=random_state)
    for se in [se_rbf, se_knn]:
        km = KMeans(n_clusters=n_clusters, random_state=random_state)
        km.fit(se.fit_transform(S))
        assert_array_almost_equal(
            normalized_mutual_info_score(
                km.labels_,
                true_labels), 1.0, 2) 
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_pipeline.py    License: MIT License
def test_fit_predict_on_pipeline():
    # test that the fit_predict method is implemented on a pipeline
    # test that the fit_predict on pipeline yields same results as applying
    # transform and clustering steps separately
    iris = load_iris()
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    # As pipeline doesn't clone estimators on construction,
    # it must have its own estimators
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([
        ('scaler', scaler_for_pipeline),
        ('Kmeans', km_for_pipeline)
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred) 
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_k_means.py    License: MIT License
def test_kmeans_results(representation, algo, dtype):
    # checks that kmeans works as intended
    array_constr = {'dense': np.array, 'sparse': sp.csr_matrix}[representation]
    X = array_constr([[0, 0], [0.5, 0], [0.5, 1], [1, 1]], dtype=dtype)
    sample_weight = [3, 1, 1, 3]  # will be rescaled to [1.5, 0.5, 0.5, 1.5]
    init_centers = np.array([[0, 0], [1, 1]], dtype=dtype)

    expected_labels = [0, 0, 1, 1]
    expected_inertia = 0.1875
    expected_centers = np.array([[0.125, 0], [0.875, 1]], dtype=dtype)
    expected_n_iter = 2

    kmeans = KMeans(n_clusters=2, n_init=1, init=init_centers, algorithm=algo)
    kmeans.fit(X, sample_weight=sample_weight)

    assert_array_equal(kmeans.labels_, expected_labels)
    assert_almost_equal(kmeans.inertia_, expected_inertia)
    assert_array_almost_equal(kmeans.cluster_centers_, expected_centers)
    assert kmeans.n_iter_ == expected_n_iter 
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_k_means.py    License: MIT License
def test_elkan_results(distribution):
    # check that results are identical between lloyd and elkan algorithms
    rnd = np.random.RandomState(0)
    if distribution == 'normal':
        X = rnd.normal(size=(50, 10))
    else:
        X, _ = make_blobs(random_state=rnd)

    km_full = KMeans(algorithm='full', n_clusters=5, random_state=0, n_init=1)
    km_elkan = KMeans(algorithm='elkan', n_clusters=5,
                      random_state=0, n_init=1)

    km_full.fit(X)
    km_elkan.fit(X)
    assert_array_almost_equal(km_elkan.cluster_centers_,
                              km_full.cluster_centers_)
    assert_array_equal(km_elkan.labels_, km_full.labels_) 
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_k_means.py    License: MIT License
def test_k_means_non_collapsed():
    # Check that k_means with a bad initialization does not yield a singleton.
    # Starting with bad centers that are quickly ignored should not
    # result in a repositioning of the centers to the center of mass, which
    # would lead to collapsed centers and in turn make the clustering
    # dependent on numerical instabilities.
    my_X = np.array([[1.1, 1.1], [0.9, 1.1], [1.1, 0.9], [0.9, 1.1]])
    array_init = np.array([[1.0, 1.0], [5.0, 5.0], [-5.0, -5.0]])
    km = KMeans(init=array_init, n_clusters=3, random_state=42, n_init=1)
    km.fit(my_X)

    # centers must not have collapsed
    assert_equal(len(np.unique(km.labels_)), 3)

    centers = km.cluster_centers_
    assert np.linalg.norm(centers[0] - centers[1]) >= 0.1
    assert np.linalg.norm(centers[0] - centers[2]) >= 0.1
    assert np.linalg.norm(centers[1] - centers[2]) >= 0.1 
Example 21
Project: discomll   Author: romanorac   File: tests_clustering.py    License: Apache License 2.0
def test_kmeans_iris(self):
        # python -m unittest tests_clustering.Tests_Clustering.test_kmeans_iris
        from discomll.clustering import kmeans
        from sklearn.cluster import KMeans

        max_iter = 10
        clusters = 3
        random_seed = 0

        x_train, y_train, x_test, y_test = datasets.iris()
        train_data, test_data = datasets.iris_discomll()

        sk_kmeans = KMeans(n_clusters=clusters, max_iter=max_iter, n_init=1, random_state=random_seed).fit(x_train)
        centroids1 = sk_kmeans.cluster_centers_
        # predictions1 = sk_kmeans.predict(x_test)

        centroids_url = kmeans.fit(train_data,
                                   n_clusters=clusters,
                                   max_iterations=max_iter,
                                   random_state=random_seed)

        predictions_url = kmeans.predict(test_data, centroids_url)
        # predictions2 = [v[1] for k,v in result_iterator(predictions_url)]

        centroids2 = [v["x"] for k, v in result_iterator(centroids_url["kmeans_fitmodel"])]
        centroids2[0], centroids2[2] = centroids2[2], centroids2[0]
        self.assertTrue(np.allclose(centroids1, centroids2)) 
Example 22
Project: discomll   Author: romanorac   File: tests_clustering.py    License: Apache License 2.0
def test_kmeans_breastcancer(self):
        # python -m unittest tests_clustering.Tests_Clustering.test_kmeans_breastcancer
        from discomll.clustering import kmeans
        from sklearn.cluster import KMeans

        max_iter = 10
        clusters = 2
        random_seed = 2

        x_train, _, x_test, _ = datasets.breastcancer_disc()
        train_data, test_data = datasets.breastcancer_disc_discomll()

        kmeans2 = KMeans(n_clusters=clusters, max_iter=max_iter, n_init=1, random_state=random_seed).fit(x_train)
        centroids1 = kmeans2.cluster_centers_
        predictions1 = kmeans2.predict(x_test)

        centroids_url = kmeans.fit(train_data,
                                   n_clusters=clusters,
                                   max_iterations=max_iter,
                                   random_state=random_seed)

        predictions_url = kmeans.predict(test_data, centroids_url)
        predictions2 = [v[0] for k, v in result_iterator(predictions_url)]
        centroids2 = [v["x"] for k, v in result_iterator(centroids_url["kmeans_fitmodel"])]

        centroids2[0], centroids2[1] = centroids2[1], centroids2[0]

        self.assertTrue(np.allclose(centroids1, centroids2)) 
Example 23
Project: differential-privacy-library   Author: IBM   File: k_means.py    License: MIT License
def _update_centers(self, X, centers, labels, dims, total_iters):
        """Updates the centers of the KMeans algorithm for the current iteration, while satisfying differential
        privacy.

        Differential privacy is satisfied by adding (integer-valued, using :class:`.GeometricFolded`) random noise to
        the count of nearest neighbours to the previous cluster centers, and adding (real-valued, using
        :class:`.LaplaceBoundedDomain`) random noise to the sum of values per dimension.

        """
        epsilon_0, epsilon_i = self._split_epsilon(dims, total_iters)
        geometric_mech = GeometricFolded().set_sensitivity(1).set_bounds(0.5, float("inf")).set_epsilon(epsilon_0)
        laplace_mech = LaplaceBoundedDomain().set_epsilon(epsilon_i)

        for cluster in range(self.n_clusters):
            if cluster not in labels:
                continue

            cluster_count = sum(labels == cluster)
            noisy_count = geometric_mech.randomise(cluster_count)

            cluster_sum = np.sum(X[labels == cluster], axis=0)
            noisy_sum = np.zeros_like(cluster_sum)

            for i in range(dims):
                laplace_mech.set_sensitivity(self.bounds[1][i] - self.bounds[0][i]) \
                    .set_bounds(noisy_count * self.bounds[0][i], noisy_count * self.bounds[1][i])
                noisy_sum[i] = laplace_mech.randomise(cluster_sum[i])

            centers[cluster, :] = noisy_sum / noisy_count

        return centers 
Example 24
Project: differential-privacy-library   Author: IBM   File: k_means.py    License: MIT License
def _calc_iters(self, n_dims, n_samples, rho=0.225):
        """Calculate the number of iterations to allow for the KMeans algorithm."""

        epsilon_m = np.sqrt(500 * (self.n_clusters ** 3) / (n_samples ** 2) *
                            (n_dims + np.cbrt(4 * n_dims * (rho ** 2))) ** 3)

        iters = max(min(self.epsilon / epsilon_m, 7), 2)

        return int(iters) 
Example 25
Project: recordlinkage   Author: J535D165   File: classifiers.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self,
                 match_cluster_center=None,
                 nonmatch_cluster_center=None,
                 **kwargs):
        super(KMeansClassifier, self).__init__()

        # initialize the classifier
        self.kernel = cluster.KMeans(n_clusters=2, n_init=1, **kwargs)

        # set cluster centers if available
        self.match_cluster_center = match_cluster_center
        self.nonmatch_cluster_center = nonmatch_cluster_center 
Example 26
Project: ParametricGP   Author: maziarraissi   File: parametric_GP.py    License: MIT License
def __init__(self, X, y, M=10, max_iter = 2000, N_batch = 1, 
                 monitor_likelihood = 10, lrate = 1e-3):
        (N,D) = X.shape
        N_subset = min(N, 10000)
        idx = np.random.choice(N, N_subset, replace=False)
        kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx,:])
        Z = kmeans.cluster_centers_
    
        hyp = np.log(np.ones(D+1))
        logsigma_n = np.array([-4.0])
        hyp = np.concatenate([hyp, logsigma_n])
    
        m = np.zeros((M,1))
        S = kernel(Z,Z,hyp[:-1])

        self.X = X
        self.y = y
        
        self.M = M
        self.Z = Z
        self.m = m
        self.S = S
        
        self.hyp= hyp
        
        self.max_iter = max_iter
        self.N_batch = N_batch
        self.monitor_likelihood = monitor_likelihood
        self.jitter = 1e-8
        self.jitter_cov = 1e-8
        
        # Adam optimizer parameters
        self.mt_hyp = np.zeros(hyp.shape)
        self.vt_hyp = np.zeros(hyp.shape)
        self.lrate = lrate 
Example 27
Project: mabwiser   Author: fidelity   File: clusters.py    License: Apache License 2.0
def __init__(self, rng: _BaseRNG, arms: List[Arm], n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Random, _Softmax, _ThompsonSampling, _UCB1],
                 n_clusters: Num, is_minibatch: bool):
        super().__init__(rng, arms, n_jobs, backend)

        self.n_clusters = n_clusters

        if is_minibatch:
            self.kmeans = MiniBatchKMeans(n_clusters, random_state=rng.seed)
        else:
            self.kmeans = KMeans(n_clusters, random_state=rng.seed)

        # Create the list of learning policies for each cluster
        # Deep copy all parameters of the lp objects, except refer to the originals of rng and arms
        self.lp_list = [deepcopy(lp) for _ in range(self.n_clusters)]
        for c in range(self.n_clusters):
            self.lp_list[c].rng = rng
            self.lp_list[c].arms = arms

        self.decisions = None
        self.rewards = None
        self.contexts = None

        # Initialize the arm expectations to nan
        # When there are neighbors, expectations of the underlying learning policy is used
        # When there are no neighbors, return nan expectations
        reset(self.arm_to_expectation, np.nan) 
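When is_minibatch is set, Example 27 swaps in MiniBatchKMeans, which trades a little clustering quality for much faster, incremental fitting. A minimal sketch of the incremental API, with illustrative data and parameters:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

mbk = MiniBatchKMeans(n_clusters=2, batch_size=32, random_state=0)

# Stream the data in chunks instead of fitting everything at once
rng = np.random.RandomState(0)
for _ in range(10):
    chunk = rng.rand(32, 5)   # stand-in for a batch of contexts
    mbk.partial_fit(chunk)

labels = mbk.predict(rng.rand(4, 5))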
Example 28
Project: interactive-deep-colorization   Author: junyanz   File: colorize_image.py    License: MIT License
def get_ab_reccs(self, h, w, K=5, N=25000, return_conf=False):
        ''' Recommended colors at point (h,w)
        Call this after calling net_forward
        '''
        if not self.dist_ab_set:
            print('Need to set prediction first')
            return 0

        # randomly sample from pdf
        cmf = np.cumsum(self.dist_ab[:, h, w])  # CMF
        cmf = cmf / cmf[-1]
        cmf_bins = cmf

        # randomly sample N points
        rnd_pts = np.random.uniform(low=0, high=1.0, size=N)
        inds = np.digitize(rnd_pts, bins=cmf_bins)
        rnd_pts_ab = self.pts_in_hull[inds, :]

        # run k-means
        kmeans = KMeans(n_clusters=K).fit(rnd_pts_ab)

        # sort by cluster occupancy
        k_label_cnt = np.histogram(kmeans.labels_, np.arange(0, K + 1))[0]
        k_inds = np.argsort(k_label_cnt, axis=0)[::-1]

        cluster_per = 1. * k_label_cnt[k_inds] / N  # percentage of points within cluster
        cluster_centers = kmeans.cluster_centers_[k_inds, :]  # cluster centers

        # cluster_centers = np.random.uniform(low=-100,high=100,size=(N,2))
        if return_conf:
            return cluster_centers, cluster_per
        else:
            return cluster_centers 
Example 29
Project: ImageColorTheme   Author: rainyear   File: KMeans.py    License: MIT License
def __init__(self, pixData, maxColor, useSklearn=True):
        super(KMeans, self).__init__()
        h, w, d = pixData.shape
        self.pixData = np.reshape(pixData, (h * w, d))
        self.maxColor = maxColor
        if useSklearn:
            self._KMeans = KM(n_clusters = maxColor)
        else:
            self._KMeans = KMDiy(n_clusters = maxColor)