Python sklearn.cluster.SpectralClustering() Examples

The following are 23 code examples of sklearn.cluster.SpectralClustering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
Example #1
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_spectral_clustering(eigen_solver, assign_labels):
    """Cluster a two-block precomputed affinity, dense and sparse.

    For the given ``eigen_solver`` / ``assign_labels`` combination the
    fitted labels must split the 7 samples into the first three and the
    last four, and the fitted estimator must survive a pickle round trip.
    """
    first_group = [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0]
    bridge = [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0]
    second_group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    S = np.array([first_group] * 3 + [bridge] + [second_group] * 3)
    expected = [1, 1, 1, 0, 0, 0, 0]

    for affinity in (S, sparse.csr_matrix(S)):
        estimator = SpectralClustering(n_clusters=2,
                                       affinity='precomputed',
                                       eigen_solver=eigen_solver,
                                       assign_labels=assign_labels,
                                       random_state=0)
        model = estimator.fit(affinity)

        # Normalise the labelling so that the first sample carries label 1.
        labels = model.labels_
        if labels[0] == 0:
            labels = 1 - labels
        assert adjusted_rand_score(labels, expected) == 1

        # The estimator and its fitted labels must be picklable.
        restored = pickle.loads(pickle.dumps(model))
        assert restored.n_clusters == model.n_clusters
        assert restored.eigen_solver == model.eigen_solver
        assert_array_equal(restored.labels_, model.labels_)
Example #2
Source File: metric.py    From MvDSCN with MIT License 6 votes vote down vote up
def post_proC(C, K, d, alpha):
    """Turn a coefficient matrix into an affinity and cluster it spectrally.

    Parameters
    ----------
    C : coefficient matrix (square; it is averaged with its transpose)
    K : number of clusters
    d : dimension of each subspace
    alpha : exponent applied element-wise to the thresholded affinity

    Returns
    -------
    grp : cluster labels, shifted by one so they start at 1
    L : symmetric affinity matrix scaled so that its maximum is 1
    """
    # Symmetrise the coefficients.
    C = 0.5 * (C + C.T)
    # Rank of the truncated SVD, kept strictly below the matrix size.
    r = min(d * K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    # Reverse the order of the returned factors and scale each left
    # singular vector by the square root of its singular value.
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    # Keep only the positive entries of the Gram matrix of the unit rows.
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L / L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize', random_state=66)
    # fit_predict already fits the model; a separate fit() beforehand would
    # only repeat the whole computation with the same fixed random_state.
    grp = spectral.fit_predict(L) + 1
    return grp, L
Example #3
Source File: clusterings.py    From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def spectral_clustering(n_clusters, samples, size=False):

    """
    Run spectral clustering on a precomputed affinity matrix.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        adjacency matrix of surface or region, used as the
        precomputed affinity
    size : bool
        accepted but not used by this function

    Returns:
    - - - - -
    labels : array
        int32 cluster labels, shifted by one so the smallest
        label is 1 rather than 0
    """

    model = cluster.SpectralClustering(
        affinity='precomputed', n_clusters=n_clusters)
    model.fit(samples)

    # astype returns a new array, so the estimator's own labels_ stay
    # untouched.
    return model.labels_.astype(np.int32) + 1
Example #4
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_spectral_clustering_sparse():
    """Two tight blobs must be separated perfectly from a sparse affinity."""
    blob_centers = [[1, 1], [-1, -1]]
    X, y = make_blobs(n_samples=20, centers=blob_centers,
                      cluster_std=0.01, random_state=0)

    # RBF similarities shifted down by 1e-4 and clipped at zero, then
    # stored in COO format.
    affinity = np.maximum(rbf_kernel(X, gamma=1) - 1e-4, 0)
    affinity = sparse.coo_matrix(affinity)

    estimator = SpectralClustering(n_clusters=2, affinity='precomputed',
                                   random_state=0)
    fitted = estimator.fit(affinity)
    assert adjusted_rand_score(y, fitted.labels_) == 1
Example #5
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_spectral_clustering_sparse():
    """Two tight blobs must be separated perfectly from a sparse affinity."""
    blob_centers = [[1, 1], [-1, -1]]
    X, y = make_blobs(n_samples=20, centers=blob_centers,
                      cluster_std=0.01, random_state=0)

    # RBF similarities shifted down by 1e-4 and clipped at zero, then
    # stored in COO format.
    affinity = np.maximum(rbf_kernel(X, gamma=1) - 1e-4, 0)
    affinity = sparse.coo_matrix(affinity)

    estimator = SpectralClustering(n_clusters=2, affinity='precomputed',
                                   random_state=0)
    fitted = estimator.fit(affinity)
    score = adjusted_rand_score(y, fitted.labels_)
    assert_equal(score, 1)
Example #6
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for a bogus assign_labels."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    # Convert pairwise distances into similarities (largest distance -> 0)
    # and hand them over as a sparse COO matrix.
    distances = pairwise_distances(X)
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0,
                  assign_labels="<unknown>")
Example #7
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for a bogus eigen_solver."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    # Convert pairwise distances into similarities (largest distance -> 0)
    # and hand them over as a sparse COO matrix.
    distances = pairwise_distances(X)
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0,
                  eigen_solver="<unknown>")
Example #8
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_spectral_amg_mode():
    """Exercise the 'amg' eigen solver, with or without pyamg installed.

    With pyamg available, spectral_clustering must run and reach a minimal
    accuracy; without it, spectral_embedding must raise ValueError.
    """
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    # Distances turned into similarities, stored as a sparse COO matrix.
    distances = pairwise_distances(X)
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    try:
        from pyamg import smoothed_aggregation_solver  # noqa
    except ImportError:
        have_pyamg = False
    else:
        have_pyamg = True

    if not have_pyamg:
        assert_raises(ValueError, spectral_embedding, similarity,
                      n_components=len(blob_centers),
                      random_state=0, eigen_solver="amg")
        return

    labels = spectral_clustering(similarity, n_clusters=len(blob_centers),
                                 random_state=0, eigen_solver="amg")
    # We don't care too much that it's good, just that it *worked*.
    # There does have to be some lower limit on the performance though.
    assert_greater(np.mean(labels == true_labels), .3)
Example #9
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_spectral_clustering():
    """Cluster a two-block precomputed affinity for every solver setting.

    Each eigen solver / label assignment pair is run on the dense and the
    CSR form of the affinity; the labels must split the samples 3 / 4 and
    the fitted estimator must survive a pickle round trip.
    """
    first_group = [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0]
    bridge = [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0]
    second_group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    S = np.array([first_group] * 3 + [bridge] + [second_group] * 3)

    settings = [(solver, assigner)
                for solver in ('arpack', 'lobpcg')
                for assigner in ('kmeans', 'discretize')]

    for eigen_solver, assign_labels in settings:
        for affinity in (S, sparse.csr_matrix(S)):
            estimator = SpectralClustering(n_clusters=2,
                                           affinity='precomputed',
                                           eigen_solver=eigen_solver,
                                           assign_labels=assign_labels,
                                           random_state=0)
            model = estimator.fit(affinity)

            # Normalise the labelling so the first sample carries label 1.
            labels = model.labels_
            if labels[0] == 0:
                labels = 1 - labels
            assert_array_equal(labels, [1, 1, 1, 0, 0, 0, 0])

            # The estimator and its fitted labels must be picklable.
            restored = loads(dumps(model))
            assert_equal(restored.n_clusters, model.n_clusters)
            assert_equal(restored.eigen_solver, model.eigen_solver)
            assert_array_equal(restored.labels_, model.labels_)
Example #10
Source File: sklearn_cluster.py    From learn-to-cluster with MIT License 5 votes vote down vote up
def dask_spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` with dask-ml's SpectralClustering (RBF affinity).

    Extra keyword arguments are accepted but not used. Returns the result
    of calling ``compute()`` on the fitted model's ``labels_``.
    """
    # Imported lazily so dask-ml is only needed when this backend is used.
    from dask_ml.cluster import SpectralClustering

    estimator = SpectralClustering(n_clusters=n_clusters,
                                   affinity='rbf',
                                   random_state=0)
    fitted = estimator.fit(feat)
    lazy_labels = fitted.labels_
    return lazy_labels.compute()
Example #11
Source File: sklearn_cluster.py    From learn-to-cluster with MIT License 5 votes vote down vote up
def spectral(feat, n_clusters, **kwargs):
    """Cluster ``feat`` with nearest-neighbour spectral clustering.

    Labels are assigned with the 'discretize' strategy and a fixed
    random_state of 0. Extra keyword arguments are accepted but not used.
    Returns the fitted model's ``labels_``.
    """
    estimator = cluster.SpectralClustering(
        n_clusters=n_clusters,
        affinity="nearest_neighbors",
        assign_labels="discretize",
        random_state=0,
    )
    fitted = estimator.fit(feat)
    return fitted.labels_
Example #12
Source File: compare_clustering_algs.py    From mmvt with GNU General Public License v3.0 5 votes vote down vote up
def compare(data, n_groups, output_fol):
    """Plot the result of several clustering algorithms on ``data``.

    Each algorithm is passed to ``plot_clusters`` with a display name, the
    output folder, and its positional and keyword arguments. ``n_groups``
    is used wherever an algorithm takes a cluster / component count.
    """
    # (algorithm, display name, positional args, keyword args), in the
    # order in which they are plotted.
    runs = [(cluster.KMeans, 'KMeans', (), {'n_clusters': n_groups})]
    # One Gaussian mixture per covariance type.
    for ct in ['spherical', 'tied', 'diag', 'full']:
        runs.append((mixture.GaussianMixture, 'GMM_{}'.format(ct), (),
                     {'n_components': n_groups, 'covariance_type': ct}))
    runs.extend([
        (cluster.AffinityPropagation, 'AffinityPropagation', (),
         {'preference': -5.0, 'damping': 0.95}),
        (cluster.MeanShift, 'MeanShift', (0.175,), {'cluster_all': False}),
        (cluster.SpectralClustering, 'SpectralClustering', (),
         {'n_clusters': n_groups}),
        (cluster.AgglomerativeClustering, 'AgglomerativeClustering', (),
         {'n_clusters': n_groups, 'linkage': 'ward'}),
        (cluster.DBSCAN, 'DBSCAN', (), {'eps': 0.025}),
    ])
    # Not plotted (disabled in the original): scipy.cluster.vq.kmeans and
    # hdbscan.HDBSCAN.

    for algorithm, title, args, kwds in runs:
        plot_clusters(data, algorithm, title, output_fol, args, kwds)
Example #13
Source File: create_endpoints_mask_with_clustering.py    From TractSeg with Apache License 2.0 5 votes vote down vote up
def cluster(points, algorithm=DBSCAN):
    """Cluster ``points`` and return the members of the two biggest clusters.

    :param points: samples passed to the estimator's ``fit_predict``
    :param algorithm: either the name ``"KMeans"`` / ``"DBSCAN"`` or the
        estimator class of that name (the default is the ``DBSCAN`` class)
    :return: ``(points_start, points_end)`` as produced by
        ``select_two_biggest_clusters``
    :raises ValueError: if ``algorithm`` is neither KMeans nor DBSCAN
    """
    print("Running {}...".format(algorithm))

    # The default argument is a class while callers may pass a plain name.
    # Reduce both to a name, otherwise the default never matches a branch
    # and ``labels`` is left unassigned.
    if isinstance(algorithm, str):
        algorithm_name = algorithm
    else:
        algorithm_name = getattr(algorithm, "__name__", None)

    if algorithm_name == "KMeans":
        # not good at finding clusters if close together
        labels = KMeans(n_clusters=2, random_state=0, n_jobs=-1).fit_predict(points)
    elif algorithm_name == "DBSCAN":
        # no fixed number of labels; slow with high eps
        labels = DBSCAN(eps=3.0, n_jobs=-1).fit_predict(points)
    else:
        raise ValueError(
            "Unsupported clustering algorithm: {!r} "
            "(expected 'KMeans' or 'DBSCAN')".format(algorithm))
    # Alternatives noted in the original: SpectralClustering (slow, > 1min)
    # and AgglomerativeClustering (fast).
    points_start, points_end = select_two_biggest_clusters(labels, points)
    return points_start, points_end
Example #14
Source File: SpectralClustering.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self, options):
        """Set up the spectral clustering estimator and scaler from ``options``.

        The ``'params'`` entry of ``options`` (an empty dict when absent) is
        converted with ``convert_params``: ``gamma`` as float, ``affinity``
        as string, ``k`` and ``random_state`` as ints, with ``k`` aliased to
        ``n_clusters``. The result is passed as keyword arguments to the
        estimator.
        """
        self.handle_options(options)

        raw_params = options.get('params', {})
        out_params = convert_params(
            raw_params,
            floats=['gamma'],
            strs=['affinity'],
            ints=['k', 'random_state'],
            aliases={'k': 'n_clusters'},
        )

        self.estimator = _SpectralClustering(**out_params)
        self.scaler = StandardScaler()
Example #15
Source File: graphTools.py    From graph-neural-networks with GNU General Public License v3.0 5 votes vote down vote up
def computeSourceNodes(A, C):
    """
    computeSourceNodes: compute source nodes for the source localization problem

    Input:
        A (np.array): adjacency matrix of shape N x N
        C (int): number of classes

    Output:
        sourceNodes (list): contains the indices of the C source nodes

    The adjacency matrix is used as a precomputed affinity for spectral
    clustering into C communities; from each community the node that comes
    last when sorting by degree (i.e. one of largest degree) is selected.
    """
    # Degree of each node: column sums of the adjacency matrix.
    degree = np.sum(A, axis=0)

    # Compute communities
    clusterer = SpectralClustering(n_clusters=C,
                                   affinity='precomputed',
                                   assign_labels='discretize')
    communityLabels = clusterer.fit(A).labels_

    sourceNodes = []
    for c in range(C):
        # Indices of the nodes assigned to community c.
        members = np.nonzero(communityLabels == c)[0]
        # Position, within the community, of the last node in ascending
        # degree order.
        topPosition = np.argsort(degree[members])[-1]
        sourceNodes.append(members[topPosition])

    return sourceNodes
Example #16
Source File: region_growing.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform_rays_model_cdf_spectral(list_rays, nb_components=5):
    """ compute the mixture model and transform it into cumulative distribution

    The rays are grouped with spectral clustering; for every found cluster
    the per-position mean (smoothed by a 1D Gaussian filter with sigma 1)
    and standard deviation (increased by 1) are computed and passed,
    together with the relative cluster sizes, to
    ``compute_cumulative_distrib``.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return tuple(any,list(list(int))):  mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_spectral(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.6, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.7, 0.5, 0.2, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.3, 0.0]]
    """
    rays = np.array(list_rays)
    sc = cluster.SpectralClustering(nb_components)
    sc.fit(rays)

    labels = sc.labels_
    uq_labels = np.unique(labels)
    # '%d' is required here: a bare '% c...' is parsed as a character
    # conversion and garbles the message.
    logging.debug('SpectralClustering found %d components with counts: %r',
                  len(uq_labels), np.bincount(labels))

    means = np.zeros((len(uq_labels), rays.shape[1]))
    stds = np.zeros((len(means), rays.shape[1]))
    for i, lb in enumerate(uq_labels):
        members = rays[labels == lb]
        # Smooth the per-position mean; gaussian_filter1d is taken from
        # scipy.ndimage directly since the ``filters`` namespace is deprecated.
        means[i, :] = ndimage.gaussian_filter1d(np.mean(members, axis=0), 1)
        stds[i, :] = np.std(members, axis=0)
    # Offset so that no component has a zero standard deviation.
    stds += 1
    weights = np.bincount(labels) / float(len(labels))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max(means + stds)

    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return sc, cdist.tolist()
Example #17
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_spectral_unknown_mode():
    """spectral_clustering must raise ValueError for a bogus eigen_solver."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    # Convert pairwise distances into similarities (largest distance -> 0)
    # and hand them over as a sparse COO matrix.
    distances = pairwise_distances(X)
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0,
                  eigen_solver="<unknown>")
Example #18
Source File: scdiff.py    From scdiff with MIT License 5 votes vote down vote up
def performClustering(self):
		"""Cluster the cells of every time point and return all clusters.

		For each time point in ``self.KET`` the number of clusters and the
		random seed come from ``self.getClusteringPars()``. With more than
		one cluster the affinity matrix of that time point is clustered
		(KMeans when ``self.largeType`` is '1' or 'True', SpectralClustering
		otherwise) and each cell gets its label; otherwise all cells get
		label 0. Returns the list of ``Cluster`` objects over all time points.
		"""
		print('start clustering...')
		KET=self.KET
		# default clustering model
		[dCK,dBS]=self.getClusteringPars()
		AC=[]
		gc.collect()
		for ti in KET:
			print("clustering for time: "+str(ti))
			CT = self.dET[ti]
			CKT=dCK[ti]
			BST=dBS[ti]

			if not CKT > 1:
				# Single cluster: every cell of this time point gets label 0.
				for cell in CT:
					cell.Label = 0
				AC += [Cluster([item for item in CT if item.Label == 0], ti, str(ti)+'_'+str(0))]
				continue

			# Work on a private copy of the affinity matrix.
			X=copy.deepcopy(self.affMatrix[ti])
			useKMeans = (self.largeType=='1' or self.largeType=='True')
			if useKMeans:
				SC = KMeans(n_clusters=CKT, random_state=BST)
			else:
				SC = SpectralClustering(n_clusters=CKT, random_state=BST)

			SC.fit(X)
			Y = SC.labels_

			for j, cell in enumerate(CT):
				cell.Label = Y[j]
			# One Cluster per label, named "<time>_<label>".
			AC += [Cluster([item for item in CT if item.Label == j], ti, str(ti) + '_' + str(j)) for j in range(CKT)]
		return AC

# cluster 
Example #19
Source File: baseline_clustering.py    From cdp with MIT License 5 votes vote down vote up
def spectral(feat, n_clusters=2):
    """Cluster ``feat`` with nearest-neighbour spectral clustering.

    Labels are assigned with the 'discretize' strategy and a fixed
    random_state of 0. Returns the fitted model's ``labels_``.
    """
    estimator = cluster.SpectralClustering(
        n_clusters=n_clusters,
        affinity="nearest_neighbors",
        assign_labels="discretize",
        random_state=0,
    )
    fitted = estimator.fit(feat)
    return fitted.labels_
Example #20
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_spectral_unknown_assign_labels():
    """spectral_clustering must raise ValueError for a bogus assign_labels."""
    blob_centers = np.array([[0., 0., 0.],
                             [10., 10., 10.],
                             [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=blob_centers,
                                cluster_std=1., random_state=42)

    # Convert pairwise distances into similarities (largest distance -> 0)
    # and hand them over as a sparse COO matrix.
    distances = pairwise_distances(X)
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    assert_raises(ValueError, spectral_clustering, similarity,
                  n_clusters=2, random_state=0,
                  assign_labels="<unknown>")
Example #21
Source File: scdiff.py    From scdiff with MIT License 4 votes vote down vote up
def determineSeed(self,dCK):
		"""Pick, per time point, the random seed with the best silhouette score.

		dCK maps each time point to its number of clusters. For every time
		point after the first, each candidate seed is used to cluster the
		affinity matrix (KMeans when ``self.largeType`` is '1' or 'True',
		SpectralClustering otherwise) and the seed with the highest
		silhouette score is kept. 100 seeds are tried when
		``self.largeType`` is None, a single one otherwise.

		Returns a dict mapping time point -> best seed. The first time point
		always gets seed 0, as does any time point whose search fails.
		"""
		print("learning clustering seeds...")
		dBS = {}  # Best seeds
		KET=self.KET
		NSEEDS=100 if self.largeType is None else 1
		useKMeans = (self.largeType=='1' or self.largeType=='True')

		for T in KET[1:]:
			try:
				CKi = dCK[T]
				SS=[]
				X=copy.deepcopy(self.affMatrix[T])
				SEEDS = range(NSEEDS)
				if useKMeans:
					for s in SEEDS:
						# The seed must be passed on: otherwise the score is
						# unrelated to s, and the seed stored here is later
						# used as the random_state of the final clustering.
						SC = KMeans(n_clusters=CKi, random_state=s)
						SC.fit(X)
						Y = SC.labels_
						sscore = silhouette_score(X, Y)
						SS.append(sscore)
						print("seeds:"+str(s))
				else:
					# Silhouette is computed on distances derived from the
					# affinity; the conversion does not depend on the seed.
					DX=self.affinity2Distance(X)
					for s in SEEDS:
						SC = SpectralClustering(n_clusters=CKi, random_state=s)
						SC.fit(X)
						Y = SC.labels_
						sscore = silhouette_score(DX, Y, metric="precomputed")
						SS.append(sscore)
						print("seeds:"+str(s))
				dBS[T] = SEEDS[SS.index(max(SS))]
			except Exception as err:
				# Best effort: fall back to seed 0, but say so instead of
				# hiding the failure. KeyboardInterrupt/SystemExit propagate.
				print("seed search failed for time "+str(T)+" ("+str(err)+"), using seed 0")
				dBS[T]=0
		dBS[KET[0]] = 0
		return dBS
Example #22
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License 4 votes vote down vote up
def test_affinities():
    """Run SpectralClustering with every supported kind of affinity.

    Covers nearest neighbours, the default affinity with an explicit gamma,
    every named kernel except additive chi^2, callables, and finally an
    unknown affinity name, which must raise ValueError.
    """
    # Note: in the following, random_state has been selected to have
    # a dataset that yields a stable eigen decomposition both when built
    # on OSX and Linux
    blob_centers = [[1, 1], [-1, -1]]
    X, y = make_blobs(n_samples=20, centers=blob_centers,
                      cluster_std=0.01, random_state=0)

    # nearest neighbors affinity: fitting must warn about connectivity
    knn_model = SpectralClustering(n_clusters=2, random_state=0,
                                   affinity='nearest_neighbors')
    assert_warns_message(UserWarning, 'not fully connected',
                         knn_model.fit, X)
    assert adjusted_rand_score(y, knn_model.labels_) == 1

    # default affinity with an explicit gamma
    gamma_model = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
    assert adjusted_rand_score(y, gamma_model.fit(X).labels_) == 1

    # From here on only the shape of the labels is checked, on random data.
    X = check_random_state(10).rand(10, 5) * 10
    expected_shape = (X.shape[0],)

    for kern in kernel_metrics():
        # Additive chi^2 gives a negative similarity matrix which
        # doesn't make sense for spectral clustering
        if kern == 'additive_chi2':
            continue
        kernel_model = SpectralClustering(n_clusters=2, affinity=kern,
                                          random_state=0)
        assert expected_shape == kernel_model.fit(X).labels_.shape

    # constant callable affinity
    const_model = SpectralClustering(n_clusters=2, random_state=0,
                                     affinity=lambda x, y: 1)
    assert expected_shape == const_model.fit(X).labels_.shape

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    hist_model = SpectralClustering(n_clusters=2, affinity=histogram,
                                    random_state=0)
    assert expected_shape == hist_model.fit(X).labels_.shape

    # raise error on unknown affinity
    bad_model = SpectralClustering(n_clusters=2, affinity='<unknown>')
    assert_raises(ValueError, bad_model.fit, X)
Example #23
Source File: test_cluster.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
def test_objectmapper(self):
        """The ModelFrame ``cluster`` accessor must expose the sklearn classes.

        Every attribute checked has to be the very same object as the one
        found in ``sklearn.cluster`` (and ``sklearn.cluster.bicluster``).
        """
        df = pdml.ModelFrame([])

        estimator_names = (
            'AffinityPropagation',
            'AgglomerativeClustering',
            'Birch',
            'DBSCAN',
            'FeatureAgglomeration',
            'KMeans',
            'MiniBatchKMeans',
            'MeanShift',
            'SpectralClustering',
        )
        for name in estimator_names:
            self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

        # Biclustering estimators live one level deeper.
        for name in ('SpectralBiclustering', 'SpectralCoclustering'):
            self.assertIs(getattr(df.cluster.bicluster, name),
                          getattr(cluster.bicluster, name))