Python sklearn.cluster.spectral_clustering() Examples

The following are 13 code examples of sklearn.cluster.spectral_clustering(), drawn from open-source projects. Each example lists its source file and project so you can trace it back to the original code. You may also want to check out all available functions/classes of the module sklearn.cluster.
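As a quick orientation before the examples, here is a minimal, self-contained sketch of calling spectral_clustering() on a precomputed affinity matrix (the data and parameter values are illustrative only, not taken from any example below):

import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.datasets import make_blobs
from sklearn.metrics import pairwise_distances

# Three well-separated blobs, then an RBF-style affinity from pairwise distances.
X, _ = make_blobs(n_samples=60, centers=3, random_state=0)
D = pairwise_distances(X)
affinity = np.exp(-D / D.std())  # larger value = more similar

labels = spectral_clustering(affinity, n_clusters=3, random_state=0)
print(labels[:10])  # one cluster index per sample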
Example #1
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>") 
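The `S = np.max(D) - D` step above is a simple way to flip a distance matrix into a similarity matrix, since spectral_clustering expects affinities (larger = more alike). A small standalone sketch of the idea, with made-up points:

import numpy as np
from sklearn.metrics import pairwise_distances

X = np.array([[0., 0.], [0., 1.], [10., 10.]])
D = pairwise_distances(X)
S = np.max(D) - D  # distances flipped into similarities
# The two nearby points now share the largest off-diagonal similarity.
assert S[0, 1] > S[0, 2]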
Example #2
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>") 
Example #3
Source File: test_spectral.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_spectral_clustering_with_arpack_amg_solvers():
    # Test that spectral_clustering gives the same result for the arpack and
    # amg solvers. Based on the toy example from plot_segmentation_toy.py.

    # a small two-coin image
    x, y = np.indices((40, 40))

    center1, center2 = (14, 12), (20, 25)
    radius1, radius2 = 8, 7

    circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
    circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2

    circles = circle1 | circle2
    mask = circles.copy()
    img = circles.astype(float)

    graph = img_to_graph(img, mask=mask)
    graph.data = np.exp(-graph.data / graph.data.std())

    labels_arpack = spectral_clustering(
        graph, n_clusters=2, eigen_solver='arpack', random_state=0)

    assert len(np.unique(labels_arpack)) == 2

    if amg_loaded:
        labels_amg = spectral_clustering(
            graph, n_clusters=2, eigen_solver='amg', random_state=0)
        assert adjusted_rand_score(labels_arpack, labels_amg) == 1
    else:
        assert_raises(
            ValueError, spectral_clustering,
            graph, n_clusters=2, eigen_solver='amg', random_state=0) 
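The `amg_loaded` flag above is defined by a guarded import at the top of the test module; Example #9 below inlines the same pattern:

try:
    from pyamg import smoothed_aggregation_solver  # noqa
    amg_loaded = True
except ImportError:
    amg_loaded = False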
Example #4
Source File: graph_clustering.py    From Hydra with MIT License
def _spectral_solver(outer_stress,
                     serving_tasks,
                     task_groups,
                     n_clusters=2):
    """Graph clusterization using spectral methods.

    Args:
      outer_stress:   a list of tuples of outer measurements info:
                      [(task_id_i, task_id_j, stress_value)]
                      returned by `applications.measures.stress_points`.
      serving_tasks:  list of tasks that the current branch is serving.
      task_groups:    a list of lists of tasks, one per child node.
      n_clusters:     number of clusters to divide to.

    Returns:
      a numpy array of cluster indices of each group, e.g. [0, 1, 0]
    """
    task_id_idx = dict((k, i) for i, k in enumerate(serving_tasks))
    data = np.zeros((len(serving_tasks), len(serving_tasks)))
    for task_id_i, task_id_j, stressval in outer_stress:
        data[task_id_idx[task_id_i], task_id_idx[task_id_j]] = stressval
    df_tasks = pd.DataFrame(
            data=data, index=serving_tasks, columns=serving_tasks)

    data = np.zeros((len(task_groups), len(task_groups)))
    for gid_i in range(len(task_groups)):
        for gid_j in range(len(task_groups)):
            t = df_tasks.loc[task_groups[gid_i], task_groups[gid_j]]
            ij_stress = t.max(axis=1).mean()
            t = df_tasks.loc[task_groups[gid_j], task_groups[gid_i]]
            ji_stress = t.max(axis=1).mean()
            data[gid_i, gid_j] = (ij_stress + ji_stress) / 2.
    df_groups = pd.DataFrame(data=data)

    affinity = df_groups.values
    affinity = np.exp(-affinity / affinity.max())
    clusters = spectral_clustering(affinity, n_clusters=n_clusters)
    return clusters 
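A hypothetical call with made-up stress values, assuming four serving tasks that each form their own child group (pairs with low mutual stress should land in the same cluster):

outer_stress = [('a', 'b', 1.0), ('c', 'd', 1.0), ('a', 'c', 9.0),
                ('a', 'd', 9.0), ('b', 'c', 9.0), ('b', 'd', 9.0)]
serving_tasks = ['a', 'b', 'c', 'd']
task_groups = [['a'], ['b'], ['c'], ['d']]
clusters = _spectral_solver(outer_stress, serving_tasks, task_groups, n_clusters=2)
# Likely grouping, up to label permutation: array([0, 0, 1, 1])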
Example #5
Source File: networkclustering.py    From PyPSA with GNU General Public License v3.0
def busmap_by_spectral_clustering(network, n_clusters, **kwds):
        lines = (network.lines.loc[:, ['bus0', 'bus1']]
                 .assign(weight=network.lines.num_parallel)
                 .set_index(['bus0', 'bus1']))
        lines.weight += 0.1  # keep edge weights strictly positive
        G = nx.Graph()
        G.add_nodes_from(network.buses.index)
        G.add_edges_from((u, v, dict(weight=w)) for (u, v), w in lines.itertuples())
        labels = sk_spectral_clustering(nx.adjacency_matrix(G), n_clusters, **kwds)
        return pd.Series(list(map(str, labels + 1)), index=network.buses.index) 
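A hypothetical call, assuming `network` is an existing pypsa.Network; extra keyword arguments are forwarded to sklearn's spectral_clustering:

busmap = busmap_by_spectral_clustering(network, n_clusters=10)
# busmap is a pd.Series of string cluster labels ('1'..'10'), indexed by bus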
Example #6
Source File: metrics.py    From snfpy with GNU Lesser General Public License v3.0
def rank_feature_by_nmi(inputs, W, *, K=20, mu=0.5, n_clusters=None):
    """
    Calculates NMI of each feature in `inputs` with `W`

    Parameters
    ----------
    inputs : list-of-tuple
        Each tuple should contain (1) an (N, M) data array, where N is samples
        M is features, and (2) a string indicating the metric to use to compute
        a distance matrix for the given data. This MUST be one of the options
        available in :py:func:`scipy.spatial.distance.cdist`
    W : (N, N) array_like
        Similarity array generated by :py:func:`snf.compute.snf`
    K : (0, N) int, optional
        Number of neighbors to consider when constructing affinity matrices.
        Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int, optional
        Number of desired clusters. Default: determined by eigengap (see
        `snf.get_n_clusters()`)

    Returns
    -------
    nmi : list of (M,) np.ndarray
        Normalized mutual information scores for each feature of input arrays
    """

    if n_clusters is None:
        n_clusters = compute.get_n_clusters(W)[0]
    snf_labels = spectral_clustering(W, n_clusters)
    nmi = [np.empty(shape=(d.shape[-1])) for d, m in inputs]
    for ndtype, (dtype, metric) in enumerate(inputs):
        for nfeature, feature in enumerate(np.asarray(dtype).T):
            aff = compute.make_affinity(np.vstack(feature), K=K, mu=mu,
                                        metric=metric)
            aff_labels = spectral_clustering(aff, n_clusters)
            nmi[ndtype][nfeature] = v_measure_score(snf_labels, aff_labels)

    return nmi 
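A hypothetical usage sketch, assuming snfpy's `compute` module as imported in the snippet, with two made-up data modalities measured on the same samples:

import numpy as np

d1 = np.random.rand(100, 10)  # modality 1: 100 samples x 10 features
d2 = np.random.rand(100, 20)  # modality 2: 100 samples x 20 features
inputs = [(d1, 'sqeuclidean'), (d2, 'sqeuclidean')]
aff = compute.make_affinity(d1, d2, metric='sqeuclidean')
W = compute.snf(*aff)
nmi = rank_feature_by_nmi(inputs, W)  # two arrays, shapes (10,) and (20,)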
Example #7
Source File: test_cluster.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_spectral_clustering(self):
        N = 50
        m = np.random.randint(1, 201, size=(N, N))  # random_integers is deprecated
        m = (m + m.T) / 2  # symmetrize to get a valid affinity matrix

        df = pdml.ModelFrame(m)
        result = df.cluster.spectral_clustering(random_state=self.random_state)
        expected = cluster.spectral_clustering(m, random_state=self.random_state)

        self.assertIsInstance(result, pdml.ModelSeries)
        tm.assert_index_equal(result.index, df.index)
        tm.assert_numpy_array_equal(result.values, expected) 
Example #8
Source File: LST.py    From python-urbanPlanning with MIT License
def __init__(self, LST):
        self.LST = LST
## Apply spectral_clustering() here. In this experiment the clustered regions were
## not used in the deeper analysis; interpret for yourself how the resulting regions
## reflect the LST data (the clustering itself appears in Example #13 below). 
Example #9
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License
def test_spectral_amg_mode():
    # Test the amg mode of SpectralClustering
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    try:
        from pyamg import smoothed_aggregation_solver  # noqa

        amg_loaded = True
    except ImportError:
        amg_loaded = False
    if amg_loaded:
        labels = spectral_clustering(S, n_clusters=len(centers),
                                     random_state=0, eigen_solver="amg")
        # We don't care too much that it's good, just that it *worked*.
        # There does have to be some lower limit on the performance though.
        assert_greater(np.mean(labels == true_labels), .3)
    else:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers),
                      random_state=0, eigen_solver="amg") 
Example #10
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>") 
Example #11
Source File: test_spectral.py    From twitter-stock-recommendation with MIT License
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>") 
Example #12
Source File: cv.py    From snfpy with GNU Lesser General Public License v3.0
def compute_SNF(*data, metric='sqeuclidean', K=20, mu=1, n_clusters=None,
                t=20, n_perms=1000, normalize=True, seed=None):
    """
    Runs a full SNF on `data` and returns cluster affinity scores and labels

    Parameters
    ----------
    *data : (N, M) array_like
        Raw data arrays, where `N` is samples and `M` is features.
    metric : str or list-of-str, optional
        Distance metrics to compute on `data`. Must be one of available metrics
        in ``scipy.spatial.distance.pdist``. If a list is provided for `data` a
        list of equal length may be supplied here. Default: 'sqeuclidean'
    K : int, optional
        Number of neighbors to compare similarity against. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 1
    n_clusters : int or list-of-int, optional
        Number of clusters to find in combined data. Default: determined by
        eigengap (see `compute.get_n_clusters()`)
    t : int, optional
        Number of iterations to perform information swapping. Default: 20
    n_perms : int, optional
        Number of permutations for calculating z_affinity. Default: 1000
    normalize : bool, optional
        Whether to normalize (zscore) the data before constructing the affinity
        matrix. Each feature is separately normalized. Default: True
    seed : int or np.random.RandomState, optional
        Random seed for reproducibility. Default: None

    Returns
    -------
    z_affinity : list-of-float
        Z-score of silhouette (affinity) score
    snf_labels : list of (N,) np.ndarray
        Cluster labels for subjects
    """

    rs = check_random_state(seed)

    # make affinity matrices for all inputs and run SNF
    all_aff = compute.make_affinity(*data, metric=metric, K=K, mu=mu,
                                    normalize=normalize)
    snf_aff = compute.snf(*all_aff, K=K, t=t)

    # get estimated number of clusters (if not provided)
    if n_clusters is None:
        n_clusters = [compute.get_n_clusters(snf_aff)[0]]
    elif isinstance(n_clusters, int):
        n_clusters = [n_clusters]

    # perform spectral clustering across all `n_clusters`
    snf_labels = [spectral_clustering(snf_aff, clust, random_state=rs)
                  for clust in n_clusters]

    # get z-affinity as desired
    if n_perms is not None and n_perms > 0:
        z_affinity = [metrics.affinity_zscore(snf_aff, label, n_perms, seed=rs)
                      for label in snf_labels]
        return z_affinity, snf_labels

    return snf_labels 
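A hypothetical usage sketch with random data (shapes and parameters are made up; with n_perms > 0 the function returns both z-scores and labels):

import numpy as np

d1, d2 = np.random.rand(50, 10), np.random.rand(50, 12)
z_affinity, snf_labels = compute_SNF(d1, d2, n_clusters=3, n_perms=100, seed=1234)
# z_affinity: one z-score per clustering; snf_labels: one (50,) label array each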
Example #13
Source File: LST.py    From python-urbanPlanning with MIT License
def LSTClustering(self):
        # Based on the scikit-learn example "Segmenting the picture of greek coins
        # in regions", Author: Gael Varoquaux <gael.varoquaux@normalesup.org>, Brian Cheung
        # License: BSD 3 clause
        orig_coins = self.LST
        # these were introduced in skimage-0.14
        if LooseVersion(skimage.__version__) >= '0.14':
            rescale_params = {'anti_aliasing': False, 'multichannel': False}
        else:
            rescale_params = {}
        smoothened_coins = gaussian_filter(orig_coins, sigma=2)
        rescaled_coins = rescale(smoothened_coins, 0.2, mode="reflect",
                                 **rescale_params)
        # Convert the image into a graph with the value of the gradient on the
        # edges.
        graph = image.img_to_graph(rescaled_coins)        
        # Take a decreasing function of the gradient: an exponential.
        # The smaller beta is, the more independent the segmentation is of the
        # actual image. For beta=1, the segmentation is close to a Voronoi
        # tessellation.
        beta = 10
        eps = 1e-6
        graph.data = np.exp(-beta * graph.data / graph.data.std()) + eps        
        # Apply spectral clustering (this step goes much faster if you have pyamg
        # installed)
        N_REGIONS = 200        
        # Only 'discretize' is exercised here; 'kmeans' is another option.
        for assign_labels in ('discretize',):
            t0 = time.time()
            labels = spectral_clustering(graph, n_clusters=N_REGIONS,
                                         assign_labels=assign_labels,
                                         random_state=42)
            t1 = time.time()
            labels = labels.reshape(rescaled_coins.shape)
        
            plt.figure(figsize=(5*3, 5*3))
            plt.imshow(rescaled_coins, cmap=plt.cm.gray)
            for l in range(N_REGIONS):
                plt.contour(labels == l,
                            colors=[plt.cm.nipy_spectral(l / float(N_REGIONS))])
            plt.xticks(())
            plt.yticks(())
            title = 'Spectral clustering: %s, %.2fs' % (assign_labels, (t1 - t0))
            print(title)
            plt.title(title)
        plt.show()

## Delineate the spatial structure of cold and hot regions from convolved
## temperature-gradient changes.