Python scipy.cluster.vq.kmeans2() Examples

The following code examples show how to use scipy.cluster.vq.kmeans2(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: pytim   Author: Marcello-Sega   File: utilities_dbscan.py    GNU General Public License v3.0 6 votes vote down vote up
def determine_samples(threshold_density, cluster_cut, n_neighbors):
    """Determine the ``min_samples`` parameter for DBSCAN clustering.

    :param threshold_density: ``None`` (use the default of 2), a number
        interpreted as a particle density, or the string ``'auto'``
        (estimate a threshold from the neighbor counts via k-means).
    :param cluster_cut: cutoff radius; used to convert a density into a
        number of samples (density times the cutoff-sphere volume).
    :param n_neighbors: per-particle neighbor counts (array-like); only
        used when ``threshold_density == 'auto'``.
    :returns: the number of samples to use (never less than 2).
    :raises ValueError: if ``threshold_density`` is not ``None``, a
        number, or ``'auto'``.
    """
    # `x is None` is the idiomatic form of isinstance(x, type(None)).
    if threshold_density is None:
        return 2

    if isinstance(threshold_density, (float, int)):
        # density * volume of the sphere with radius cluster_cut
        min_samples = threshold_density * 4. / 3. * np.pi * cluster_cut**3

    elif threshold_density == 'auto':
        # Split the neighbor counts into two modes and use the larger
        # centroid as the sample threshold.
        modes = 2
        centroid, _ = vq.kmeans2(
            n_neighbors * 1.0, modes, iter=10, check_finite=False)
        min_samples = np.max(centroid)

    else:
        # The original message embedded a long whitespace run from a
        # backslash line-continuation; concatenate the parts instead.
        raise ValueError("Wrong value of 'threshold_density' passed "
                         "to do_cluster_analysis_DBSCAN() ")

    return np.max([min_samples, 2])
Example 2
Project: Fractalis   Author: LCSB-BioCore   File: cluster.py    Apache License 2.0 6 votes vote down vote up
def _kmeans(self, df: pd.DataFrame, n_centroids) -> Tuple[List, List]:
        names = list(df.index)
        values = df.as_matrix().astype('float')
        cluster = list(kmeans2(values, k=n_centroids, minit='points')[1])
        cluster_count = Counter(cluster)
        # sort elements by their cluster size
        sorted_cluster = sorted(zip(names, cluster),
                                key=lambda x: (cluster_count[x[1]], x[1]),
                                reverse=True)
        names = [x[0] for x in sorted_cluster]
        cluster = [x[1] for x in sorted_cluster]
        # relabel cluster, with the biggest cluster being 0
        c = 0
        relabeled_cluster = []
        for i, v in enumerate(cluster):
            if i > 0 and cluster[i] != cluster[i-1]:
                c += 1
            relabeled_cluster.append(c)
        cluster = relabeled_cluster
        return names, cluster 
Example 3
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 6 votes vote down vote up
def test_kmeans_lost_cluster(self):
    """This will cause kmeans to have a cluster with no points."""
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))
    initk = np.array([[-1.8127404, -0.67128041],
                      [2.04621601, 0.07401111],
                      [-2.31149087, -0.05160469]])

    res = kmeans(data, initk)

    # numpy's WarningManager is deprecated; warnings.catch_warnings()
    # gives the same scoped save/restore of the filter state.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        res = kmeans2(data, initk, missing='warn')

    assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
Example 4
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 6 votes vote down vote up
def test_kmeans2_init(self):
    """Testing that kmeans2 init methods work."""
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))

    kmeans2(data, 3, minit='points')
    kmeans2(data[:, :1], 3, minit='points')  # special case (1-D)

    # minit='random' can give warnings, filter those.
    # numpy's WarningManager is deprecated; use the stdlib context
    # manager, which restores the filters automatically.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                    message="One of the clusters is empty. Re-run")
        kmeans2(data, 3, minit='random')
        kmeans2(data[:, :1], 3, minit='random')  # special case (1-D)
Example 5
Project: practicalDataAnalysisCookbook   Author: drabastomek   File: clustering_kmeans_alternative.py    GNU General Public License v2.0 6 votes vote down vote up
def findClusters_kmeans(data):
    '''
        Cluster data using k-means
    '''
    # normalize each feature to unit variance before clustering
    whitened = vq.whiten(data)

    # run k-means with 4 centroids for 30 iterations
    centroids, assignments = vq.kmeans2(whitened, k=4, iter=30)

    return centroids, assignments

# the file name of the dataset 

# the file name of the dataset 
Example 6
Project: gptorch   Author: cics-nd   File: util.py    MIT License 6 votes vote down vote up
def kmeans_centers(x: np.ndarray, k: int, perturb_if_fail: bool=False) -> \
        np.ndarray:
    """
    Use k-means clustering and find the centers of the clusters.
    :param x: The data
    :param k: Number of clusters
    :param perturb_if_fail: Move the points randomly in case of a numpy
        LinAlgError; when False, the error propagates to the caller.
    :return: the centers
    """
    try:
        return kmeans2(x, k)[0]
    except np.linalg.LinAlgError:
        # The original version perturbed unconditionally, leaving the
        # perturb_if_fail flag dead; honor it as documented.
        if not perturb_if_fail:
            raise
        x_scale = x.std(axis=0)
        x_perturbed = x + 1.0e-4 * x_scale * np.random.randn(*x.shape)
        return kmeans2(x_perturbed, k)[0]
Example 7
Project: edge-server-placement   Author: cbozi   File: algorithms.py    Apache License 2.0 6 votes vote down vote up
def place_server(self, base_station_num, edge_server_num):
    """Place edge servers at the k-means centroids of the first
    base_station_num base stations' coordinates, then assign every
    base station to the server of its cluster."""
    logging.info("{0}:Start running k-means with N={1}, K={2}".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                                                                      base_station_num, edge_server_num))
    # init data as ndarray
    stations = self.base_stations[:base_station_num]
    data = np.array([(bs.latitude, bs.longitude) for bs in stations])
    k = edge_server_num

    # k-means
    centroid, label = vq.kmeans2(data, k, iter=100)

    # build one edge server per centroid, then hand each base station
    # (and its workload) to the server of its cluster
    servers = []
    for idx, row in enumerate(centroid):
        servers.append(EdgeServer(idx, row[0], row[1]))
    for station_idx, server_idx in enumerate(label):
        station = stations[station_idx]
        servers[server_idx].assigned_base_stations.append(station)
        servers[server_idx].workload += station.workload

    # keep only servers that received some workload
    self.edge_servers = [s for s in servers if s.workload != 0]
    logging.info("{0}:End running k-means".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
Example 8
Project: orth_decoupled_var_gps   Author: hughsalimbeni   File: classification.py    Apache License 2.0 6 votes vote down vote up
def init_model(self, Model, X, Y):
    """Build and attach the classification model for this experiment."""
    input_dim = X.shape[1]
    kern = Matern52(input_dim, lengthscales=SETTINGS.lengthscales * input_dim ** 0.5)

    # the number of distinct classes picks the likelihood
    self.K = len(set(Y.flatten().astype(int)))
    lik = Bernoulli() if self.K == 2 else MultiClass(self.K)

    # gamma/beta inducing inputs come from k-means over the data
    if self.M_gamma > 0:
        gamma = kmeans2(X, self.M_gamma, minit='points')[0]
        gamma_minibatch_size = SETTINGS.gamma_minibatch_size
    else:
        gamma = np.empty((0, input_dim))
        gamma_minibatch_size = None
    beta = kmeans2(X, self.M_beta, minit='points')[0]

    self.model = Model(X, Y, kern, lik, gamma, beta,
                       minibatch_size=SETTINGS.minibatch_size,
                       gamma_minibatch_size=gamma_minibatch_size)
    self.sess = self.model.enquire_session()
Example 9
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 6 votes vote down vote up
def test_kmeans_lost_cluster(self):
    """This will cause kmeans to have a cluster with no points."""
    data = np.fromfile(open(DATAFILE1), sep=", ")
    data = data.reshape((200, 2))
    initk = np.array([[-1.8127404, -0.67128041],
                      [2.04621601, 0.07401111],
                      [-2.31149087, -0.05160469]])

    res = kmeans(data, initk)
    # Scope the filter change instead of flipping the global filter and
    # resetting it to 'default' (which may not have been the prior state).
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        res = kmeans2(data, initk, missing='warn')

    # `except ClusterError, e:` is Python-2-only syntax; use the
    # Python-3-compatible form (the bound exception was unused anyway).
    try:
        res = kmeans2(data, initk, missing='raise')
        raise AssertionError("Exception not raised ! Should not happen")
    except ClusterError:
        pass
Example 10
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 6 votes vote down vote up
def test_kmeans_0k(self):
    """Regression test for #546: fail when k arg is 0."""
    # assert_raises replaces the manual try/raise-AssertionError/except
    # boilerplate and matches the style used elsewhere in this suite.
    assert_raises(ValueError, kmeans, X, 0)
    assert_raises(ValueError, kmeans2, X, 0)
    assert_raises(ValueError, kmeans2, X, np.array([]))
Example 11
Project: ddp   Author: nutszebra   File: vlad.py    MIT License 5 votes vote down vote up
def kMeansByScipy(arr, k, threshold=1.0e-05):
    """Run scipy's kmeans2 on arr with k clusters (seeded from random
    data points) and return [centroids, labels]."""
    result = kmeans2(arr, k=k, thresh=threshold, minit='points')
    return list(result)
Example 12
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 5 votes vote down vote up
def test_kmeans2_simple(self):
    """Testing simple call to kmeans2 and its results."""
    # seed with the first three observations
    code = np.concatenate(([[X[0]], [X[1]], [X[2]]])).copy()
    after_one = kmeans2(X, code, iter=1)[0]
    after_two = kmeans2(X, code, iter=2)[0]

    assert_array_almost_equal(after_one, CODET1)
    assert_array_almost_equal(after_two, CODET2)
Example 13
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 5 votes vote down vote up
def test_kmeans2_rank1(self):
    """Testing simple call to kmeans2 with rank 1 data."""
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))
    data1 = data[:, 0]
    # (removed the unused `data2 = data[:, 1]` local)

    # smoke test only: rank-1 input with explicit initial centroids;
    # no assertion on the resulting codebooks.
    initc = data1[:3]
    code = initc.copy()
    code1 = kmeans2(data1, code, iter=1)[0]
    code2 = kmeans2(data1, code, iter=2)[0]
Example 14
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 5 votes vote down vote up
def test_kmeans2_rank1_2(self):
    """Testing simple call to kmeans2 with rank 1 data."""
    raw = np.fromfile(DATAFILE1, sep=", ")
    column = raw.reshape((200, 2))[:, 0]

    # smoke test: rank-1 input with an integer k
    kmeans2(column, 2, iter=1)
Example 15
Project: Computable   Author: ktraunmueller   File: test_vq.py    MIT License 5 votes vote down vote up
def test_kmeans_0k(self):
    """Regression test for #546: fail when k arg is 0."""
    for func, bad_k in ((kmeans, 0), (kmeans2, 0), (kmeans2, np.array([]))):
        assert_raises(ValueError, func, X, bad_k)
Example 16
Project: TicTacToe   Author: Neural-Network   File: kmeans.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def kmeanspp(Y, k):
    """k-means with kinit-provided (k-means++ style) starting centroids."""
    initial_centroids = kinit(Y, k)
    return kmeans2(Y, initial_centroids, minit='points')
Example 17
Project: workspace_2017   Author: nwiizo   File: neural_doodle.py    MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Cluster the rows of xs into k groups and return the labels,
    preferring scikit-learn and falling back to scipy when it is
    not installed."""
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(xs, k, missing='raise')
    else:
        _, labels, _ = k_means(xs.astype("float64"), k)
    return labels
Example 18
Project: applications   Author: geomstats   File: neural_doodle.py    MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Return k-means cluster labels for the 2-D array xs.

    scikit-learn's k_means is used when importable; otherwise the
    scipy.cluster.vq implementation is used.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
    except ImportError:
        from scipy.cluster.vq import kmeans2
        labels = kmeans2(xs, k, missing='raise')[1]
    else:
        labels = k_means(xs.astype('float64'), k)[1]
    return labels
Example 19
Project: orth_decoupled_var_gps   Author: hughsalimbeni   File: regression.py    Apache License 2.0 5 votes vote down vote up
def init_model(self, Model, X, Y):
    """Build and attach the regression model for this experiment."""
    input_dim = X.shape[1]
    kern = Matern52(input_dim, lengthscales=SETTINGS.lengthscales * input_dim ** 0.5)
    lik = Gaussian()
    lik.variance = SETTINGS.likelihood_variance

    # gamma/beta inducing inputs come from k-means over the data
    if self.M_gamma > 0:
        gamma = kmeans2(X, self.M_gamma, minit='points')[0]
        gamma_minibatch_size = SETTINGS.gamma_minibatch_size
    else:
        gamma = np.empty((0, input_dim))
        gamma_minibatch_size = None
    beta = kmeans2(X, self.M_beta, minit='points')[0]

    self.model = Model(X, Y, kern, lik, gamma, beta,
                       minibatch_size=SETTINGS.minibatch_size,
                       gamma_minibatch_size=gamma_minibatch_size)
    self.sess = self.model.enquire_session()
Example 20
Project: football-scorelines   Author: pwalch   File: experiment_clustering.py    GNU General Public License v3.0 5 votes vote down vote up
def findCentroids(obsList, centroidCount):
    """Pick centroidCount centroids for the observations via k-means,
    seeding from randomly chosen observations (minit='points'), and
    return (centroids, labels)."""
    observations = numpy.array(obsList)
    return clustering.kmeans2(data=observations, k=centroidCount,
                              minit='points')
Example 21
Project: pybrain2   Author: pybrain2   File: kmeans.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def kmeanspp(Y, k):
    """Run kmeans2 seeded with kinit's k-means++-style starting points."""
    seeds = kinit(Y, k)
    clustered = kmeans2(Y, seeds, minit='points')
    return clustered
Example 22
Project: pCVR   Author: xjtushilei   File: neural_doodle.py    Apache License 2.0 5 votes vote down vote up
def kmeans(xs, k):
    """Label each row of xs with one of k clusters.

    Tries scikit-learn first; if it is unavailable, falls back to
    scipy.cluster.vq.kmeans2.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        labels = k_means(xs.astype('float64'), k)[1]
    except ImportError:
        from scipy.cluster.vq import kmeans2
        labels = kmeans2(xs, k, missing='raise')[1]
    return labels
Example 23
Project: specimen-vision   Author: jrdurrant   File: color_analysis.py    GNU General Public License v2.0 5 votes vote down vote up
def dominant_colors(image, num_colors, mask=None):
    """Reduce image colors to a representative set of a given size.

    Args:
        image (ndarray): RGB image of shape n x m x 3.
        num_colors (int): Number of colors to reduce to.
        mask (array_like, optional): Foreground mask. Defaults to None.

    Returns:
        list: The list of Color objects representing the most dominant colors in the image.

    """
    # cluster in Lab space, which is perceptually more uniform than RGB
    image = rgb2lab(image / 255.0)

    if mask is not None:
        data = image[mask > 250]
    else:
        data = np.reshape(image, (-1, 3))

    # kmeans algorithm has inherent randomness - result will not be exactly the same
    # every time. Fairly consistent with >= 30 iterations
    centroids, labels = kmeans2(data, num_colors, iter=30)
    # np.histogram's `normed` argument was removed in NumPy 1.24;
    # `density=True` is equivalent here because every bin has unit width,
    # so counts are the fraction of pixels assigned to each centroid.
    counts = np.histogram(labels, bins=range(0, num_colors + 1), density=True)[0]

    centroids_RGB = lab2rgb(centroids.reshape(-1, 1, 3))[:, 0, :] * 255.0
    colors = [Color(centroid, count) for centroid, count in zip(centroids_RGB, counts)]
    # order colors by their mean RGB intensity (darkest first)
    colors.sort(key=lambda color: np.mean(color.RGB))

    return colors
Example 24
Project: GGP   Author: yincheng   File: al_exp.py    Apache License 2.0 5 votes vote down vote up
def setup_model_and_opt(self):
    """Create the GraphSVGP model and its Adam optimizer for this run."""
    n_class = len(np.unique(self.all_y))
    x_tr = self.all_x[self.tr_mask]
    y_tr = self.all_y[self.tr_mask]
    n_inducing_points = len(x_tr)

    # degree-1 sparse polynomial graph kernel with fixed offset/variance
    k = SparseGraphPolynomial(self.adj_mat, self.node_features, x_tr, degree=1.)
    k.offset = 0.
    k.offset.fixed = True
    k.variance = 1.
    k.variance.fixed = True

    # inducing points: k-means centroids over the node features
    ind_points = kmeans2(self.node_features, n_inducing_points, minit='points')[0]

    opt = tf.train.AdamOptimizer(0.005)
    m = GraphSVGP(x_tr, y_tr, k, GPflow.likelihoods.MultiClass(n_class), ind_points,
                  num_latent=n_class, minibatch_size=len(x_tr), whiten=True, q_diag=False)
    return m, opt
Example 25
Project: GGP   Author: yincheng   File: ssl_exp.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, data_name, random_seed):
    """Set up the semi-supervised-learning experiment.

    :param data_name: name of the dataset to load (lower-cased internally).
    :param random_seed: seed applied to both numpy and tensorflow RNGs.
    """
    self.data_name = data_name.lower()
    self.random_seed = int(random_seed)
    np.random.seed(self.random_seed)
    tf.set_random_seed(self.random_seed)
    # Load data
    self.adj_mat, self.node_features, self.x_tr, self.y_tr, self.x_val, self.y_val, self.x_test, self.y_test \
        = load_data_ssl(self.data_name)
    # Init kernel
    k = SparseGraphPolynomial(self.adj_mat, self.node_features, self.x_tr, degree=3.)
    k.offset = np.abs(np.random.randn(1) + 5.)
    k.offset.fixed = False
    k.variance = 1.
    k.variance.fixed = True
    # Init inducing points at k-means centroids of the node features
    ind_points = kmeans2(self.node_features, len(self.x_tr), minit='points')[0]
    # Init optimizer
    self.optimizer = tf.train.AdamOptimizer(0.0005)
    # Init model
    self.m = GraphSVGP(self.x_tr, self.y_tr, k, GPflow.likelihoods.MultiClass(len(np.unique(self.y_tr))), ind_points,
                  num_latent=len(np.unique(self.y_tr)), minibatch_size=len(self.x_tr), whiten=True, q_diag=False)
    # Define housekeeping variables
    self.last_ts = time.time()
    self.iter = 0
    self.check_obj_every = 200
    self.log_iter = []
    self.log_t = []
    self.log_obj = []
    self.log_param = None
    self.log_opt_state = None
    self.param_fp = os.path.join(os.getenv('PWD'), 'ssl_param_files')
    if not os.path.isdir(self.param_fp):
        os.mkdir(self.param_fp)
    self.param_fp = os.path.join(self.param_fp, 'SSL-{0}-rs_{1}.p'.format(self.data_name, random_seed))
    self.m._compile(self.optimizer)
    if os.path.isfile(self.param_fp):
        # Python-2 `print` statement converted to the print() function.
        print('Param. file already exists! Loading from {0}.'.format(self.param_fp))
        self.load_snapshot(self.param_fp)
    else:
        self.save_snapshot(self.param_fp, update_before_saving=True)
Example 26
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 5 votes vote down vote up
def test_kmeans2_simple(self):
    """Testing simple call to kmeans2 and its results."""
    # seed the codebook with the first three observations
    start = np.concatenate(([[X[0]], [X[1]], [X[2]]])).copy()
    for n_iter, expected in ((1, CODET1), (2, CODET2)):
        result = kmeans2(X, start, iter=n_iter)[0]
        assert_array_almost_equal(result, expected)
Example 27
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 5 votes vote down vote up
def test_kmeans2_rank1(self):
    """Testing simple call to kmeans2 with rank 1 data."""
    data = np.fromfile(open(DATAFILE1), sep=", ")
    data = data.reshape((200, 2))
    data1 = data[:, 0]
    # (removed the unused `data2 = data[:, 1]` local)

    # smoke test only: rank-1 input with explicit initial centroids;
    # the resulting codebooks are not asserted on.
    initc = data1[:3]
    code = initc.copy()
    code1 = kmeans2(data1, code, iter=1)[0]
    code2 = kmeans2(data1, code, iter=2)[0]
Example 28
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 5 votes vote down vote up
def test_kmeans2_rank1_2(self):
    """Testing simple call to kmeans2 with rank 1 data."""
    # smoke test: one column of the data with an integer k
    flat = np.fromfile(open(DATAFILE1), sep=", ").reshape((200, 2))
    kmeans2(flat[:, 0], 2, iter=1)
Example 29
Project: senior-design   Author: james-tate   File: test_vq.py    GNU General Public License v2.0 5 votes vote down vote up
def test_kmeans2_init(self):
    """Testing that kmeans2 init methods work."""
    data = np.fromfile(open(DATAFILE1), sep=", ")
    data = data.reshape((200, 2))

    # exercise both init methods on the 2-D data, then again on the
    # 1-D special case
    for subset in (data, data[:, :1]):
        kmeans2(subset, 3, minit='random')
        kmeans2(subset, 3, minit='points')
Example 30
Project: DGPs_with_IWVI   Author: hughsalimbeni   File: test_latent_var_layer.py    Apache License 2.0 4 votes vote down vote up
def test_pos_def():
    """Smoke-test DGP_VI and DGP_IWVI training.

    Builds two one-layer latent-variable DGP models with kmeans2-initialised
    inducing points, runs ten alternating natural-gradient/Adam steps on
    each, then prints averaged log-likelihood estimates for both.
    """
    # N = 10
    # Dx = 3
    # Dy = 1
    # K = 5
    from bayesian_benchmarks.data import get_regression_data
    data = get_regression_data('wilson_3droad')
    X = data.X_train
    Y = data.Y_train
    M = 128  # number of inducing points
    from scipy.cluster.vq import kmeans2
    # initialise inducing inputs at k-means centroids of the training inputs
    Z = kmeans2(X, M, minit='points')[0]


    N, Dx = X.shape
    Dy = Y.shape[1]
    K = 1  # number of samples drawn per data point

    lik = gpflow.likelihoods.Gaussian(variance=0.1)
    kern = gpflow.kernels.RBF(Dx, lengthscales=0.1)

    # NOTE(review): X and Y are overwritten with standard-normal draws here,
    # so the dataset loaded above only fixes the shapes and Z — confirm
    # this is intended.
    X = np.random.randn(N, Dx)
    Y = np.random.randn(N, Dy)

    layers_vi = [LatentVariableLayer(Dx, XY_dim=Dx+Dy),
                 GPLayer(kern, Z, Dy)]

    layers_iw = [LatentVariableLayer(Dx, XY_dim=Dx+Dy),
                 GPLayer(kern, Z, Dy)]

    m_dgp_vi = DGP_VI(X, Y, layers_vi, lik, num_samples=K, minibatch_size=512)
    m_dgp_iw = DGP_IWVI(X, Y, layers_iw, lik, num_samples=K, minibatch_size=512)

    for model in [m_dgp_vi, m_dgp_iw]:

        # the last layer's q_mu/q_sqrt are optimised by natural gradients
        # below, so they are hidden from the Adam optimizer
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        optimizer_adam = gpflow.train.AdamOptimizer(0.005)
        adam_op = optimizer_adam.make_optimize_tensor(model)

        optimizer_ng = gpflow.train.NatGradOptimizer(gamma=0.01)
        ng_op = optimizer_ng.make_optimize_tensor(model,
                                                  var_list=[[model.layers[-1].q_mu,
                                                             model.layers[-1].q_sqrt]])
        sess = model.enquire_session()
        for _ in range(10):
            # log the likelihood tensor, then take one nat-grad step
            # followed by one Adam step
            print('{} {:.2f}'.format(_, sess.run(model.likelihood_tensor)))
            sess.run(ng_op)
            sess.run(adam_op)


    # average 100 stochastic evaluations of each model's log-likelihood
    L_vi = [m_dgp_vi.compute_log_likelihood() for _ in range(100)]
    L_iw = [m_dgp_iw.compute_log_likelihood() for _ in range(100)]

    L_vi = np.average(L_vi)
    L_iw = np.average(L_iw)

    print(L_vi, L_iw)
Example 31
Project: dynesty   Author: joshspeagle   File: bounding.py    MIT License 4 votes vote down vote up
def bounding_ellipsoids(points, pointvol=0., vol_dec=0.5, vol_check=2.):
    """
    Calculate a set of ellipsoids that bound the collection of points.

    Parameters
    ----------
    points : `~numpy.ndarray` with shape (npoints, ndim)
        A set of coordinates.

    pointvol : float, optional
        Volume represented by a single point. When provided,
        used to set a minimum bound on the ellipsoid volume
        as `npoints * pointvol`. Default is `0.`.

    vol_dec : float, optional
        The required fractional reduction in volume after splitting an
        ellipsoid in order to accept the split. Default is `0.5`.

    vol_check : float, optional
        The factor used when checking whether the volume of the
        original bounding ellipsoid is large enough to warrant more
        trial splits via `ell.vol > vol_check * npoints * pointvol`.
        Default is `2.0`.

    Returns
    -------
    mell : :class:`MultiEllipsoid` object
        The :class:`MultiEllipsoid` object used to bound the
        collection of points.

    """

    if not HAVE_KMEANS:
        raise ValueError("scipy.cluster.vq.kmeans2 is required to compute "
                         "ellipsoid decompositions.")  # pragma: no cover

    # Start from a single bounding ellipsoid (possibly enlarged to the
    # minimum volume), then split it recursively for as long as each
    # split reduces the volume by at least a factor of `vol_dec`.
    first_ell = bounding_ellipsoid(points, pointvol=pointvol)
    ell_list = _bounding_ellipsoids(points, first_ell, pointvol=pointvol,
                                    vol_dec=vol_dec, vol_check=vol_check)

    return MultiEllipsoid(ells=ell_list)