Python scipy.cluster.vq.kmeans2() Examples

The following are 23 code examples of scipy.cluster.vq.kmeans2(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: test_vq.py    From Computable with MIT License 6 votes vote down vote up
def test_kmeans_lost_cluster(self):
        """Run kmeans/kmeans2 from initial centroids that leave a cluster empty.

        The data file is read as 200 two-dimensional points and clustered
        from three fixed starting centroids.  ``kmeans`` and
        ``kmeans2(missing='warn')`` only have to complete, while
        ``kmeans2(missing='raise')`` must raise ``ClusterError``.
        """
        data = np.fromfile(DATAFILE1, sep=", ")
        data = data.reshape((200, 2))
        initk = np.array([[-1.8127404, -0.67128041],
                          [2.04621601, 0.07401111],
                          [-2.31149087, -0.05160469]])

        # Only successful completion is checked; the result is not inspected.
        kmeans(data, initk)

        # Silence UserWarning for this call only; the previous warning
        # filters are restored when the ``with`` block exits.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            kmeans2(data, initk, missing='warn')

        assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
Example #2
Source File: test_vq.py    From Computable with MIT License 6 votes vote down vote up
def test_kmeans2_init(self):
        """Check that kmeans2 runs with the 'points' and 'random' init methods.

        Both the full two-column data set and a single-column slice of it
        are clustered into three groups; only successful completion is
        checked, the results are not inspected.
        """
        data = np.fromfile(DATAFILE1, sep=", ")
        data = data.reshape((200, 2))

        kmeans2(data, 3, minit='points')
        kmeans2(data[:, :1], 3, minit='points')  # special case (1-D)

        # minit='random' can give warnings, filter those.  The filter is
        # scoped to this block and undone automatically on exit.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore',
                        message="One of the clusters is empty. Re-run")
            kmeans2(data, 3, minit='random')
            kmeans2(data[:, :1], 3, minit='random')  # special case (1-D)
Example #3
Source File: clustering_kmeans_alternative.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0 6 votes vote down vote up
def findClusters_kmeans(data, n_clusters=4, n_iter=30):
    '''
        Cluster data using k-means

        The observations are whitened with ``vq.whiten`` and then passed
        to ``vq.kmeans2``.

        Parameters:
            data: observations to cluster, one observation per row
            n_clusters: number of clusters to request (default 4)
            n_iter: number of k-means iterations to run (default 30)

        Returns:
            (centroids, labels) exactly as returned by ``vq.kmeans2``;
            the centroids are expressed in the whitened feature space.
    '''
    # whiten the observations
    data_w = vq.whiten(data)

    # run k-means on the whitened observations
    centroids, labels = vq.kmeans2(
        data_w,
        k=n_clusters,
        iter=n_iter
    )

    return centroids, labels

# the file name of the dataset 
Example #4
Source File: utilities_dbscan.py    From pytim with GNU General Public License v3.0 6 votes vote down vote up
def determine_samples(threshold_density, cluster_cut, n_neighbors):
    """Derive a minimum sample count from a density threshold.

    Parameters:
        threshold_density: ``None``, a number, or the string ``'auto'``.
        cluster_cut: radius used to turn a numeric density into a count.
        n_neighbors: array of neighbour counts; only read in ``'auto'`` mode.

    Returns:
        ``2`` when ``threshold_density`` is ``None``; otherwise the larger
        of 2 and the computed value.  For a number this is the density
        multiplied by the volume of a sphere of radius ``cluster_cut``.
        For ``'auto'`` it is the largest centroid of a two-cluster k-means
        run on ``n_neighbors``.

    Raises:
        ValueError: for any other ``threshold_density`` value.
    """
    if threshold_density is None:
        return 2

    if isinstance(threshold_density, (float, int)):
        min_samples = threshold_density * 4. / 3. * np.pi * cluster_cut**3

    elif threshold_density == 'auto':
        modes = 2
        # Multiplying by 1.0 hands kmeans2 a floating-point array.
        centroid, _ = vq.kmeans2(
            n_neighbors * 1.0, modes, iter=10, check_finite=False)
        min_samples = np.max(centroid)

    else:
        # Adjacent literals instead of a backslash continuation, which
        # used to embed the source indentation in the message.
        raise ValueError("Wrong value of 'threshold_density' passed "
                         "to do_cluster_analysis_DBSCAN()")

    return np.max([min_samples, 2])
Example #5
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Cluster the rows of ``xs`` into ``k`` groups and return their labels.

    scikit-learn's ``k_means`` is tried first, on a float64 copy of ``xs``.
    If that raises ``ImportError``, SciPy's ``kmeans2`` is used with
    ``missing='raise'``.  ``xs`` must be two-dimensional.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
        return assignments
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
        return assignments
Example #6
Source File: pygmmis.py    From pygmmis with MIT License 5 votes vote down vote up
def initFromKMeans(gmm, data, covar=None, rng=np.random):
    """Initialization callback from a k-means clustering run.

    See Algorithm 1 from Bloemer & Bujna (arXiv:1312.5946)
    NOTE: The result of this call is not deterministic even if rng is set
    because scipy.cluster.vq.kmeans2 uses its own initialization.

    Args:
        gmm: A GMM to be initialized; its ``amp``, ``mean`` and ``covar``
            arrays are overwritten in place for each of the ``gmm.K``
            components
        data: numpy array (N,D) to define the range of the component means
        covar: ignored in this callback
        rng: numpy.random.RandomState for deterministic behavior
            (accepted for interface compatibility; not read here)

    Returns:
        None
    """
    from scipy.cluster.vq import kmeans2
    _, label = kmeans2(data, gmm.K)
    # Float denominator keeps the ratios below true divisions regardless of
    # the interpreter's integer-division rules.
    n_points = float(len(data))
    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    for k in range(gmm.K):
        mask = (label == k)
        gmm.amp[k] = mask.sum() / n_points
        gmm.mean[k,:] = data[mask].mean(axis=0)
        d_m = data[mask] - gmm.mean[k]
        # funny way of saying: for each point i, do the outer product
        # of d_m with its transpose and sum over i
        # NOTE(review): normalised by the total number of points, not the
        # cluster size -- confirm this is intended.
        gmm.covar[k,:,:] = (d_m[:, :, None] * d_m[:, None, :]).sum(axis=0) / n_points
Example #7
Source File: pq.py    From nanopq with MIT License 5 votes vote down vote up
def fit(self, vecs, iter=20, seed=123):
        """Train one k-means codebook per sub-space from the given vectors.

        Call this once before anything that needs ``self.codewords``.

        Args:
            vecs (np.ndarray): Training vectors with shape=(N, D) and dtype=np.float32.
            iter (int): The number of iteration for k-means
            seed (int): The seed for random process

        Returns:
            object: self

        """
        assert vecs.dtype == np.float32
        assert vecs.ndim == 2
        n_vecs, dim = vecs.shape
        assert self.Ks < n_vecs, "the number of training vector should be more than Ks"
        assert dim % self.M == 0, "input dimension must be dividable by M"
        self.Ds = int(dim / self.M)

        # Seeds NumPy's global random state before clustering.
        np.random.seed(seed)
        if self.verbose:
            print("iter: {}, seed: {}".format(iter, seed))

        # codewords[m][ks][ds]: m-th sub-space, ks-th codeword, ds-th dimension
        self.codewords = np.zeros((self.M, self.Ks, self.Ds), dtype=np.float32)
        for sub in range(self.M):
            if self.verbose:
                print("Training the subspace: {} / {}".format(sub, self.M))
            start = sub * self.Ds
            stop = (sub + 1) * self.Ds
            centroids, _ = kmeans2(vecs[:, start:stop], self.Ks,
                                   iter=iter, minit='points')
            self.codewords[sub] = centroids

        return self
Example #8
Source File: ssl_exp.py    From GGP with Apache License 2.0 5 votes vote down vote up
def __init__(self, data_name, random_seed):
        """Build the model, optimizer and bookkeeping state for one run.

        Args:
            data_name: data set name; lower-cased and passed to
                ``load_data_ssl``.
            random_seed: converted to ``int`` and used to seed both NumPy
                and TensorFlow.

        Side effects:
            Creates the ``ssl_param_files`` directory under ``$PWD`` (or
            under the current working directory when ``PWD`` is unset) if
            it does not exist, then either loads an existing snapshot file
            or writes a new one.
        """
        self.data_name = data_name.lower()
        self.random_seed = int(random_seed)
        np.random.seed(self.random_seed)
        tf.set_random_seed(self.random_seed)
        # Load data
        (self.adj_mat, self.node_features, self.x_tr, self.y_tr,
         self.x_val, self.y_val, self.x_test, self.y_test) \
            = load_data_ssl(self.data_name)
        # Init kernel
        k = SparseGraphPolynomial(self.adj_mat, self.node_features, self.x_tr, degree=3.)
        k.offset = np.abs(np.random.randn(1) + 5.)
        k.offset.fixed = False
        k.variance = 1.
        k.variance.fixed = True
        # Init inducing points: one k-means centroid per training point
        ind_points = kmeans2(self.node_features, len(self.x_tr), minit='points')[0]
        # Init optimizer
        self.optimizer = tf.train.AdamOptimizer(0.0005)
        # Init model
        self.m = GraphSVGP(self.x_tr, self.y_tr, k, GPflow.likelihoods.MultiClass(len(np.unique(self.y_tr))), ind_points,
                      num_latent=len(np.unique(self.y_tr)), minibatch_size=len(self.x_tr), whiten=True, q_diag=False)
        # Define housekeeping variables
        self.last_ts = time.time()
        self.iter = 0
        self.check_obj_every = 200
        self.log_iter = []
        self.log_t = []
        self.log_obj = []
        self.log_param = None
        self.log_opt_state = None
        # os.getenv('PWD') is None when the variable is unset, which would
        # make os.path.join fail; fall back to the process's working directory.
        base_dir = os.getenv('PWD') or os.getcwd()
        self.param_fp = os.path.join(base_dir, 'ssl_param_files')
        if not (os.path.isdir(self.param_fp)):
            os.mkdir(self.param_fp)
        self.param_fp = os.path.join(self.param_fp, 'SSL-{0}-rs_{1}.p'.format(self.data_name, random_seed))
        self.m._compile(self.optimizer)
        if os.path.isfile(self.param_fp):
            # Single-argument call form prints identically on Python 2 and 3.
            print('Param. file already exists! Loading from {0}.'.format(self.param_fp))
            self.load_snapshot(self.param_fp)
        else:
            self.save_snapshot(self.param_fp, update_before_saving=True)
Example #9
Source File: al_exp.py    From GGP with Apache License 2.0 5 votes vote down vote up
def setup_model_and_opt(self):
        """Create the GraphSVGP model and its Adam optimizer.

        The training subset is selected from ``self.all_x`` / ``self.all_y``
        with ``self.tr_mask``.  One inducing point per training sample is
        obtained by running ``kmeans2`` on ``self.node_features``.

        Returns:
            (model, optimizer)
        """
        n_class = len(np.unique(self.all_y))

        train_x = self.all_x[self.tr_mask]
        train_y = self.all_y[self.tr_mask]
        n_inducing_points = len(train_x)

        # Degree-1 kernel with offset and variance both held fixed.
        kernel = SparseGraphPolynomial(self.adj_mat, self.node_features, train_x, degree=1.)
        kernel.offset = 0.
        kernel.offset.fixed = True
        kernel.variance = 1.
        kernel.variance.fixed = True

        centroids, _ = kmeans2(self.node_features, n_inducing_points, minit='points')
        optimizer = tf.train.AdamOptimizer(0.005)
        likelihood = GPflow.likelihoods.MultiClass(n_class)
        model = GraphSVGP(train_x, train_y, kernel, likelihood, centroids,
                          num_latent=n_class, minibatch_size=len(train_x),
                          whiten=True, q_diag=False)
        return model, optimizer
Example #10
Source File: neural_doodle.py    From pCVR with Apache License 2.0 5 votes vote down vote up
def kmeans(xs, k):
    """Return one cluster label per row of the 2-D array ``xs``.

    Uses ``sklearn.cluster.k_means`` on a float64 copy of ``xs`` when it
    can be imported.  On ``ImportError`` it falls back to
    ``scipy.cluster.vq.kmeans2`` with ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centroids, cluster_ids, _inertia = k_means(xs.astype('float64'), k)
        return cluster_ids
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _centroids, cluster_ids = kmeans2(xs, k, missing='raise')
        return cluster_ids
Example #11
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Assign every row of ``xs`` to one of ``k`` k-means clusters.

    ``xs`` must be two-dimensional.  scikit-learn's ``k_means`` is the
    preferred backend and receives a float64 copy of ``xs``.  SciPy's
    ``kmeans2`` with ``missing='raise'`` is used on ``ImportError``.
    Only the label array is returned.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _means, membership, _cost = k_means(xs.astype('float64'), k)
        return membership
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _means, membership = kmeans2(xs, k, missing='raise')
        return membership
Example #12
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Label the rows of ``xs`` using k-means with ``k`` clusters.

    The scikit-learn implementation is tried first, with the input cast
    to float64.  If it raises ``ImportError``, ``scipy.cluster.vq.kmeans2``
    is called with ``missing='raise'``.  Input must be a 2-D array.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _c, row_labels, _i = k_means(xs.astype('float64'), k)
        return row_labels
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _c, row_labels = kmeans2(xs, k, missing='raise')
        return row_labels
Example #13
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Run k-means on the 2-D array ``xs`` and return the label of each row.

    Backend selection: ``sklearn.cluster.k_means`` (fed a float64 copy of
    ``xs``) unless it raises ``ImportError``, in which case
    ``scipy.cluster.vq.kmeans2`` is called with ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centres, point_labels, _score = k_means(xs.astype('float64'), k)
        return point_labels
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _centres, point_labels = kmeans2(xs, k, missing='raise')
        return point_labels
Example #14
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Return the k-means cluster label for each row of ``xs``.

    ``xs`` is asserted to be two-dimensional.  ``sklearn.cluster.k_means``
    is called on ``xs`` cast to float64.  If it raises ``ImportError``,
    ``scipy.cluster.vq.kmeans2`` with ``missing='raise'`` is used instead.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _ctr, groups, _total = k_means(xs.astype('float64'), k)
        return groups
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _ctr, groups = kmeans2(xs, k, missing='raise')
        return groups
Example #15
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Partition the rows of ``xs`` into ``k`` clusters; return the labels.

    scikit-learn's ``k_means`` is preferred and is given ``xs`` as
    float64.  SciPy's ``kmeans2`` with ``missing='raise'`` is the fallback
    on ``ImportError``.  A non-2-D ``xs`` trips the assertion.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _mu, tags, _sse = k_means(xs.astype('float64'), k)
        return tags
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _mu, tags = kmeans2(xs, k, missing='raise')
        return tags
Example #16
Source File: neural_doodle.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def kmeans(xs, k):
    """Compute k-means labels for the rows of a 2-D array.

    Tries ``sklearn.cluster.k_means`` on ``xs.astype('float64')`` first.
    On ``ImportError`` it uses ``scipy.cluster.vq.kmeans2`` with
    ``missing='raise'``.  Returns only the per-row label array.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _codebook, idx, _dist = k_means(xs.astype('float64'), k)
        return idx
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _codebook, idx = kmeans2(xs, k, missing='raise')
        return idx
Example #17
Source File: cluster_pose.py    From Pose2Seg with MIT License 5 votes vote down vote up
def cluster_zixi(kpts, cat_num):
    """Cluster keypoint sets into ``cat_num`` groups with ``vq.kmeans2``.

    ``kpts`` is copied into a new array.  Wherever the third channel of a
    keypoint equals 0, its first two channels are overwritten with 0.5.
    Each sample is then flattened to one row and clustered with
    ``minit='points'`` and 100 iterations.

    Returns the ``(centroids, labels)`` tuple from ``vq.kmeans2``.
    """
    # kpts: center-normalized (N, 17, 3)
    samples = np.array(kpts)

    # Keypoints whose third channel is exactly 0 get a fixed placeholder.
    sample_idx, joint_idx = np.where(samples[:, :, 2] == 0)
    samples[sample_idx, joint_idx, 0:2] = 0.5

    flat = samples.reshape(len(samples), -1)
    return vq.kmeans2(flat, cat_num, minit='points', iter=100)
Example #18
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans_0k(self):
        """Regression test for #546: a zero or empty ``k`` must raise ValueError."""
        rejected_calls = (
            (kmeans, 0),
            (kmeans2, 0),
            (kmeans2, np.array([])),
        )
        for func, bad_k in rejected_calls:
            assert_raises(ValueError, func, X, bad_k)
Example #19
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans2_rank1_2(self):
        """Smoke-test kmeans2 on rank-1 data with an integer cluster count."""
        samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
        first_column = samples[:, 0]

        # Only successful completion matters; the result is not inspected.
        code1 = kmeans2(first_column, 2, iter=1)
Example #20
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans2_rank1(self):
        """Smoke-test kmeans2 on rank-1 data with explicit initial centroids.

        The first column of the data set is clustered, starting from a copy
        of its first three values, for one and for two iterations.  Only
        successful completion is checked.
        """
        data = np.fromfile(DATAFILE1, sep=", ")
        data = data.reshape((200, 2))
        data1 = data[:, 0]

        initc = data1[:3]
        # Pass a copy so the initial guess is independent of the data array.
        code = initc.copy()
        code1 = kmeans2(data1, code, iter=1)[0]
        code2 = kmeans2(data1, code, iter=2)[0]
Example #21
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans2_simple(self):
        """Compare kmeans2 centroids after 1 and 2 iterations with references.

        The initial centroids are the first three observations of ``X``.
        """
        seeds = np.concatenate([[X[i]] for i in range(3)])
        start = seeds.copy()

        after_one = kmeans2(X, start, iter=1)[0]
        after_two = kmeans2(X, start, iter=2)[0]

        assert_array_almost_equal(after_one, CODET1)
        assert_array_almost_equal(after_two, CODET2)
Example #22
Source File: util.py    From hdidx with MIT License 5 votes vote down vote up
def kmeans(vs, ks, niter):
        """Run ``vq.kmeans2`` on ``vs`` and return only the centroids.

        ``ks`` and ``niter`` are forwarded positionally as the second and
        third arguments of ``vq.kmeans2``; the label array is discarded.
        """
        return vq.kmeans2(vs, ks, niter)[0]


# finding nearest neighbor 
Example #23
Source File: bounding.py    From dynesty with MIT License 4 votes vote down vote up
def bounding_ellipsoids(points, pointvol=0., vol_dec=0.5, vol_check=2.):
    """
    Bound a collection of points with a set of ellipsoids.

    Parameters
    ----------
    points : `~numpy.ndarray` with shape (npoints, ndim)
        A set of coordinates.

    pointvol : float, optional
        Volume represented by a single point. When provided,
        used to set a minimum bound on the ellipsoid volume
        as `npoints * pointvol`. Default is `0.`.

    vol_dec : float, optional
        The fractional reduction in volume that a split of an
        ellipsoid must achieve in order to be accepted.
        Default is `0.5`.

    vol_check : float, optional
        The factor used when checking whether the volume of the
        original bounding ellipsoid is large enough to warrant more
        trial splits via `ell.vol > vol_check * npoints * pointvol`.
        Default is `2.0`.

    Returns
    -------
    mell : :class:`MultiEllipsoid` object
        The :class:`MultiEllipsoid` object used to bound the
        collection of points.

    Raises
    ------
    ValueError
        If `HAVE_KMEANS` is false.

    """

    if not HAVE_KMEANS:
        raise ValueError("scipy.cluster.vq.kmeans2 is required to compute "
                         "ellipsoid decompositions.")  # pragma: no cover

    # Single ellipsoid around all points (possibly enlarged to a minimum
    # volume); this is the starting point for the decomposition.
    root = bounding_ellipsoid(points, pointvol=pointvol)

    # Hand the root ellipsoid to the splitting helper, which keeps
    # splitting while the volume shrinks by at least `vol_dec`.
    split_kwargs = dict(pointvol=pointvol, vol_dec=vol_dec,
                        vol_check=vol_check)
    pieces = _bounding_ellipsoids(points, root, **split_kwargs)

    return MultiEllipsoid(ells=pieces)