Python scipy.cluster.vq.kmeans2() Examples
The following are 23 code examples of scipy.cluster.vq.kmeans2().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: test_vq.py From Computable with MIT License | 6 votes |
def test_kmeans_lost_cluster(self):
    """Exercise kmeans/kmeans2 with initial centroids that leave a cluster empty.

    With ``missing='warn'`` the kmeans2 call is expected to complete (the
    UserWarning it may emit is silenced); with ``missing='raise'`` it is
    expected to raise ClusterError.
    """
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))
    initk = np.array([[-1.8127404, -0.67128041],
                      [2.04621601, 0.07401111],
                      [-2.31149087, -0.05160469]])

    # Only checks that plain kmeans runs with this codebook; result unused.
    kmeans(data, initk)

    # catch_warnings saves the global warning filters and restores them on
    # exit, even if the call below raises.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        kmeans2(data, initk, missing='warn')

    assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
Example #2
Source File: test_vq.py From Computable with MIT License | 6 votes |
def test_kmeans2_init(self):
    """Smoke-test kmeans2 with minit='points' and minit='random'.

    Each init method is run on the full two-column data and on a
    single-column slice of it. Nothing is asserted about the results; the
    test only checks that the calls do not raise.
    """
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))

    kmeans2(data, 3, minit='points')
    kmeans2(data[:, :1], 3, minit='points')  # special case (1-D)

    # minit='random' can give warnings, filter those. catch_warnings restores
    # the previous global filter state on exit, even if a call raises.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                message="One of the clusters is empty. Re-run")
        kmeans2(data, 3, minit='random')
        kmeans2(data[:, :1], 3, minit='random')  # special case (1-D)
Example #3
Source File: clustering_kmeans_alternative.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def findClusters_kmeans(data):
    '''
        Whiten the observations and cluster them into 4 groups with
        vq.kmeans2 (30 iterations).

        Returns the (centroids, labels) pair produced by vq.kmeans2.
    '''
    # rescale the observations before clustering
    whitened = vq.whiten(data)

    # run k-means on the whitened data and hand both results back
    centroids, assignments = vq.kmeans2(whitened, k=4, iter=30)
    return centroids, assignments

# the file name of the dataset
Example #4
Source File: utilities_dbscan.py From pytim with GNU General Public License v3.0 | 6 votes |
def determine_samples(threshold_density, cluster_cut, n_neighbors):
    """Return the minimum-samples value derived from ``threshold_density``.

    Args:
        threshold_density: ``None``, a number, or the string ``'auto'``.
            * ``None``: 2 is returned directly.
            * float/int: the value is multiplied by the volume of a sphere
              of radius ``cluster_cut`` (4/3 * pi * cluster_cut**3).
            * ``'auto'``: ``n_neighbors`` is split into two clusters with
              ``vq.kmeans2`` and the largest centroid is used.
        cluster_cut: Radius used in the sphere-volume formula.
        n_neighbors: Data clustered in the ``'auto'`` case; it is multiplied
            by 1.0 before clustering (presumably an array of per-point
            neighbor counts; confirm against the caller).

    Returns:
        The larger of the computed value and 2.

    Raises:
        ValueError: If ``threshold_density`` is none of the accepted forms.
    """
    if threshold_density is None:
        return 2

    if isinstance(threshold_density, (float, int)):
        min_samples = threshold_density * 4. / 3. * np.pi * cluster_cut**3
    elif threshold_density == 'auto':
        modes = 2
        centroid, _ = vq.kmeans2(
            n_neighbors * 1.0, modes, iter=10, check_finite=False)
        min_samples = np.max(centroid)
    else:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        raise ValueError("Wrong value of 'threshold_density' passed "
                         "to do_cluster_analysis_DBSCAN() ")

    return np.max([min_samples, 2])
Example #5
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #6
Source File: pygmmis.py From pygmmis with MIT License | 5 votes |
def initFromKMeans(gmm, data, covar=None, rng=np.random):
    """Initialization callback from a k-means clustering run.

    See Algorithm 1 from Bloemer & Bujna (arXiv:1312.5946)

    NOTE: The results of this call are not deterministic even if rng is set
    because scipy.cluster.vq.kmeans2 uses its own initialization.

    Args:
        gmm: A GMM to be initialized; its ``amp``, ``mean`` and ``covar``
            arrays are overwritten in place for each of the ``gmm.K``
            components
        data: numpy array (N,D) that is clustered with kmeans2
        covar: ignored in this callback
        rng: ignored in this callback (see NOTE above)

    Returns:
        None
    """
    from scipy.cluster.vq import kmeans2
    center, label = kmeans2(data, gmm.K)
    n_points = float(len(data))  # float so the ratios below never truncate
    for k in range(gmm.K):
        mask = (label == k)
        # fraction of all points assigned to component k
        gmm.amp[k] = mask.sum() / n_points
        gmm.mean[k, :] = data[mask].mean(axis=0)
        d_m = data[mask] - gmm.mean[k]
        # funny way of saying: for each point i, do the outer product
        # of d_m with its transpose and sum over i
        # NOTE(review): normalized by the total number of points, not the
        # cluster size; confirm this is intended.
        gmm.covar[k, :, :] = (d_m[:, :, None] * d_m[:, None, :]).sum(axis=0) / n_points
Example #7
Source File: pq.py From nanopq with MIT License | 5 votes |
def fit(self, vecs, iter=20, seed=123):
    """Train one k-means codebook per sub-space from the given vectors.

    This must be called once before anything that relies on
    ``self.codewords``.

    Args:
        vecs (np.ndarray): Training vectors with shape=(N, D) and
            dtype=np.float32. N must exceed ``self.Ks`` and D must be a
            multiple of ``self.M``.
        iter (int): Number of k-means iterations passed to ``kmeans2``.
        seed (int): Value passed to ``np.random.seed`` before training.

    Returns:
        object: self
    """
    assert vecs.dtype == np.float32
    assert vecs.ndim == 2
    n_vecs, dim = vecs.shape
    assert self.Ks < n_vecs, "the number of training vector should be more than Ks"
    assert dim % self.M == 0, "input dimension must be dividable by M"
    self.Ds = int(dim / self.M)

    np.random.seed(seed)
    if self.verbose:
        print("iter: {}, seed: {}".format(iter, seed))

    # codewords[m][ks][ds]: m-th subspace, ks-th codeword, ds-th dimension
    self.codewords = np.zeros((self.M, self.Ks, self.Ds), dtype=np.float32)
    for m in range(self.M):
        if self.verbose:
            print("Training the subspace: {} / {}".format(m, self.M))
        lo = m * self.Ds
        hi = (m + 1) * self.Ds
        centroids, _labels = kmeans2(vecs[:, lo:hi], self.Ks, iter=iter, minit='points')
        self.codewords[m] = centroids

    return self
Example #8
Source File: ssl_exp.py From GGP with Apache License 2.0 | 5 votes |
def __init__(self, data_name, random_seed):
    """Load the dataset, build the kernel and model, and load or save a snapshot.

    Args:
        data_name: Dataset identifier; lower-cased and passed to
            ``load_data_ssl``.
        random_seed: Converted with ``int`` and used to seed both NumPy and
            TensorFlow. The value as passed in (not the converted one) is
            used in the snapshot file name.
    """
    self.data_name = data_name.lower()
    self.random_seed = int(random_seed)
    np.random.seed(self.random_seed)
    tf.set_random_seed(self.random_seed)

    # Load data
    (self.adj_mat, self.node_features, self.x_tr, self.y_tr, self.x_val,
     self.y_val, self.x_test, self.y_test) = load_data_ssl(self.data_name)

    # Init kernel
    k = SparseGraphPolynomial(self.adj_mat, self.node_features, self.x_tr, degree=3.)
    k.offset = np.abs(np.random.randn(1) + 5.)
    k.offset.fixed = False
    k.variance = 1.
    k.variance.fixed = True

    # Init inducing points: one k-means centroid per training point
    ind_points = kmeans2(self.node_features, len(self.x_tr), minit='points')[0]

    # Init optimizer
    self.optimizer = tf.train.AdamOptimizer(0.0005)

    # Init model
    n_class = len(np.unique(self.y_tr))
    self.m = GraphSVGP(self.x_tr, self.y_tr, k, GPflow.likelihoods.MultiClass(n_class),
                       ind_points, num_latent=n_class, minibatch_size=len(self.x_tr),
                       whiten=True, q_diag=False)

    # Define housekeeping variables
    self.last_ts = time.time()
    self.iter = 0
    self.check_obj_every = 200
    self.log_iter = []
    self.log_t = []
    self.log_obj = []
    self.log_param = None
    self.log_opt_state = None

    # PWD may be unset (e.g. non-shell launchers); fall back to the process
    # working directory instead of passing None to os.path.join.
    base_dir = os.getenv('PWD') or os.getcwd()
    self.param_fp = os.path.join(base_dir, 'ssl_param_files')
    if not (os.path.isdir(self.param_fp)):
        os.mkdir(self.param_fp)
    self.param_fp = os.path.join(self.param_fp,
                                 'SSL-{0}-rs_{1}.p'.format(self.data_name, random_seed))
    self.m._compile(self.optimizer)

    # Load an existing snapshot if there is one, otherwise write a first one.
    if os.path.isfile(self.param_fp):
        # Single-argument call form prints identically on Python 2 and 3.
        print('Param. file already exists! Loading from {0}.'.format(self.param_fp))
        self.load_snapshot(self.param_fp)
    else:
        self.save_snapshot(self.param_fp, update_before_saving=True)
Example #9
Source File: al_exp.py From GGP with Apache License 2.0 | 5 votes |
def setup_model_and_opt(self):
    """Build the GraphSVGP model and its Adam optimizer from the training subset.

    The training rows are selected from ``self.all_x`` / ``self.all_y`` with
    ``self.tr_mask``. Inducing points are the k-means centroids of
    ``self.node_features``, one per training row.

    Returns:
        tuple: ``(model, optimizer)``.
    """
    n_class = len(np.unique(self.all_y))
    train_x = self.all_x[self.tr_mask]
    train_y = self.all_y[self.tr_mask]
    n_train = len(train_x)

    # Kernel with both hyper-parameters held fixed.
    kernel = SparseGraphPolynomial(self.adj_mat, self.node_features, train_x, degree=1.)
    kernel.offset = 0.
    kernel.offset.fixed = True
    kernel.variance = 1.
    kernel.variance.fixed = True

    inducing = kmeans2(self.node_features, n_train, minit='points')[0]
    optimizer = tf.train.AdamOptimizer(0.005)
    model = GraphSVGP(train_x, train_y, kernel, GPflow.likelihoods.MultiClass(n_class),
                      inducing, num_latent=n_class, minibatch_size=n_train,
                      whiten=True, q_diag=False)
    return model, optimizer
Example #10
Source File: neural_doodle.py From pCVR with Apache License 2.0 | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #11
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #12
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #13
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #14
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #15
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #16
Source File: neural_doodle.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def kmeans(xs, k):
    """Cluster the rows of a 2-D array into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used (on a float64 conversion of ``xs``)
    when it can be imported; otherwise SciPy's ``kmeans2`` is called with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
        _centers, assignments, _inertia = k_means(xs.astype('float64'), k)
    except ImportError:
        # scikit-learn is not installed: fall back to SciPy.
        from scipy.cluster.vq import kmeans2
        _centers, assignments = kmeans2(xs, k, missing='raise')
    return assignments
Example #17
Source File: cluster_pose.py From Pose2Seg with MIT License | 5 votes |
def cluster_zixi(kpts, cat_num):
    """Cluster flattened keypoint sets into ``cat_num`` groups with kmeans2.

    Works on a copy of ``kpts``. Wherever the third channel of a keypoint is
    0, its first two channels are set to 0.5 before each sample is flattened
    to one row.

    Returns:
        The ``vq.kmeans2`` result for the flattened data
        (``minit='points'``, 100 iterations).
    """
    # kpts: center-normalized (N, 17, 3), per the original author's note
    poses = np.array(kpts)
    sample_idx, joint_idx = np.where(poses[:, :, 2] == 0)
    poses[sample_idx, joint_idx, 0:2] = 0.5
    flattened = poses.reshape(len(poses), -1)
    return vq.kmeans2(flattened, cat_num, minit='points', iter=100)
Example #18
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans_0k(self):
    """Regression test for #546: a k of 0 (or an empty array) must raise ValueError."""
    bad_calls = (
        (kmeans, 0),
        (kmeans2, 0),
        (kmeans2, np.array([])),
    )
    for func, bad_k in bad_calls:
        assert_raises(ValueError, func, X, bad_k)
Example #19
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans2_rank1_2(self):
    """Smoke-test kmeans2 on rank-1 data with an integer cluster count (k=2)."""
    samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
    first_column = samples[:, 0]
    # Only checks that the call runs; the result is not inspected.
    kmeans2(first_column, 2, iter=1)
Example #20
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans2_rank1(self):
    """Smoke-test kmeans2 on rank-1 data with an explicit initial codebook."""
    samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
    first_column = samples[:, 0]

    # Use a copy of the first three observations as initial centroids.
    start_codes = first_column[:3].copy()

    # Only checks that both calls run; the results are not inspected.
    for n_iter in (1, 2):
        kmeans2(first_column, start_codes, iter=n_iter)[0]
Example #21
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans2_simple(self):
    """Run kmeans2 for 1 and 2 iterations and compare with CODET1 / CODET2."""
    # The first three rows of X, copied, serve as the initial codebook.
    start_codes = np.concatenate(([[X[0]], [X[1]], [X[2]]])).copy()

    after_one, after_two = [kmeans2(X, start_codes, iter=n_iter)[0]
                            for n_iter in (1, 2)]

    assert_array_almost_equal(after_one, CODET1)
    assert_array_almost_equal(after_two, CODET2)
Example #22
Source File: util.py From hdidx with MIT License | 5 votes |
def kmeans(vs, ks, niter):
    """Run ``vq.kmeans2`` on ``vs`` and return only the centroids.

    ``ks`` and ``niter`` are forwarded positionally as the second and third
    arguments of ``vq.kmeans2``; the label array it also returns is dropped.
    """
    codebook, _assignments = vq.kmeans2(vs, ks, niter)
    return codebook

# finding nearest neighbor
Example #23
Source File: bounding.py From dynesty with MIT License | 4 votes |
def bounding_ellipsoids(points, pointvol=0., vol_dec=0.5, vol_check=2.):
    """
    Bound a collection of points with a set of ellipsoids.

    Parameters
    ----------
    points : `~numpy.ndarray` with shape (npoints, ndim)
        A set of coordinates.

    pointvol : float, optional
        Volume represented by a single point. When provided,
        used to set a minimum bound on the ellipsoid volume
        as `npoints * pointvol`. Default is `0.`.

    vol_dec : float, optional
        The required fractional reduction in volume after splitting an
        ellipsoid in order to accept the split. Default is `0.5`.

    vol_check : float, optional
        The factor used when checking whether the volume of the
        original bounding ellipsoid is large enough to warrant more
        trial splits via `ell.vol > vol_check * npoints * pointvol`.
        Default is `2.0`.

    Returns
    -------
    mell : :class:`MultiEllipsoid` object
        The :class:`MultiEllipsoid` object used to bound the
        collection of points.

    Raises
    ------
    ValueError
        If `HAVE_KMEANS` is false.

    """

    if not HAVE_KMEANS:
        raise ValueError("scipy.cluster.vq.kmeans2 is required to compute ellipsoid decompositions.")  # pragma: no cover

    # Start from a single ellipsoid around all points, possibly enlarged
    # to a minimum volume.
    root = bounding_ellipsoid(points, pointvol=pointvol)

    # Split it recursively until a split no longer shrinks the volume
    # by a factor of `vol_dec`.
    pieces = _bounding_ellipsoids(points, root, pointvol=pointvol,
                                  vol_dec=vol_dec, vol_check=vol_check)

    return MultiEllipsoid(ells=pieces)