Python scipy.cluster.vq.kmeans() Examples
The following are 20 code examples of scipy.cluster.vq.kmeans().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: knn.py From deepnl with GNU General Public License v3.0 | 7 votes |
def Kmeans(file, vocabfile, k):
    """Cluster the embeddings with k-means and print a textual report.

    Prints, in order: the number of centroids and the distortion, one line
    per centroid, one line of words per cluster, and finally each word with
    its assigned cluster code.

    :param file: not used in the visible body.
    :param vocabfile: not used in the visible body.
    :param k: number of clusters requested from kmeans.
    """
    # Fixed seed so repeated runs give the same clustering.
    np.random.seed((1000,2000))
    codebook, distortion = kmeans(whiten(embeddings), k)
    # NOTE(review): the codebook is fitted on whitened data, but the lookups
    # below are done against the raw embeddings -- confirm this is intended.
    clusters = [l2_nearest(embeddings, center, representatives+1)
                for center in codebook]

    # output
    print(len(codebook), distortion)
    for center in codebook:
        print(' '.join(str(value) for value in center))
    print()
    for members in clusters:
        print(' '.join(id_word[i] for i, _ in members).encode('utf-8'))
    print()

    # assign clusters to words
    codes, _ = vq(embeddings, codebook)
    for word, code in zip(word_id, codes):
        print(word, code)
Example #2
Source File: cmag.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def sigma_bin_walls(sigma, bins):
    """Return bin walls for sigma derived from k-means cluster centers.

    The walls are 0, the midpoints between consecutive sorted cluster
    centers, and max(sigma). When sigma has (close to) zero standard
    deviation, only the two outer walls [0, max(sigma)] are returned.
    """
    import scipy.cluster.vq as vq

    spread = np.std(sigma)
    if np.isclose(spread, 0):
        return pimms.imm_array([0, np.max(sigma)])
    # Cluster in units of the standard deviation, then scale back.
    centers, _ = vq.kmeans(sigma/spread, bins)
    centers = sorted(spread * centers)
    # Each inner wall sits halfway between two neighbouring centers.
    inner = np.mean([centers[:-1], centers[1:]], axis=0)
    walls = np.concatenate(([0], inner, [np.max(sigma)]))
    return pimms.imm_array(walls)
Example #3
Source File: test_vq.py From Computable with MIT License | 6 votes |
def test_large_features(self): # Generate a data set with large values, and run kmeans on it to # (regression for 1077). d = 300 n = 100 m1 = np.random.randn(d) m2 = np.random.randn(d) x = 10000 * np.random.randn(n, d) - 20000 * m1 y = 10000 * np.random.randn(n, d) + 20000 * m2 data = np.empty((x.shape[0] + y.shape[0], d), np.double) data[:x.shape[0]] = x data[x.shape[0]:] = y kmeans(data, 2)
Example #4
Source File: test_vq.py From Computable with MIT License | 6 votes |
def test_kmeans_lost_cluster(self):
    """This will cause kmean to have a cluster with no points."""
    data = np.fromfile(DATAFILE1, sep=", ")
    data = data.reshape((200, 2))
    initk = np.array([[-1.8127404, -0.67128041],
                      [2.04621601, 0.07401111],
                      [-2.31149087, -0.05160469]])

    # kmeans must cope with the cluster that ends up empty.
    kmeans(data, initk)

    # kmeans2 emits a UserWarning in 'warn' mode; silence it only for the
    # duration of this call so the global warning filters are restored.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        kmeans2(data, initk, missing='warn')

    assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
Example #5
Source File: clustering.py From Load-Forecasting with MIT License | 6 votes |
def kMeansClustering(x,k):
    """Cluster the observations in x into k groups with k-means.

    :param x: sequence of observations (converted with np.asarray).
    :param k: number of clusters passed to kmeans.
    :returns: tuple (centroids, labels); labels is a list holding, for each
        observation, the index of the centroid with the smallest Euclidean
        distance (-1 if no centroid has a distance below infinity).
    """
    # Convert list into numpy format
    conv = np.asarray(x)
    # Compute the centroids
    centroids = kmeans(conv, k, iter=10)[0]
    # Relabel the x's
    labels = []
    for point in conv:
        minDist = float('inf')
        minLabel = -1
        for z, centroid in enumerate(centroids):
            e = euclidean(point, centroid)
            # Strict comparison keeps the first centroid on ties.
            if e < minDist:
                minDist = e
                minLabel = z
        labels.append(minLabel)
    # Return the list of centroids and labels
    return (centroids, labels)

# Performs a weighted clustering on the examples in xTest
# Returns a 1-d vector of predictions
Example #6
Source File: cluster.py From deephar with MIT License | 6 votes |
def mean_on_most_assigned(x, c):
    """Return the mean of the points assigned to the most populated center.

    Each point in x is assigned to its nearest center in c (Euclidean
    distance); the mean of the points belonging to the center that received
    the most assignments is returned.

    :param x: array of points, one per row.
    :param c: array of centers, one per row, same number of columns as x.
    :returns: 1-d array with the mean of the largest group.
    """
    nb_c = len(c)
    assign = np.zeros(nb_c)
    mean = np.zeros(c.shape)
    for point in x:
        # (1, dim) row broadcasts against the (nb_c, dim) centers, so the
        # point dimensionality is no longer fixed to 2.
        delta = point.reshape((1, -1)) - c
        d = np.sqrt(np.sum(np.power(delta, 2), axis=1))
        idx = d.argmin()
        assign[idx] += 1
        mean[idx, :] += point
    idx = assign.argmax()
    return mean[idx, :] / assign[idx]

# def best_kmeans(pred):
#     plt.scatter(pred[:,0], pred[:,1], color='b')
#     c,v = kmeans(pred, 3)
#     plt.scatter(c[:,0], c[:,1], color='g')
#     n = most_assigned(pred, c)
#     plt.scatter(c[n,0], c[n,1], color='r')
#     plt.show()
Example #7
Source File: colorz.py From colorz with MIT License | 6 votes |
def colorz(fd, n=DEFAULT_NUM_COLORS, min_v=DEFAULT_MINV, max_v=DEFAULT_MAXV,
           bold_add=DEFAULT_BOLD_ADD, order_colors=True):
    """
    Extract the dominant colors of the image opened from fd.

    The image is shrunk to THUMB_SIZE, every sampled color is clamped with
    min_v and max_v, and the clamped colors are clustered into n groups
    with k-means. Each resulting color is paired with a brightened variant
    produced with bold_add.

    When order_colors is true the colors are passed through order_by_hue
    first (for terminal colors the original documentation gives the hue
    order as: red, yellow, green, cyan, blue, magenta).

    Returns a list of (color, bold color) pairs.
    """
    image = Image.open(fd)
    image.thumbnail(THUMB_SIZE)

    samples = array([clamp(pixel, min_v, max_v)
                     for pixel in get_colors(image)])
    centers, _ = kmeans(samples.astype(float), n)

    if order_colors:
        palette = order_by_hue(centers)
    else:
        palette = centers

    bold = [brighten(shade, bold_add) for shade in palette]
    return list(zip(palette, bold))
Example #8
Source File: ckmean.py From TextDetector with GNU General Public License v3.0 | 6 votes |
def __init__(self, data, kk):
    # Convolutional K-means
    # INPUT:
    #   data: matrix of sample vectors
    #   kk:   number of total clusters
    # Stores on the instance:
    #   data, kk and D, the initial matrix of center vectors
    # NOTE(review): the original comment says each *column* of data is a
    # sample, but the indexing below picks kk *rows* -- confirm the layout.
    print('starting kmeans quatization...(.py file is used)')

    # Initialise D from kk randomly chosen entries of the training data,
    # then normalise it with colnorm.
    picked = random.sample(range(len(data)), kk)
    centers = self.colnorm(data[picked, :])

    self.data = data
    self.kk = kk
    self.D = centers
Example #9
Source File: clustergeojson.py From open-context-py with GNU General Public License v3.0 | 6 votes |
def cluster_lon_lats(self):
    """Clusters the list of lon_lats into groups.

    Reads self.lon_lats and self.number_clusters; stores the observation
    array (self.data), the k-means centroids (self.centroids), the cluster
    index of every observation (self.idx) and the centroids sorted by
    column 0, then column 1, as nested lists (self.sorted_centroids).
    """
    np_lon_lats = [np.fromiter(lon_lat, np.dtype('float'))
                   for lon_lat in self.lon_lats]
    data = array(np_lon_lats)
    centroids, _ = kmeans(data, self.number_clusters)
    idx, _ = vq(data, centroids)
    self.idx = idx
    self.data = data
    self.centroids = centroids
    # Sort the centroids by lon, then lat. lexsort treats the LAST key as
    # the primary one; two chained default argsorts are not stable and so
    # do not guarantee the secondary ordering.
    order = np.lexsort((centroids[:, 1], centroids[:, 0]))
    self.sorted_centroids = centroids[order].tolist()
Example #10
Source File: noteshrink_module.py From noteshrinker-django with MIT License | 5 votes |
def get_palette(samples, options, return_mask=False, kmeans_iter=40):
    '''Build the color palette for a set of sampled RGB values.

    Entry 0 of the palette is the background color; the remaining
    options.num_colors - 1 entries are k-means cluster centers of the
    foreground samples. With return_mask set, the foreground mask is
    returned alongside the palette.
    '''
    if not options.quiet:
        print(' getting palette...')

    background = get_bg_color(samples, 6)
    fg_mask = get_fg_mask(background, samples, options)

    foreground = samples[fg_mask].astype(np.float32)
    centers, _ = kmeans(foreground, options.num_colors - 1,
                        iter=kmeans_iter)

    palette = np.vstack((background, centers)).astype(np.uint8)
    return (palette, fg_mask) if return_mask else palette

######################################################################
Example #11
Source File: clustered_kde.py From kombine with MIT License | 5 votes |
def __init__(self, data, k=1):
    """Cluster data into k groups and build one KDE per cluster.

    Stores the data, its per-column mean and standard deviation, the
    k-means centroids of the whitened data, the cluster assignment of each
    sample, one KDE per cluster and the log of each cluster's share of the
    samples.
    """
    self._data = data
    self._nclusters = k
    self._mean = np.mean(data, axis=0)
    self._std = np.std(data, axis=0)

    # Cluster data that's mean 0 and scaled to unit width in each
    # parameter independently
    whitened = self._whiten(data)
    self._centroids, _ = kmeans(whitened, k)
    self._assignments, _ = vq(whitened, self.centroids)

    cluster_ids = range(k)
    self._kdes = [KDE(self.data[self.assignments == cid])
                  for cid in cluster_ids]
    fractions = [np.count_nonzero(self.assignments == cid)/self.size
                 for cid in cluster_ids]
    self._logweights = np.log(fractions)
Example #12
Source File: cluster.py From deephar with MIT License | 5 votes |
def clustering_joints(y_pred, k=3):
    """Reduce several predictions per joint to one via k-means.

    y_pred is a 4-d array; for every (sample, joint) pair the predictions
    along the first axis are clustered into k groups and the center picked
    by most_assigned is stored.

    Returns an array of shape (samples, joints, dim).
    """
    _, num_samples, num_joints, dim = y_pred.shape
    result = np.zeros((num_samples, num_joints, dim))
    for sample in range(num_samples):
        for joint in range(num_joints):
            candidates = y_pred[:, sample, joint]
            centers, _ = kmeans(candidates, k)
            best = most_assigned(candidates, centers)
            result[sample, joint, :] = centers[best]
    return result
Example #13
Source File: color.py From dvt with GNU General Public License v2.0 | 5 votes |
def _get_dominant(img, num_dominant): img_flat = img.reshape(-1, 3).astype(float32) # increasing iter would give 'better' clustering, at the cost of speed dominant_colors, _ = kmeans(img_flat, num_dominant, iter=5) #kmeans_code = vq(img_flat, dominant_colors) if dominant_colors.shape[0] != num_dominant: # pragma: no cover diff = num_dominant - dominant_colors.shape[0] dominant_colors = vstack([ dominant_colors, zeros((diff, dominant_colors.shape[1])) ]) return dominant_colors.astype(uint8)
Example #14
Source File: SPGP.py From kusanagi with MIT License | 5 votes |
def init_pseudo_inputs(self):
    """Initialise the sparse pseudo inputs from the training inputs.

    Picks self.n_inducing initial centers with utils.kmeanspp, refines
    them with k-means and stores the result as the 'X_sp' parameter.

    Raises AssertionError when the dataset has fewer than n_inducing
    points.
    """
    # '%n' is not a valid format character: the original message raised
    # ValueError instead of reporting the failed check.
    msg = "Dataset must have at least n_inducing [ %s ] data points to enable"
    msg += " inference with sparse pseudo inputs"
    assert self.N >= self.n_inducing, msg % (self.n_inducing,)
    self.should_recompile = True
    # pick initial cluster centers from dataset
    X = self.X.get_value()
    X_sp_ = utils.kmeanspp(X, self.n_inducing)
    # perform kmeans to get initial cluster centers
    utils.print_with_stamp('Initialising pseudo inputs', self.name)
    X_sp_, _ = kmeans(X, X_sp_, iter=200, thresh=1e-9)
    # initialize symbolic tensor variable if necessary
    # (this will create the self.X_sp attribute)
    self.set_params({'X_sp': X_sp_})
Example #15
Source File: xmeans.py From msaf with MIT License | 5 votes |
def test_kmeans(K=5):
    """Run k-means on whitened synthetic 2-d data and plot the result."""
    # NOTE(review): the data is always generated with K=4, independent of
    # the K argument used for clustering -- confirm this is intended.
    whitened = vq.whiten(XMeans.generate_2d_data(K=4))
    centers, _ = vq.kmeans(whitened, K, iter=100)

    # Data points first, cluster centers on top in magenta.
    plt.scatter(whitened[:, 0], whitened[:, 1])
    plt.scatter(centers[:, 0], centers[:, 1], color="m")
    plt.show()
Example #16
Source File: xmeans.py From msaf with MIT License | 5 votes |
def run_kmeans(self, X, K):
    """Cluster the whitened data with k-means.

    Returns a tuple (means, labels): the cluster centers in whitened space
    and the index of the center assigned to each row of X.
    """
    whitened = vq.whiten(X)
    means, _ = vq.kmeans(whitened, K, iter=100)
    labels, _ = vq.vq(whitened, means)
    return means, labels
Example #17
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans_0k(self):
    """Regression test for #546: fail when k arg is 0."""
    invalid_calls = (
        (kmeans, 0),
        (kmeans2, 0),
        (kmeans2, np.array([])),
    )
    for func, bad_k in invalid_calls:
        assert_raises(ValueError, func, X, bad_k)
Example #18
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_kmeans_simple(self):
    """One kmeans iteration from the first three rows of X gives CODET2."""
    first_rows = [[X[i]] for i in range(3)]
    guess = np.concatenate(first_rows).copy()
    fitted, _ = kmeans(X, guess, iter=1)
    assert_array_almost_equal(fitted, CODET2)
Example #19
Source File: noteshrink.py From noteshrink with MIT License | 5 votes |
def get_palette(samples, options, return_mask=False, kmeans_iter=40):
    '''Compute the palette for the sampled RGB values.

    The background color always comes first; the other
    options.num_colors-1 colors are the k-means cluster centers of the
    foreground pixels. The palette is returned alone, or together with
    the foreground mask when return_mask is true.
    '''
    if not options.quiet:
        print(' getting palette...')

    bg = get_bg_color(samples, 6)
    mask = get_fg_mask(bg, samples, options)

    fg_pixels = samples[mask].astype(np.float32)
    num_clusters = options.num_colors-1
    centers, _ = kmeans(fg_pixels, num_clusters, iter=kmeans_iter)

    palette = np.vstack((bg, centers)).astype(np.uint8)

    if return_mask:
        return palette, mask
    return palette

######################################################################
Example #20
Source File: analysis.py From enlopy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization.

    The time scale of the archetypes depends on the selected dimensions
    (x, y). For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by.
            Has to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an
            overview of the results
    Returns:
        np.ndarray: one column per cluster center, one row per column of
        the reshaped timeseries. kmeans may return fewer than k centers.
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)

    # Undo the whitening with the same scale whiten() applied: the
    # population standard deviation (ddof=0), with zero-deviation columns
    # left unscaled. pandas' default std() uses ddof=1, which does not match.
    scale = np.array(df.std(ddof=0), dtype=float)
    scale[scale == 0] = 1.0
    # Broadcasting over the columns also works when kmeans dropped empty
    # clusters and returned fewer than k centers.
    clusters_center_dewhitened = clusters_center.T * scale[:, np.newaxis]

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')

            plt.figure()
            # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception:  # FIXME: specify exception
            # Diagnostics are best-effort: a plotting failure must not
            # prevent the archetypes from being returned.
            print ('Works only with daily profile clustering')

    return clusters_center_dewhitened