Python scipy.cluster.vq.vq() Examples
The following are 22 code examples of scipy.cluster.vq.vq().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: knn.py From deepnl with GNU General Public License v3.0 | 7 votes |
def Kmeans(file, vocabfile, k):
    """Cluster the word embeddings with k-means and print the result.

    Printed, in order: the number of centroids and the distortion, one line
    per centroid, one line per cluster listing its representative words, and
    finally one ``word code`` line per vocabulary entry.

    NOTE(review): ``file`` and ``vocabfile`` are not used in the visible body;
    ``embeddings``, ``representatives``, ``id_word`` and ``word_id`` are read
    from an outer scope -- presumably loaded from these two paths; confirm.

    Args:
        file: not used in the visible body.
        vocabfile: not used in the visible body.
        k: number of centroids requested from ``kmeans``.
    """
    np.random.seed((1000, 2000))
    whitened = whiten(embeddings)
    codebook, distortion = kmeans(whitened, k)
    # NOTE(review): the codebook lives in whitened space while l2_nearest is
    # handed the raw embeddings; left unchanged because l2_nearest's contract
    # is not visible here -- confirm whether it should receive `whitened`.
    clusters = [l2_nearest(embeddings, c, representatives + 1) for c in codebook]

    # output
    print(len(codebook), distortion)
    for centroid in codebook:
        print(' '.join([str(x) for x in centroid]))
    print()
    for cluster in clusters:
        print(' '.join([id_word[i] for i, d in cluster]).encode('utf-8'))
    print()

    # assign clusters to words
    # The codebook was fitted on the whitened features, so the observations
    # must be quantized in that same scaled space (previously the raw
    # embeddings were compared against whitened centroids).
    codes, _ = vq(whitened, codebook)
    for w, c in zip(word_id.keys(), codes):
        print(w, c)
Example #2
Source File: test_vq.py From Computable with MIT License | 6 votes |
def test_vq(self):
    """Compare the labels from the C ``_vq.vq`` against ``LABEL1``.

    The codebook is made of ``X[0]``, ``X[1]`` and ``X[2]``. When ``TESTC``
    is false only a notice is printed.
    """
    codebook = np.concatenate(([[X[0]], [X[1]], [X[2]]]))
    if not TESTC:
        print("== not testing C imp of vq ==")
        return
    c_labels, _c_dist = _vq.vq(X, codebook)
    assert_array_equal(c_labels, LABEL1)
    # The public wrapper is only called; its output is not compared.
    _labels, _dist = vq(X, codebook)

# def test_py_vq_1d(self):
#     """Test special rank 1 vq algo, python implementation."""
#     data = X[:, 0]
#     initc = data[:3]
#     a, b = _py_vq_1d(data, initc)
#     ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis])
#     assert_array_equal(a, ta)
#     assert_array_equal(b, tb)
Example #3
Source File: pq.py From nanopq with MIT License | 6 votes |
def encode(self, vecs):
    """Quantize every sub-vector of ``vecs`` against its subspace codebook.

    Args:
        vecs (np.ndarray): Input vectors with shape=(N, D) and
            dtype=np.float32, where D must equal ``self.Ds * self.M``.

    Returns:
        np.ndarray: Codes with shape=(N, M) and dtype=self.code_dtype;
        entry [n][m] is the code of the n-th vector in the m-th subspace.
    """
    assert vecs.dtype == np.float32
    assert vecs.ndim == 2
    n_vecs, dim = vecs.shape
    assert dim == self.Ds * self.M, "input dimension must be Ds * M"

    codes = np.empty((n_vecs, self.M), dtype=self.code_dtype)
    for m in range(self.M):
        if self.verbose:
            print("Encoding the subspace: {} / {}".format(m, self.M))
        # Columns [m * Ds, (m + 1) * Ds) belong to the m-th subspace.
        start = m * self.Ds
        sub_codes, _ = vq(vecs[:, start:start + self.Ds], self.codewords[m])
        codes[:, m] = sub_codes
    return codes
Example #4
Source File: vq_test.py From Computable with MIT License | 6 votes |
def python_vq(all_data, code_book):
    """Run ``vq.vq`` on the inputs as given, then again after a float32 cast.

    Both runs are timed, but the timings are only consumed by diagnostics
    that are commented out, so they have no visible effect.

    Returns:
        The ``(codes, dist)`` pair produced by the float32 run.
    """
    import time

    started = time.time()
    vq.vq(all_data, code_book)
    finished = time.time()
    # print('fast (double):', finished - started)

    obs32 = all_data.astype(np.float32)
    book32 = code_book.astype(np.float32)
    started = time.time()
    codes, dist = vq.vq(obs32, book32)
    finished = time.time()
    # print('fast (float):', finished - started)
    # print(' first codes:', codes[:5])
    # print(' first dist:', dist[:5])
    # print(' last codes:', codes[-5:])
    # print(' last dist:', dist[-5:])
    return codes, dist
Example #5
Source File: clustergeojson.py From open-context-py with GNU General Public License v3.0 | 6 votes |
def cluster_lon_lats(self):
    """Cluster ``self.lon_lats`` with k-means and store the results.

    Sets four attributes:
        idx: centroid index assigned to each point by ``vq``.
        data: the points as a float array, one row per entry of lon_lats.
        centroids: centroids returned by ``kmeans`` for
            ``self.number_clusters`` requested clusters.
        sorted_centroids: the centroids as nested lists, ordered by
            column 0 (lon) and, for equal lon, by column 1 (lat).
    """
    data = array([np.fromiter(lon_lat, np.dtype('float'))
                  for lon_lat in self.lon_lats])
    centroids, _ = kmeans(data, self.number_clusters)
    idx, _ = vq(data, centroids)
    self.idx = idx
    self.data = data
    self.centroids = centroids
    # Sort the centroids by lon, then lat: order by lat first, then re-sort
    # by lon with a *stable* sort so rows with equal lon keep their lat
    # order (the default argsort kind gives no such guarantee).
    by_lat = centroids[centroids[:, 1].argsort()]
    by_lon_lat = by_lat[by_lat[:, 0].argsort(kind='stable')]
    self.sorted_centroids = by_lon_lat.tolist()
Example #6
Source File: bow.py From mmfeat with BSD 3-Clause "New" or "Revised" License | 5 votes |
def quantize(self):
    """Build one centroid-occupancy histogram per entry of ``self.data``.

    Each non-None entry is quantized against ``self.centroids`` with ``vq``
    and the resulting indices are counted into one unit-width bin per
    centroid. Entries whose value is None are skipped.

    Returns:
        dict: name -> histogram array with one value per centroid. Raw
        counts when ``self.normalize`` is falsy, otherwise a density (which
        equals the fraction per centroid because the bins have width 1).
    """
    # Edges 0, 1, ..., k give exactly one bin per centroid index.
    bin_edges = np.arange(self.centroids.shape[0] + 1)
    histograms = {}
    for fname in sorted(self.data.keys()):
        if self.data[fname] is None:
            continue
        idx, _ = vq(self.data[fname], self.centroids)
        # `normed` was removed from np.histogram; `density` is the
        # replacement and gives the same result for unit-width bins.
        histograms[fname], _ = np.histogram(
            idx, bins=bin_edges, density=bool(self.normalize))
    return histograms
Example #7
Source File: noteshrink_module.py From noteshrinker-django with MIT License | 5 votes |
def apply_palette(img, palette, options):
    '''Map every pixel of the image to a palette index.

    Pixels outside the foreground mask keep index 0, i.e. the background
    color palette[0]; each foreground pixel gets the index of its nearest
    palette entry as found by vq. The result has the image's shape without
    its last (channel) axis.
    '''
    if not options.quiet:
        print(' applying palette...')

    background = palette[0]
    foreground = get_fg_mask(background, img, options).flatten()
    flat_pixels = img.reshape((-1, 3))

    labels = np.zeros(flat_pixels.shape[0], dtype=np.uint8)
    labels[foreground], _ = vq(flat_pixels[foreground], palette)
    return labels.reshape(img.shape[:-1])

######################################################################
Example #8
Source File: clustered_kde.py From kombine with MIT License | 5 votes |
def __init__(self, data, k=1):
    """Cluster ``data`` into ``k`` groups and fit one KDE per group.

    Stores the data, its per-column mean and standard deviation, the
    k-means centroids and per-point assignments computed on
    ``self._whiten(data)``, one ``KDE`` per cluster, and the log of each
    cluster's share of ``self.size``.

    NOTE(review): ``self.centroids``, ``self.assignments``, ``self.data``
    and ``self.size`` are read here but defined outside this block --
    presumably properties over the underscore attributes; confirm.
    """
    self._data = data
    self._nclusters = k
    self._mean = np.mean(data, axis=0)
    self._std = np.std(data, axis=0)

    # Cluster in the rescaled space produced by self._whiten.
    scaled = self._whiten(data)
    centroids, _ = kmeans(scaled, k)
    self._centroids = centroids
    labels, _ = vq(scaled, self.centroids)
    self._assignments = labels

    cluster_ids = range(k)
    self._kdes = [KDE(self.data[self.assignments == c]) for c in cluster_ids]
    fractions = [np.count_nonzero(self.assignments == c)/self.size
                 for c in cluster_ids]
    self._logweights = np.log(fractions)
Example #9
Source File: make_bow_vector.py From KTH-Action-Recognition with MIT License | 5 votes |
def make_bow(dataset, clusters, tfidf):
    """Replace every frame's descriptors with a bag-of-words vector.

    Args:
        dataset: sequence of dicts; ``video["features"]`` is a sequence of
            frames, each frame being an array accepted by ``vq`` as
            observations.
        clusters: codebook array; its number of rows is the vocabulary size.
        tfidf: when truthy, every vector is multiplied by
            ``log((n_frames + 1) / (freq + 1))``, where ``freq`` is the number
            of frames in which the word occurs.

    Returns:
        The same ``dataset`` object, modified in place: each
        ``video["features"]`` becomes a list with one float64 vector of
        length ``clusters.shape[0]`` per frame.
    """
    print("Make bow vector for each frame")

    n_words = clusters.shape[0]
    # Count total number of frames.
    n_frames = sum(len(video["features"]) for video in dataset)

    # np.float was removed from NumPy; np.float64 is the same dtype.
    bow = np.zeros((n_frames, n_words), dtype=np.float64)

    # Make bow vectors for all frames.
    frame_index = 0
    for video in dataset:
        for frame in video["features"]:
            visual_word_ids = vq(frame, clusters)[0]
            # Occurrence count of every codebook index in this frame.
            bow[frame_index] = np.bincount(visual_word_ids, minlength=n_words)
            frame_index += 1

    # Check whether to use TF-IDF weighting.
    if tfidf:
        print("Applying TF-IDF weighting")
        freq = np.sum((bow > 0) * 1, axis=0)
        idf = np.log((n_frames + 1) / (freq + 1))
        bow = bow * idf

    # Replace features in dataset with the bow vectors; rows of `bow` are in
    # the same video/frame order in which they were filled above.
    frame_index = 0
    for i, video in enumerate(dataset):
        n_video_frames = len(video["features"])
        video["features"] = list(bow[frame_index:frame_index + n_video_frames])
        frame_index += n_video_frames

        if (i + 1) % 50 == 0:
            print("Processed %d/%d videos" % (i + 1, len(dataset)))

    return dataset
Example #10
Source File: make_bow_vector.py From KTH-Action-Recognition with MIT License | 5 votes |
def make_bow(dataset, clusters, tfidf):
    """Replace every video's descriptors with a single bag-of-words vector.

    Args:
        dataset: sequence of dicts; ``video["features"]`` is an array
            accepted by ``vq`` as observations.
        clusters: codebook array; its number of rows is the vocabulary size.
        tfidf: when truthy, every vector is multiplied by
            ``log((n_videos + 1) / (freq + 1))``, where ``freq`` is the number
            of videos in which the word occurs.

    Returns:
        The same ``dataset`` object, modified in place: each
        ``video["features"]`` becomes a float64 vector of length
        ``clusters.shape[0]``.
    """
    print("Make bow vector for each frame")

    n_videos = len(dataset)
    n_words = clusters.shape[0]

    # np.float was removed from NumPy; np.float64 is the same dtype.
    bow = np.zeros((n_videos, n_words), dtype=np.float64)

    # Make bow vectors for all videos.
    for video_index, video in enumerate(dataset):
        visual_word_ids = vq(video["features"], clusters)[0]
        # Occurrence count of every codebook index in this video.
        bow[video_index] = np.bincount(visual_word_ids, minlength=n_words)

    # Check whether to use TF-IDF weighting.
    if tfidf:
        print("Applying TF-IDF weighting")
        freq = np.sum((bow > 0) * 1, axis=0)
        idf = np.log((n_videos + 1) / (freq + 1))
        bow = bow * idf

    # Replace features in dataset with the bow vector we've computed.
    for i, video in enumerate(dataset):
        video["features"] = bow[i]

        if (i + 1) % 50 == 0:
            print("Processed %d/%d videos" % (i + 1, len(dataset)))

    return dataset
Example #11
Source File: bow.py From mmfeat with BSD 3-Clause "New" or "Revised" License | 5 votes |
def sequences(self):
    """Return the ``vq`` code sequence of every non-None entry of ``self.data``.

    Entries are visited in sorted key order; each value is quantized against
    ``self.centroids`` and only the code array (not the distances) is kept.
    """
    return {
        fname: vq(descriptors, self.centroids)[0]
        for fname, descriptors in ((key, self.data[key]) for key in sorted(self.data.keys()))
        if descriptors is not None
    }
Example #12
Source File: xmeans.py From msaf with MIT License | 5 votes |
def test_kmeans(K=5):
    """Scatter-plot whitened synthetic 2-D data with its k-means centroids.

    The data always comes from ``XMeans.generate_2d_data(K=4)``; the ``K``
    argument only sets how many centroids ``kmeans`` is asked for.
    """
    points = vq.whiten(XMeans.generate_2d_data(K=4))
    centroids, _distortion = vq.kmeans(points, K, iter=100)
    plt.scatter(points[:, 0], points[:, 1])
    # Centroids are drawn on top of the data in magenta.
    plt.scatter(centroids[:, 0], centroids[:, 1], color="m")
    plt.show()
Example #13
Source File: xmeans.py From msaf with MIT License | 5 votes |
def compute_bic(self, D, means, labels, K, R):
    """Computes the Bayesian Information Criterion.

    ``D`` is whitened first. The variance estimate is the sum of Euclidean
    distances from every point to the mean of its labelled cluster, divided
    by ``R - K``. Returns 1 right away when ``R == K`` (which would otherwise
    divide by zero).
    """
    D = vq.whiten(D)
    n_obs = D.shape[0]
    n_dims = D.shape[1]

    if R == K:
        return 1

    # Maximum likelihood estimate (MLE)
    spread = 0
    for k, center in enumerate(means):
        members = D[np.argwhere(labels == k)]
        # argwhere adds an axis; flatten back to (n_members, n_dims).
        members = members.reshape((members.shape[0], members.shape[-1]))
        for point in members:
            spread += distance.euclidean(point, center)
    spread /= float(R - K)

    # Log-likelihood of the data, term by term in the original order.
    const_term = - n_obs/2. * np.log(2*np.pi)
    var_term = (n_obs * n_dims)/2. * np.log(spread)
    dof_term = (n_obs - K) / 2.
    log_lik = const_term - var_term - dof_term \
        + n_obs * np.log(n_obs) - n_obs * np.log(R)

    # Params of BIC
    n_params = (K-1) + n_dims * K + spread

    return log_lik - n_params / 2. * np.log(R)
Example #14
Source File: xmeans.py From msaf with MIT License | 5 votes |
def run_kmeans(self, X, K):
    """Whiten ``X``, run k-means on it and label every observation.

    Returns:
        ``(means, labels)``: the centroids found by ``kmeans`` (in the
        whitened space) and the centroid index ``vq`` assigns to each row.
    """
    whitened = vq.whiten(X)
    centroids = vq.kmeans(whitened, K, iter=100)[0]
    assignments = vq.vq(whitened, centroids)[0]
    return centroids, assignments
Example #15
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test__vq_sametype(self):
    """Expect ``_vq.vq`` to raise ValueError for an int array and its float copy.

    Does nothing when ``TESTC`` is false.
    """
    if not TESTC:
        return
    ints = np.array([1, 2])
    floats = ints.astype(float)
    assert_raises(ValueError, _vq.vq, ints, floats)
Example #16
Source File: test_vq.py From Computable with MIT License | 5 votes |
def test_vq_1d(self):
    """Test special rank 1 vq algo, python implementation.

    The first column of ``X`` is quantized against its own first three
    values by the C ``_vq.vq`` and by ``py_vq`` (on column-vector views);
    codes and distances must match exactly. Only a notice is printed when
    ``TESTC`` is false.
    """
    column = X[:, 0]
    codebook = column[:3]
    if not TESTC:
        print("== not testing C imp of vq (rank 1) ==")
        return
    c_codes, c_dist = _vq.vq(column, codebook)
    py_codes, py_dist = py_vq(column[:, np.newaxis], codebook[:, np.newaxis])
    assert_array_equal(c_codes, py_codes)
    assert_array_equal(c_dist, py_dist)
Example #17
Source File: noteshrink.py From noteshrink with MIT License | 5 votes |
def apply_palette(img, palette, options):
    '''Return, for each pixel of the image, its index into the palette.

    Background pixels (those not selected by the foreground mask) stay at
    index 0, which is the background color palette[0]. Foreground pixels
    are matched to their nearest palette entry with vq. The returned array
    has the image's shape minus the trailing channel axis.
    '''
    if not options.quiet:
        print(' applying palette...')

    fg = get_fg_mask(palette[0], img, options)
    out_shape = img.shape[:-1]
    rgb = img.reshape((-1, 3))
    fg = fg.flatten()

    labels = np.zeros(rgb.shape[0], dtype=np.uint8)
    nearest, _ = vq(rgb[fg], palette)
    labels[fg] = nearest
    return labels.reshape(out_shape)

######################################################################
Example #18
Source File: audio_tools.py From tools with BSD 3-Clause "New" or "Revised" License | 4 votes |
def run_phase_vq_example():
    """Vector-quantize STFT magnitude frames of sample audio and write WAVs.

    A codebook is built from the magnitude frames of a training subset
    (``d[::9]``). Every frame of a test subset (``d[7::8][:5]``) is replaced
    by its nearest codebook frame and inverted back to a waveform with
    ``iterate_invert_spectrogram``. Six WAV files (relative paths) are
    written: train / VQ-test before AGC, then train / test / VQ-test after
    ``time_attack_agc``.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        magnitudes = [np.abs(stft(dd, n_fft, step=step, real=False,
                                  compute_onesided=False))
                      for dd in list_of_data]
        return np.vstack(magnitudes), n_fft, step

    def preprocess_train(list_of_data, random_state):
        # random_state is accepted but not used: the codebook is simply a
        # deep copy of all training frames.
        frames, _n_fft, _step = _pre(list_of_data)
        return copy.deepcopy(frames)

    def apply_preprocess(list_of_data, clusters):
        frames, n_fft, step = _pre(list_of_data)
        # Nondeterministic ?
        memberships, _distances = vq(frames, clusters)
        quantized = clusters[memberships]
        return iterate_invert_spectrogram(quantized, n_fft, step, verbose=True)

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    for path, signal in (("phase_train_no_agc.wav", fix_d1),
                         ("phase_vq_test_no_agc.wav", vq_d2)):
        wavfile.write(path, fs, soundsc(signal))

    agc_d1, _freq_d1, _energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, _freq_d2, _energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, _freq_vq_d2, _energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    # Optional visual check (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.specgram(agc_vq_d2, cmap="gray")   # "Fake"
    #   plt.figure()
    #   plt.specgram(agc_d2, cmap="gray")      # "Real"
    #   plt.show()

    for path, signal in (("phase_train_agc.wav", agc_d1),
                         ("phase_test_agc.wav", agc_d2),
                         ("phase_vq_test_agc.wav", agc_vq_d2)):
        wavfile.write(path, fs, soundsc(signal))
Example #19
Source File: audio_tools.py From dagbldr with BSD 3-Clause "New" or "Revised" License | 4 votes |
def run_phase_vq_example():
    """Vector-quantize STFT magnitude frames of sample audio and write WAVs.

    The codebook is a copy of every magnitude frame of the training subset
    (``d[::9]``). Frames of the test subset (``d[7::8][:5]``) are snapped to
    their nearest codebook frame and turned back into a waveform by
    ``iterate_invert_spectrogram``. Output: two WAV files before AGC
    (train, VQ-test) and three after ``time_attack_agc`` (train, test,
    VQ-test), all written to relative paths.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        stacked = np.vstack([
            np.abs(stft(dd, n_fft, step=step, real=False,
                        compute_onesided=False))
            for dd in list_of_data
        ])
        return stacked, n_fft, step

    def preprocess_train(list_of_data, random_state):
        # random_state is part of the signature but is not consulted.
        frames, _n_fft, _step = _pre(list_of_data)
        return copy.deepcopy(frames)

    def apply_preprocess(list_of_data, clusters):
        frames, n_fft, step = _pre(list_of_data)
        # Nondeterministic ?
        memberships, _distances = vq(frames, clusters)
        return iterate_invert_spectrogram(clusters[memberships], n_fft, step,
                                          verbose=True)

    def _save(path, rate, signal):
        # Every output goes through soundsc before being written.
        wavfile.write(path, rate, soundsc(signal))

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    _save("phase_train_no_agc.wav", fs, fix_d1)
    _save("phase_vq_test_no_agc.wav", fs, vq_d2)

    agc_d1, _freq_d1, _energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, _freq_d2, _energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, _freq_vq_d2, _energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    # Optional visual check (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.specgram(agc_vq_d2, cmap="gray")   # "Fake"
    #   plt.figure()
    #   plt.specgram(agc_d2, cmap="gray")      # "Real"
    #   plt.show()

    _save("phase_train_agc.wav", fs, agc_d1)
    _save("phase_test_agc.wav", fs, agc_d2)
    _save("phase_vq_test_agc.wav", fs, agc_vq_d2)
Example #20
Source File: analysis.py From enlopy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization.

    The time scale of the archetypes depends on the selected dimensions
    (x, y). For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by. Has
            to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an
            overview of the results
    Returns:
        np.ndarray: the cluster centres scaled back to the original units,
        with one row per column of the reshaped frame and one column per
        centroid found (presumably ``(len(x), k)`` -- confirm against
        ``reshape_timeseries``).
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)
    # whiten() divides every column by its population standard deviation
    # (ddof=0), whereas pandas' std() defaults to ddof=1; undo the scaling
    # with the same estimator so the centres return to the original units.
    column_std = df.std(ddof=0).values
    clusters_center_dewhitened = (clusters_center * column_std).T

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')

            plt.figure()
            # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception:  # FIXME: specify exception
            # Deliberately best-effort: diagnostics must not prevent the
            # archetypes from being returned.
            print ('Works only with daily profile clustering')

    return clusters_center_dewhitened
Example #21
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 4 votes |
def run_phase_vq_example():
    """Vector-quantize STFT magnitude frames of sample audio and write WAVs.

    A codebook is built from the magnitude frames of a training subset
    (``d[::9]``). Every frame of a test subset (``d[7::8][:5]``) is replaced
    by its nearest codebook frame and inverted back to a waveform with
    ``iterate_invert_spectrogram``. Six WAV files (relative paths) are
    written: train / VQ-test before AGC, then train / test / VQ-test after
    ``time_attack_agc``.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        magnitudes = [np.abs(stft(dd, fftsize=n_fft, step=step, real=False,
                                  compute_onesided=False))
                      for dd in list_of_data]
        return np.vstack(magnitudes), n_fft, step

    def preprocess_train(list_of_data, random_state):
        # random_state is accepted but not used: the codebook is simply a
        # deep copy of all training frames.
        frames, _n_fft, _step = _pre(list_of_data)
        return copy.deepcopy(frames)

    def apply_preprocess(list_of_data, clusters):
        frames, n_fft, step = _pre(list_of_data)
        # Nondeterministic ?
        memberships, _distances = vq(frames, clusters)
        quantized = clusters[memberships]
        return iterate_invert_spectrogram(quantized, n_fft, step, verbose=True)

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    for path, signal in (("phase_train_no_agc.wav", fix_d1),
                         ("phase_vq_test_no_agc.wav", vq_d2)):
        wavfile.write(path, fs, soundsc(signal))

    agc_d1, _freq_d1, _energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, _freq_d2, _energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, _freq_vq_d2, _energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    # Optional visual check (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.specgram(agc_vq_d2, cmap="gray")   # "Fake"
    #   plt.figure()
    #   plt.specgram(agc_d2, cmap="gray")      # "Real"
    #   plt.show()

    for path, signal in (("phase_train_agc.wav", agc_d1),
                         ("phase_test_agc.wav", agc_d2),
                         ("phase_vq_test_agc.wav", agc_vq_d2)):
        wavfile.write(path, fs, soundsc(signal))
Example #22
Source File: audio_tools.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 4 votes |
def run_phase_vq_example():
    """Vector-quantize STFT magnitude frames of sample audio and write WAVs.

    The codebook is a copy of every magnitude frame of the training subset
    (``d[::9]``). Frames of the test subset (``d[7::8][:5]``) are snapped to
    their nearest codebook frame and turned back into a waveform by
    ``iterate_invert_spectrogram``. Output: two WAV files before AGC
    (train, VQ-test) and three after ``time_attack_agc`` (train, test,
    VQ-test), all written to relative paths.
    """
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        stacked = np.vstack([
            np.abs(stft(dd, fftsize=n_fft, step=step, real=False,
                        compute_onesided=False))
            for dd in list_of_data
        ])
        return stacked, n_fft, step

    def preprocess_train(list_of_data, random_state):
        # random_state is part of the signature but is not consulted.
        frames, _n_fft, _step = _pre(list_of_data)
        return copy.deepcopy(frames)

    def apply_preprocess(list_of_data, clusters):
        frames, n_fft, step = _pre(list_of_data)
        # Nondeterministic ?
        memberships, _distances = vq(frames, clusters)
        return iterate_invert_spectrogram(clusters[memberships], n_fft, step,
                                          verbose=True)

    def _save(path, rate, signal):
        # Every output goes through soundsc before being written.
        wavfile.write(path, rate, soundsc(signal))

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    _save("phase_train_no_agc.wav", fs, fix_d1)
    _save("phase_vq_test_no_agc.wav", fs, vq_d2)

    agc_d1, _freq_d1, _energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, _freq_d2, _energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, _freq_vq_d2, _energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    # Optional visual check (disabled):
    #   import matplotlib.pyplot as plt
    #   plt.specgram(agc_vq_d2, cmap="gray")   # "Fake"
    #   plt.figure()
    #   plt.specgram(agc_d2, cmap="gray")      # "Real"
    #   plt.show()

    _save("phase_train_agc.wav", fs, agc_d1)
    _save("phase_test_agc.wav", fs, agc_d2)
    _save("phase_vq_test_agc.wav", fs, agc_vq_d2)