Python scipy.cluster.vq.whiten() Examples
The following are 9 code examples of scipy.cluster.vq.whiten().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: knn.py From deepnl with GNU General Public License v3.0 | 7 votes |
def Kmeans(file, vocabfile, k):
    """Cluster the embeddings into ``k`` groups with k-means and print the result.

    Output, in order: the number of centroids and the distortion, one line
    per centroid, one line per cluster listing its words, and finally one
    "word code" line per vocabulary entry.

    NOTE(review): ``file`` and ``vocabfile`` are not used in the visible
    body; ``embeddings``, ``representatives``, ``id_word`` and ``word_id``
    are read from an outer scope -- confirm where they are loaded.
    NOTE(review): the codebook is fitted on whitened data but is compared
    against the raw ``embeddings`` below -- confirm this is intended.
    """
    # Fixed seed so that repeated runs pick the same initial centroids.
    np.random.seed((1000, 2000))
    codebook, distortion = kmeans(whiten(embeddings), k)
    clusters = [
        l2_nearest(embeddings, centre, representatives + 1)
        for centre in codebook
    ]

    # output
    print(len(codebook), distortion)
    for centre in codebook:
        print(' '.join(map(str, centre)))
    print()
    for members in clusters:
        line = ' '.join(id_word[index] for index, _ in members)
        print(line.encode('utf-8'))
    print()

    # assign clusters to words
    codes, _ = vq(embeddings, codebook)
    for word, code in zip(word_id.keys(), codes):
        print(word, code)
Example #2
Source File: clustering_kmeans_alternative.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def findClusters_kmeans(data):
    """Cluster ``data`` into four groups using k-means.

    The observations are scaled to unit variance per feature before
    ``vq.kmeans2`` is run for 30 iterations.

    Returns:
        tuple: ``(centroids, labels)`` exactly as returned by ``vq.kmeans2``.
    """
    whitened = vq.whiten(data)
    centroids, labels = vq.kmeans2(whitened, k=4, iter=30)
    return centroids, labels


# the file name of the dataset
Example #3
Source File: segmenter.py From msaf with MIT License | 6 votes |
def compute_labels_kmeans(fmcs, k):
    """Label every row of ``fmcs`` with a k-means cluster index.

    Args:
        fmcs: 2-D array; only the columns from the midpoint onwards are used.
        k: requested number of clusters, capped at the number of rows.

    Returns:
        The ``labels_`` attribute of the fitted ``KMeans`` estimator.
    """
    # Only the second half of the columns is kept (the original author
    # notes that removing the higher frequencies seems to yield better
    # results), then log(1 + x) compression is applied.
    midpoint = fmcs.shape[1] // 2
    fmcs = np.log1p(fmcs[:, midpoint:])
    wfmcs = vq.whiten(fmcs)

    # Never ask for more clusters than there are segments (rows).
    k = min(k, fmcs.shape[0])

    estimator = KMeans(n_clusters=k, n_init=100)
    estimator.fit(wfmcs)
    return estimator.labels_
Example #4
Source File: pairdist.py From link-prediction_with_deep-learning with MIT License | 5 votes |
def argparser():
    """Build the command-line argument parser.

    Falls back to ``compat.argparse`` when the standard ``argparse`` module
    cannot be imported.

    Returns:
        An ``ArgumentParser`` with one positional argument (``vectors``) and
        the options -a, -i, -M, -n, -r, -t, -T, -w and -W.
    """
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    parser = argparse.ArgumentParser()

    # Settings shared by every boolean on/off switch.
    switch = dict(default=False, action='store_true')

    # (names, keyword settings) in the order the arguments are registered.
    specs = [
        (('vectors',),
         dict(nargs=1, metavar='FILE', help='word vectors')),
        (('-a', '--approximate'),
         dict(switch, help='filter by approximate similarity (with -t)')),
        (('-i', '--min-index'),
         dict(default=0, type=int, help='index of first word (default 0)')),
        (('-M', '--metric'),
         dict(default=DEFAULT_METRIC, choices=sorted(metrics.keys()),
              help='distance metric to apply')),
        (('-n', '--normalize'),
         dict(switch, help='normalize vectors to unit length')),
        (('-r', '--max-rank'),
         dict(metavar='INT', default=None, type=int,
              help='only consider r most frequent words')),
        (('-t', '--threshold'),
         dict(metavar='FLOAT', default=None, type=float,
              help='only output distances <= t')),
        (('-T', '--tolerance'),
         dict(metavar='FLOAT', default=0.1, type=float,
              help='approximation tolerace (with -a)')),
        (('-w', '--whiten'),
         dict(switch, help='normalize features to unit variance ')),
        (('-W', '--words'),
         dict(switch, help='output words instead of indices')),
    ]
    for names, settings in specs:
        parser.add_argument(*names, **settings)
    return parser
Example #5
Source File: pairdist.py From link-prediction_with_deep-learning with MIT License | 5 votes |
def process_options(args):
    """Parse ``args``, validate the options and load the word vectors.

    Args:
        args: list of command-line arguments handed to the parser built by
            ``argparser()``.

    Returns:
        tuple: ``(words, vectors, wv, options)`` where ``wv`` is the object
        returned by ``wvlib.load`` and ``vectors`` has been whitened when
        ``--whiten`` was given.

    Raises:
        ValueError: if max-rank, threshold or tolerance is out of range, or
            ``--approximate`` is given without a threshold.
        NotImplementedError: if ``--approximate`` is combined with a metric
            other than cosine, or with ``--whiten``.
    """
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    # NOTE(review): this truthiness test also rejects an explicit
    # threshold of 0.0, which the range check above accepts -- confirm
    # whether ``is None`` was intended.
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening should be implemented in wvlib to support together with
        # approximate similarity
        if options.approximate:
            # The original raised a misspelled name here, which surfaced
            # as NameError rather than NotImplementedError.
            raise NotImplementedError(
                'whiten is not supported together with approximate')
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
Example #6
Source File: xmeans.py From msaf with MIT License | 5 votes |
def run_kmeans(self, X, K):
    """Run k-means on the whitened data and label every observation.

    Args:
        X: observations, one per row.
        K: number of clusters requested from ``vq.kmeans``.

    Returns:
        tuple: ``(means, labels)`` -- the codebook found on the whitened
        data and, for each row of ``X``, the index of its closest code.
    """
    whitened = vq.whiten(X)
    means, _ = vq.kmeans(whitened, K, iter=100)
    # Assign each whitened observation to its nearest centroid.
    labels, _ = vq.vq(whitened, means)
    return means, labels
Example #7
Source File: xmeans.py From msaf with MIT License | 5 votes |
def compute_bic(self, D, means, labels, K, R):
    """Compute the Bayesian Information Criterion of a clustering.

    Args:
        D: observations, one per row; whitened internally before use.
        means: cluster centroids, indexed by cluster label.
        labels: array holding the cluster label of every row of ``D``.
        K: number of clusters used in the formula.
        R: number of points used in the formula.

    Returns:
        The BIC value, or ``1`` when ``R == K`` (the variance estimate
        would otherwise divide by zero).
    """
    whitened = vq.whiten(D)
    n_points = whitened.shape[0]
    n_dims = whitened.shape[1]

    if R == K:
        return 1

    # Variance estimate: summed Euclidean distance of every point to the
    # centroid of its own cluster, divided by (R - K).
    spread = 0
    for index, centre in enumerate(means):
        members = whitened[np.argwhere(labels == index)]
        # argwhere adds an extra axis; flatten back to (n_members, n_dims).
        members = members.reshape((members.shape[0], members.shape[-1]))
        for point in members:
            spread += distance.euclidean(point, centre)
    mle_var = spread / float(R - K)

    # Log-likelihood of the data
    log_likelihood = (
        -n_points / 2. * np.log(2 * np.pi)
        - (n_points * n_dims) / 2. * np.log(mle_var)
        - (n_points - K) / 2.
        + n_points * np.log(n_points)
        - n_points * np.log(R)
    )

    # Params of BIC
    n_params = (K - 1) + n_dims * K + mle_var

    return log_likelihood - n_params / 2. * np.log(R)
Example #8
Source File: xmeans.py From msaf with MIT License | 5 votes |
def test_kmeans(K=5):
    """Run k-means on synthetic 2-D data and show the result.

    The whitened points are scattered first and the ``K`` centroids found
    by ``vq.kmeans`` are drawn on top in magenta.
    """
    points = vq.whiten(XMeans.generate_2d_data(K=4))
    centroids, _ = vq.kmeans(points, K, iter=100)

    plt.scatter(points[:, 0], points[:, 1])
    plt.scatter(centroids[:, 0], centroids[:, 1], color="m")
    plt.show()
Example #9
Source File: analysis.py From enlopy with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization.

    The time scale of the archetypes depends on the selected dimensions
    (x, y). For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by.
            Has to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an
            overview of the results

    Returns:
        np.ndarray: cluster centres scaled back to the original units,
        transposed so that every column is one archetype, i.e. shape
        (number of columns of the reshaped frame, number of centres).
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)

    # Undo the whitening with exactly the scale ``whiten`` applied: the
    # population standard deviation (ddof=0) of every column, with
    # zero-variance columns left unscaled. ``df.std()`` would use ddof=1
    # and would multiply constant columns by zero.
    feature_std = np.asarray(df, dtype=float).std(axis=0)
    feature_std[feature_std == 0] = 1.0
    # Broadcasting over rows also copes with kmeans returning fewer than
    # ``k`` centroids.
    clusters_center_dewhitened = (clusters_center * feature_std).T

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')
            plt.figure()
            # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({
                y: Load.resample('d').mean().index.weekday,
                'clusters': clusters,
                'val': 1,
            })
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(
                columns=y, index='clusters', aggfunc='count'
            ).T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception:  # FIXME: specify exception
            # Deliberately best-effort: diagnostics must not prevent the
            # archetypes from being returned.
            print('Works only with daily profile clustering')

    return clusters_center_dewhitened