Python scipy.cluster.vq.kmeans() Examples

The following are 20 code examples of scipy.cluster.vq.kmeans(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.cluster.vq, or try the search function.
Example #1
Source File: knn.py    From deepnl with GNU General Public License v3.0 7 votes vote down vote up
def Kmeans(file, vocabfile, k):
  """Cluster the embeddings with k-means and print the result.

  Prints the number of centroids and the distortion, each centroid,
  the words nearest to each centroid, and finally the code assigned
  to every word.

  NOTE(review): `embeddings`, `id_word`, `word_id`, `representatives`
  and `l2_nearest` are not defined in this snippet; presumably they are
  derived from `file` / `vocabfile` elsewhere -- confirm against the
  full source.
  """
  # Fixed seed so the random k-means initialisation is repeatable.
  np.random.seed((1000,2000))
  codebook, distortion = kmeans(whiten(embeddings), k)
  neighbours = representatives+1
  clusters = [l2_nearest(embeddings, center, neighbours) for center in codebook]

  # Report: summary line, then one line per centroid.
  print(len(codebook), distortion)
  for center in codebook:
    print(' '.join(map(str, center)))
  print()

  # One line per cluster listing its representative words.
  for members in clusters:
    words = [id_word[index] for index, _dist in members]
    print(' '.join(words).encode('utf-8'))
  print()

  # Quantise the (un-whitened) embeddings against the codebook and
  # print each word with its code.
  codes = vq(embeddings, codebook)[0]
  for word, code in zip(word_id.keys(), codes):
    print(word, code)
Example #2
Source File: cmag.py    From neuropythy with GNU Affero General Public License v3.0 6 votes vote down vote up
def sigma_bin_walls(sigma, bins):
        """Return bin edges for `sigma` derived from a k-means clustering.

        The values are scaled by their standard deviation, clustered into
        `bins` centres, and the midpoints between consecutive sorted
        centres become the interior edges. The edges always start at 0 and
        end at max(sigma). When the standard deviation is (close to) zero
        only those two outer edges are returned.
        """
        import scipy
        import scipy.cluster
        import scipy.cluster.vq as vq
        spread = np.std(sigma)
        if np.isclose(spread, 0):
            # Degenerate input: nothing to cluster.
            return pimms.imm_array([0, np.max(sigma)])
        centers = sorted(spread * vq.kmeans(sigma/spread, bins)[0])
        lower, upper = centers[:-1], centers[1:]
        # Midpoint between each pair of neighbouring centres.
        walls = np.mean([lower, upper], axis=0)
        edges = np.concatenate(([0], walls, [np.max(sigma)]))
        return pimms.imm_array(edges)
Example #3
Source File: test_vq.py    From Computable with MIT License 6 votes vote down vote up
def test_large_features(self):
        # Generate a data set with large values, and run kmeans on it to
        # (regression for 1077).
        d = 300
        n = 100

        m1 = np.random.randn(d)
        m2 = np.random.randn(d)
        x = 10000 * np.random.randn(n, d) - 20000 * m1
        y = 10000 * np.random.randn(n, d) + 20000 * m2

        data = np.empty((x.shape[0] + y.shape[0], d), np.double)
        data[:x.shape[0]] = x
        data[x.shape[0]:] = y

        kmeans(data, 2) 
Example #4
Source File: test_vq.py    From Computable with MIT License 6 votes vote down vote up
def test_kmeans_lost_cluster(self):
        """This will cause kmean to have a cluster with no points.

        Checks three things with the same initial centroids:
        kmeans runs to completion, kmeans2 with missing='warn' runs to
        completion (its UserWarning is silenced), and kmeans2 with
        missing='raise' raises ClusterError.
        """
        data = np.fromfile(DATAFILE1, sep=", ")
        data = data.reshape((200, 2))
        initk = np.array([[-1.8127404, -0.67128041],
                         [2.04621601, 0.07401111],
                         [-2.31149087,-0.05160469]])

        # Only the absence of an exception matters here.
        kmeans(data, initk)

        # The `with` block restores the warning filters on exit, even if
        # kmeans2 raises, replacing the manual __enter__/__exit__ calls.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            kmeans2(data, initk, missing='warn')

        assert_raises(ClusterError, kmeans2, data, initk, missing='raise')
Example #5
Source File: clustering.py    From Load-Forecasting with MIT License 6 votes vote down vote up
def kMeansClustering(x,k):
    """Cluster `x` with k-means and label every example.

    Returns a tuple (centroids, labels) where labels[i] is the index of
    the centroid closest (Euclidean distance) to x[i]; on ties the
    lowest index wins.
    """
    points = np.asarray(x)

    # kmeans returns (codebook, distortion); only the codebook is needed.
    centroids, _ = kmeans(points, k, iter=10)

    labels = []
    for point in points:
        best_label, best_dist = -1, float('inf')
        for label, center in enumerate(centroids):
            dist = euclidean(point, center)
            # Strict comparison keeps the first centroid on equal distance.
            if dist < best_dist:
                best_label, best_dist = label, dist
        labels.append(best_label)

    return (centroids, labels)

# Performs a weighted clustering on the examples in xTest
# Returns a 1-d vector of predictions 
Example #6
Source File: cluster.py    From deephar with MIT License 6 votes vote down vote up
def mean_on_most_assigned(x, c):
    """Return the mean of the points assigned to the most populated centroid.

    Every point of `x` is assigned to its nearest centroid in `c`
    (Euclidean distance); the points of the centroid that collected the
    most assignments are averaged and that mean is returned.

    Args:
        x: sequence of points, each an array with as many values as `c`
            has columns.
        c: 2-D array of centroids, one per row.

    Returns:
        1-D array with the mean of the points nearest to the winning
        centroid (ties go to the lowest centroid index).
    """
    nb_c, dim = len(c), c.shape[1]
    assign = np.zeros(nb_c)
    total = np.zeros(c.shape)
    for point in x:
        # Row vector of the centroid dimensionality (previously fixed to
        # 2); broadcasting against `c` yields one difference per centroid.
        row = point.reshape((1, dim))
        dist = np.sqrt(np.sum(np.power(row - c, 2), axis=1))
        idx = dist.argmin()
        assign[idx] += 1
        total[idx, :] += point
    best = assign.argmax()
    return total[best, :] / assign[best]

# def best_kmeans(pred):
    # plt.scatter(pred[:,0], pred[:,1], color='b')
    # c,v = kmeans(pred, 3)
    # plt.scatter(c[:,0], c[:,1], color='g')
    # n = most_assigned(pred, c)
    # plt.scatter(c[n,0], c[n,1], color='r')
    # plt.show() 
Example #7
Source File: colorz.py    From colorz with MIT License 6 votes vote down vote up
def colorz(fd, n=DEFAULT_NUM_COLORS, min_v=DEFAULT_MINV, max_v=DEFAULT_MAXV,
           bold_add=DEFAULT_BOLD_ADD, order_colors=True):
    """
    Return the n dominant colors of an image, each paired with a bold variant.

    The image read from fd is shrunk to THUMB_SIZE, every color is clamped
    with min_v / max_v, and the clamped colors are clustered into n groups
    with k-means. Each resulting color is paired with the color produced
    by brighten(color, bold_add).

    When order_colors is true the colors are passed through order_by_hue
    first. For terminal colors, the hue order is:
    red, yellow, green, cyan, blue, magenta

    Returns a list of (color, bold_color) pairs.
    """
    image = Image.open(fd)
    image.thumbnail(THUMB_SIZE)

    samples = array([clamp(pixel, min_v, max_v) for pixel in get_colors(image)])
    centers = kmeans(samples.astype(float), n)[0]

    if order_colors:
        palette = order_by_hue(centers)
    else:
        palette = centers

    bold_palette = [brighten(color, bold_add) for color in palette]
    return list(zip(palette, bold_palette))
Example #8
Source File: ckmean.py    From TextDetector with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, data, kk):
		"""Set up convolutional k-means with randomly chosen initial centers.

		data: matrix of samples; the initial centers are picked by
		      indexing its first axis.
		kk:   number of clusters.

		Stores `data`, `kk` and the initial center matrix `D`
		(normalised through self.colnorm) on the instance.
		"""
		print('starting kmeans quatization...(.py file is used)')
		# Draw kk distinct sample indices and use those samples as the
		# starting centers.
		picked = random.sample(range(len(data)), kk)
		centers = self.colnorm(data[picked, :])
		self.data = data
		self.kk = kk
		self.D = centers
Example #9
Source File: clustergeojson.py    From open-context-py with GNU General Public License v3.0 6 votes vote down vote up
def cluster_lon_lats(self):
        """Clusters the list of lon_lats into groups.

        Reads self.lon_lats and self.number_clusters, and stores:
            self.data: the points as a float array, one row per point.
            self.centroids: the centroids returned by kmeans.
            self.idx: for every point, the index of its nearest centroid.
            self.sorted_centroids: the centroids as a list of lists,
                ordered by column 0 and then by column 1 (presumably
                lon, then lat -- confirm the coordinate order).
        """
        np_lon_lats = [np.fromiter(lon_lat, np.dtype('float'))
                       for lon_lat in self.lon_lats]
        data = array(np_lon_lats)
        centroids, _ = kmeans(data, self.number_clusters)
        idx, _ = vq(data, centroids)
        self.idx = idx
        self.data = data
        self.centroids = centroids
        # lexsort treats the LAST key as primary: order by column 0 and
        # break ties on column 1. Two chained argsort() calls do not
        # guarantee this, because the default sort kind is not stable.
        order = np.lexsort((centroids[:, 1], centroids[:, 0]))
        self.sorted_centroids = centroids[order].tolist()
Example #10
Source File: noteshrink_module.py    From noteshrinker-django with MIT License 5 votes vote down vote up
def get_palette(samples, options, return_mask=False, kmeans_iter=40):
    '''Build the color palette for a set of sampled RGB values.

    Entry 0 of the palette is the background color; the remaining
    options.num_colors - 1 entries come from k-means clustering of the
    foreground samples. Returns the palette alone, or the tuple
    (palette, foreground mask) when return_mask is true.

    '''

    if not options.quiet:
        print('  getting palette...')

    background = get_bg_color(samples, 6)
    fg_mask = get_fg_mask(background, samples, options)

    # Cluster only the foreground samples; the background already has
    # its own palette slot.
    foreground = samples[fg_mask].astype(np.float32)
    centers = kmeans(foreground, options.num_colors - 1, iter=kmeans_iter)[0]

    palette = np.vstack((background, centers)).astype(np.uint8)

    if return_mask:
        return palette, fg_mask
    return palette


###################################################################### 
Example #11
Source File: clustered_kde.py    From kombine with MIT License 5 votes vote down vote up
def __init__(self, data, k=1):
        """Cluster `data` into `k` groups and build one KDE per group.

        The data are whitened via self._whiten before clustering. Each
        cluster gets a KDE built from its (un-whitened) members, plus a
        log weight equal to the log of its share of self.size.
        """
        self._data = data
        self._nclusters = k

        self._mean = np.mean(data, axis=0)
        self._std = np.std(data, axis=0)

        # k-means and the assignment both operate on the whitened data.
        whitened = self._whiten(data)
        self._centroids = kmeans(whitened, k)[0]
        self._assignments = vq(whitened, self.centroids)[0]

        cluster_ids = range(k)
        self._kdes = [KDE(self.data[self.assignments == cid])
                      for cid in cluster_ids]
        shares = [np.count_nonzero(self.assignments == cid)/self.size
                  for cid in cluster_ids]
        self._logweights = np.log(shares)
Example #12
Source File: cluster.py    From deephar with MIT License 5 votes vote down vote up
def clustering_joints(y_pred, k=3):
    """Pick one representative position per (sample, joint) via k-means.

    `y_pred` must be 4-D; axis 0 is clustered, axes 1 and 2 index the
    sample and the joint, and the last axis holds the coordinates. For
    every (sample, joint) pair the values along axis 0 are clustered
    into `k` centres and the centre selected by most_assigned() is kept.

    Returns an array of shape (nb_spl, nb_joints, dim).
    """
    _, nb_spl, nb_joints, dim = y_pred.shape
    result = np.zeros((nb_spl, nb_joints, dim))
    # ndindex walks (sample, joint) pairs in the same row-major order as
    # two nested range() loops.
    for s, j in np.ndindex(nb_spl, nb_joints):
        candidates = y_pred[:, s, j]
        centers, _distortion = kmeans(candidates, k)
        winner = most_assigned(candidates, centers)
        result[s, j, :] = centers[winner]
    return result
Example #13
Source File: color.py    From dvt with GNU General Public License v2.0 5 votes vote down vote up
def _get_dominant(img, num_dominant):
    img_flat = img.reshape(-1, 3).astype(float32)

    # increasing iter would give 'better' clustering, at the cost of speed
    dominant_colors, _ = kmeans(img_flat, num_dominant, iter=5)
    #kmeans_code = vq(img_flat, dominant_colors)

    if dominant_colors.shape[0] != num_dominant:         # pragma: no cover
        diff = num_dominant - dominant_colors.shape[0]
        dominant_colors = vstack([
            dominant_colors,
            zeros((diff, dominant_colors.shape[1]))
        ])

    return dominant_colors.astype(uint8) 
Example #14
Source File: SPGP.py    From kusanagi with MIT License 5 votes vote down vote up
def init_pseudo_inputs(self):
        """Initialise the sparse pseudo inputs from the training inputs.

        Requires self.N >= self.n_inducing. Starting points are chosen
        with utils.kmeanspp, refined with kmeans, and stored via
        self.set_params under the key 'X_sp'. Also sets
        self.should_recompile to True.

        Raises:
            AssertionError: if self.N is smaller than self.n_inducing.
        """
        # '%s' instead of the invalid '%n' conversion: the old format
        # string raised ValueError while building the assertion message.
        msg = "Dataset must have more than n_inducing [ %s ] to enable"
        msg += " inference with sparse pseudo inputs"
        assert self.N >= self.n_inducing, msg % (self.n_inducing)
        self.should_recompile = True
        # pick initial cluster centers from dataset
        X = self.X.get_value()
        X_sp_ = utils.kmeanspp(X, self.n_inducing)

        # perform kmeans to get initial cluster centers
        utils.print_with_stamp('Initialising pseudo inputs', self.name)
        X_sp_, dist = kmeans(X, X_sp_, iter=200, thresh=1e-9)
        # initialize symbolic tensor variable if necessary
        # (this will create the self.X_sp attribute)
        self.set_params({'X_sp': X_sp_})
Example #15
Source File: xmeans.py    From msaf with MIT License 5 votes vote down vote up
def test_kmeans(K=5):
    """Run k-means on whitened synthetic 2-D data and plot the outcome.

    The data points are drawn in the default color and the K centres
    found by k-means in magenta.
    """
    points = vq.whiten(XMeans.generate_2d_data(K=4))
    centers = vq.kmeans(points, K, iter=100)[0]

    plt.scatter(points[:, 0], points[:, 1])
    plt.scatter(centers[:, 0], centers[:, 1], color="m")
    plt.show()
Example #16
Source File: xmeans.py    From msaf with MIT License 5 votes vote down vote up
def run_kmeans(self, X, K):
        """Cluster the whitened data and return (means, labels).

        `means` are the centres found by k-means in whitened space and
        `labels` gives, for every row of X, the index of its nearest
        centre.
        """
        whitened = vq.whiten(X)
        means = vq.kmeans(whitened, K, iter=100)[0]
        # vq.vq returns (codes, distances); only the codes are needed.
        labels = vq.vq(whitened, means)[0]
        return means, labels
Example #17
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans_0k(self):
        """Regression test for #546: fail when k arg is 0."""
        # Each (function, k) pair must be rejected with ValueError.
        rejected = (
            (kmeans, 0),
            (kmeans2, 0),
            (kmeans2, np.array([])),
        )
        for cluster_func, bad_k in rejected:
            assert_raises(ValueError, cluster_func, X, bad_k)
Example #18
Source File: test_vq.py    From Computable with MIT License 5 votes vote down vote up
def test_kmeans_simple(self):
        """One kmeans pass seeded with the first three rows of X must give CODET2."""
        # Stack the first three observations as the initial code book.
        seed_rows = np.vstack((X[0], X[1], X[2]))
        # kmeans receives a copy of the initial code book.
        result = kmeans(X, seed_rows.copy(), iter=1)[0]

        assert_array_almost_equal(result, CODET2)
Example #19
Source File: noteshrink.py    From noteshrink with MIT License 5 votes vote down vote up
def get_palette(samples, options, return_mask=False, kmeans_iter=40):

    '''Derive the palette from a set of sampled RGB values.

    The background color always takes the first palette slot. The other
    options.num_colors-1 slots are filled with the k-means centres of
    the foreground samples. The result is the palette, or a
    (palette, foreground mask) pair when return_mask is set.

    '''

    if not options.quiet:
        print('  getting palette...')

    bg_color = get_bg_color(samples, 6)
    fg_mask = get_fg_mask(bg_color, samples, options)

    # k-means runs on float32 copies of the foreground samples only.
    fg_samples = samples[fg_mask].astype(np.float32)
    n_clusters = options.num_colors-1
    centers, _distortion = kmeans(fg_samples, n_clusters, iter=kmeans_iter)

    palette = np.vstack((bg_color, centers)).astype(np.uint8)

    return (palette, fg_mask) if return_mask else palette

###################################################################### 
Example #20
Source File: analysis.py    From enlopy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization. The time scale of archetypes depends on the selected dimensions (x,y).
    For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by. Has to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an overview of the results
    Returns:
        np.ndarray: de-whitened cluster centres, one column per archetype,
        i.e. shape (number of features, number of centres found). kmeans
        may return fewer than k centres.
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)

    # Undo the whitening with the same scale whiten() applied: the
    # population standard deviation (ddof=0) of each column, with zero
    # deviations replaced by 1. pandas' df.std() defaults to ddof=1 and
    # would rescale the centres by a slightly different factor.
    scale = np.std(np.asarray(df), axis=0)
    scale[scale == 0] = 1.0
    # Broadcasting over the centre axis also works when kmeans returned
    # fewer than k centres.
    clusters_center_dewhitened = clusters_center.T * scale[:, np.newaxis]

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')
            plt.figure()  # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception: #FIXME: specify exception
            # Diagnostics are best-effort: a plotting failure must not
            # prevent the archetypes from being returned.
            print ('Works only with daily profile clustering')

    return clusters_center_dewhitened