Python scipy.cluster.vq.vq() Examples

The following are 22 code examples of scipy.cluster.vq.vq(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.cluster.vq , or try the search function

Example #1

Source File: knn.py From deepnl with GNU General Public License v3.0

7 votes

def Kmeans(file, vocabfile, k):
  np.random.seed((1000,2000))
  whitened = whiten(embeddings)
  codebook, distortion = kmeans(whitened, k)
  clusters = [l2_nearest(embeddings, c, representatives+1) for c in codebook]
  # output
  print(len(codebook), distortion)
  for centroid in codebook:
    print(' '.join([str(x) for x in centroid]))
  print()
  for cluster in clusters:
    print(' '.join([id_word[i] for i, d in cluster]).encode('utf-8'))
  print()
  # assign clusters to words
  codes, _ = vq(embeddings, codebook)
  for w, c in zip(word_id.keys(), codes):
    print(w, c)

Example #2

Source File: test_vq.py From Computable with MIT License

6 votes

def test_vq(self):
        initc = np.concatenate(([[X[0]], [X[1]], [X[2]]]))
        if TESTC:
            label1, dist = _vq.vq(X, initc)
            assert_array_equal(label1, LABEL1)
            tlabel1, tdist = vq(X, initc)
        else:
            print("== not testing C imp of vq ==")

    #def test_py_vq_1d(self):
    #    """Test special rank 1 vq algo, python implementation."""
    #    data = X[:, 0]
    #    initc = data[:3]
    #    a, b = _py_vq_1d(data, initc)
    #    ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis])
    #    assert_array_equal(a, ta)
    #    assert_array_equal(b, tb)

Example #3

Source File: pq.py From nanopq with MIT License

6 votes

def encode(self, vecs):
        """Encode input vectors into PQ-codes.

        Args:
            vecs (np.ndarray): Input vectors with shape=(N, D) and dtype=np.float32.

        Returns:
            np.ndarray: PQ codes with shape=(N, M) and dtype=self.code_dtype

        """
        assert vecs.dtype == np.float32
        assert vecs.ndim == 2
        N, D = vecs.shape
        assert D == self.Ds * self.M, "input dimension must be Ds * M"

        # codes[n][m] : code of n-th vec, m-th subspace
        codes = np.empty((N, self.M), dtype=self.code_dtype)
        for m in range(self.M):
            if self.verbose:
                print("Encoding the subspace: {} / {}".format(m, self.M))
            vecs_sub = vecs[:, m * self.Ds : (m+1) * self.Ds]
            codes[:, m], _ = vq(vecs_sub, self.codewords[m])

        return codes

Example #4

Source File: vq_test.py From Computable with MIT License

6 votes

def python_vq(all_data,code_book):
    import time
    t1 = time.time()
    codes1,dist1 = vq.vq(all_data,code_book)
    t2 = time.time()
    #print 'fast (double):', t2 - t1
    #print '  first codes:', codes1[:5]
    #print '  first dist:', dist1[:5]
    #print '  last codes:', codes1[-5:]
    #print '  last dist:', dist1[-5:]
    float_obs = all_data.astype(np.float32)
    float_code = code_book.astype(np.float32)
    t1 = time.time()
    codes1,dist1 = vq.vq(float_obs,float_code)
    t2 = time.time()
    #print 'fast (float):', t2 - t1
    #print '  first codes:', codes1[:5]
    #print '  first dist:', dist1[:5]
    #print '  last codes:', codes1[-5:]
    #print '  last dist:', dist1[-5:]

    return codes1,dist1

Example #5

Source File: clustergeojson.py From open-context-py with GNU General Public License v3.0

6 votes

def cluster_lon_lats(self):
        """Clusters the list of lon_lats into groups """
        np_lon_lats = []
        for lon_lat in self.lon_lats:
            dpoint = np.fromiter(lon_lat, np.dtype('float'))
            np_lon_lats.append(dpoint)
        data = array(np_lon_lats)
        centroids, _ = kmeans(data, self.number_clusters)
        idx, _ = vq(data, centroids)
        self.idx = idx
        self.data = data
        self.centroids = centroids
        # Sort the centroids by lon, then lat
        sc = centroids[centroids[:,1].argsort()]
        sc = sc[sc[:,0].argsort()]
        self.sorted_centroids = sc.tolist()

Example #6

Source File: bow.py From mmfeat with BSD 3-Clause "New" or "Revised" License

5 votes

def quantize(self):
        clusters = range(self.centroids.shape[0] + 1)
        histograms = {}
        for fname in sorted(self.data.keys()):
            if self.data[fname] is None: continue
            idx,_ = vq(self.data[fname], self.centroids)
            histograms[fname], _ = np.histogram(idx, bins=clusters, normed=self.normalize)
        return histograms

Example #7

Source File: noteshrink_module.py From noteshrinker-django with MIT License

5 votes

def apply_palette(img, palette, options):
    '''Apply the pallete to the given image. The first step is to set all
background pixels to the background color; then, nearest-neighbor
matching is used to map each foreground color to the closest one in
the palette.

    '''

    if not options.quiet:
        print('  applying palette...')

    bg_color = palette[0]

    fg_mask = get_fg_mask(bg_color, img, options)

    orig_shape = img.shape

    pixels = img.reshape((-1, 3))
    fg_mask = fg_mask.flatten()

    num_pixels = pixels.shape[0]

    labels = np.zeros(num_pixels, dtype=np.uint8)

    labels[fg_mask], _ = vq(pixels[fg_mask], palette)

    return labels.reshape(orig_shape[:-1])


######################################################################

Example #8

Source File: clustered_kde.py From kombine with MIT License

5 votes

def __init__(self, data, k=1):
        self._data = data
        self._nclusters = k

        self._mean = np.mean(data, axis=0)
        self._std = np.std(data, axis=0)

        # Cluster data that's mean 0 and scaled to unit width in each parameter independently
        white_data = self._whiten(data)
        self._centroids, _ = kmeans(white_data, k)
        self._assignments, _ = vq(white_data, self.centroids)

        self._kdes = [KDE(self.data[self.assignments == c]) for c in range(k)]
        self._logweights = np.log([np.count_nonzero(self.assignments == c)/self.size
                                   for c in range(k)])

Example #9

Source File: make_bow_vector.py From KTH-Action-Recognition with MIT License

5 votes

def make_bow(dataset, clusters, tfidf):
    print("Make bow vector for each frame")

    # Count total number of frames.
    n_frames = 0
    for video in dataset:
        n_frames += len(video["features"])

    # Init bow vectors for all frames.
    bow = np.zeros((n_frames, clusters.shape[0]), dtype=np.float)

    # Make bow vectors for all frames.
    frame_index = 0
    for video in dataset:
        for frame in video["features"]:
            visual_word_ids = vq(frame, clusters)[0]
            for word_id in visual_word_ids:
                bow[frame_index, word_id] += 1
            frame_index += 1

    # Check whether to use TF-IDF weighting.
    if tfidf:
        print("Applying TF-IDF weighting")
        freq = np.sum((bow > 0) * 1, axis = 0)
        idf = np.log((n_frames + 1) / (freq + 1))
        bow = bow * idf

    # Replace features in dataset with the bow vector we've computed.
    frame_index = 0
    for i in range(len(dataset)):
        features = []
        for frame in dataset[i]["features"]:
            features.append(bow[frame_index])
            frame_index += 1

        dataset[i]["features"] = features

        if (i + 1) % 50 == 0:
            print("Processed %d/%d videos" % (i + 1, len(dataset)))

    return dataset

Example #10

Source File: make_bow_vector.py From KTH-Action-Recognition with MIT License

5 votes

def make_bow(dataset, clusters, tfidf):
    print("Make bow vector for each frame")

    n_videos = len(dataset)

    bow = np.zeros((n_videos, clusters.shape[0]), dtype=np.float)

    # Make bow vectors for all videos.
    video_index = 0
    for video in dataset:
        visual_word_ids = vq(video["features"], clusters)[0]
        for word_id in visual_word_ids:
            bow[video_index, word_id] += 1
        video_index += 1

    # Check whether to use TF-IDF weighting.
    if tfidf:
        print("Applying TF-IDF weighting")
        freq = np.sum((bow > 0) * 1, axis = 0)
        idf = np.log((n_videos + 1) / (freq + 1))
        bow = bow * idf

    # Replace features in dataset with the bow vector we've computed.
    video_index = 0
    for i in range(len(dataset)):

        dataset[i]["features"] = bow[video_index]
        video_index += 1

        if (i + 1) % 50 == 0:
            print("Processed %d/%d videos" % (i + 1, len(dataset)))

    return dataset

Example #11

Source File: bow.py From mmfeat with BSD 3-Clause "New" or "Revised" License

5 votes

def sequences(self):
        sequences = {}
        for fname in sorted(self.data.keys()):
            if self.data[fname] is None: continue
            idx,_ = vq(self.data[fname], self.centroids)
            sequences[fname] = idx
        return sequences

Example #12

Source File: xmeans.py From msaf with MIT License

5 votes

def test_kmeans(K=5):
    """Test k-means with the synthetic data."""
    X = XMeans.generate_2d_data(K=4)
    wX = vq.whiten(X)
    dic, dist = vq.kmeans(wX, K, iter=100)

    plt.scatter(wX[:, 0], wX[:, 1])
    plt.scatter(dic[:, 0], dic[:, 1], color="m")
    plt.show()

Example #13

Source File: xmeans.py From msaf with MIT License

5 votes

def compute_bic(self, D, means, labels, K, R):
        """Computes the Bayesian Information Criterion."""
        D = vq.whiten(D)
        Rn = D.shape[0]
        M = D.shape[1]

        if R == K:
            return 1

        # Maximum likelihood estimate (MLE)
        mle_var = 0
        for k in range(len(means)):
            X = D[np.argwhere(labels == k)]
            X = X.reshape((X.shape[0], X.shape[-1]))
            for x in X:
                mle_var += distance.euclidean(x, means[k])
                #print x, means[k], mle_var
        mle_var /= float(R - K)

        # Log-likelihood of the data
        l_D = - Rn/2. * np.log(2*np.pi) - (Rn * M)/2. * np.log(mle_var) - \
            (Rn - K) / 2. + Rn * np.log(Rn) - Rn * np.log(R)

        # Params of BIC
        p = (K-1) + M * K + mle_var

        #print "BIC:", l_D, p, R, K

        # Return the bic
        return l_D - p / 2. * np.log(R)

Example #14

Source File: xmeans.py From msaf with MIT License

5 votes

def run_kmeans(self, X, K):
        """Runs k-means and returns the labels assigned to the data."""
        wX = vq.whiten(X)
        means, dist = vq.kmeans(wX, K, iter=100)
        labels, dist = vq.vq(wX, means)
        return means, labels

Example #15

Source File: test_vq.py From Computable with MIT License

5 votes

def test__vq_sametype(self):
        if TESTC:
            a = np.array([1, 2])
            b = a.astype(float)
            assert_raises(ValueError, _vq.vq, a, b)

Example #16

Source File: test_vq.py From Computable with MIT License

5 votes

def test_vq_1d(self):
        """Test special rank 1 vq algo, python implementation."""
        data = X[:, 0]
        initc = data[:3]
        if TESTC:
            a, b = _vq.vq(data, initc)
            ta, tb = py_vq(data[:, np.newaxis], initc[:, np.newaxis])
            assert_array_equal(a, ta)
            assert_array_equal(b, tb)
        else:
            print("== not testing C imp of vq (rank 1) ==")

Example #17

Source File: noteshrink.py From noteshrink with MIT License

5 votes

def apply_palette(img, palette, options):

    '''Apply the pallete to the given image. The first step is to set all
background pixels to the background color; then, nearest-neighbor
matching is used to map each foreground color to the closest one in
the palette.

    '''

    if not options.quiet:
        print('  applying palette...')

    bg_color = palette[0]

    fg_mask = get_fg_mask(bg_color, img, options)

    orig_shape = img.shape

    pixels = img.reshape((-1, 3))
    fg_mask = fg_mask.flatten()

    num_pixels = pixels.shape[0]

    labels = np.zeros(num_pixels, dtype=np.uint8)

    labels[fg_mask], _ = vq(pixels[fg_mask], palette)

    return labels.reshape(orig_shape[:-1])

######################################################################

Example #18

Source File: audio_tools.py From tools with BSD 3-Clause "New" or "Revised" License

4 votes

def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))

Example #19

Source File: audio_tools.py From dagbldr with BSD 3-Clause "New" or "Revised" License

4 votes

def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))

Example #20

Source File: analysis.py From enlopy with BSD 3-Clause "New" or "Revised" License

4 votes

def get_load_archetypes(Load, k=2, x='hour', y='dayofyear', plot_diagnostics=False):
    """Extract typical load profiles using k-means and vector quantization. the time scale of archetypes depend on the selected dimensions (x,y).
    For the default values daily archetypes will be extracted.

    Parameters:
        Load (pd.Series): timeseries
        k (int): number of archetypes to identify and extract
        x (str): This will define how the timeseries will be grouped by. Has to be an accessor of pd.DatetimeIndex
        y (str): similar to above for y axis.
        plot_diagnostics (bool): If true a figure is plotted showing an overview of the results
    Returns:
        np.ndarray: dimensions (k, len(x))
    """
    from scipy.cluster.vq import whiten, kmeans, vq

    df = reshape_timeseries(Load, x=x, y=y, aggfunc='mean').astype(float)
    df_white = whiten(df)
    clusters_center, __ = kmeans(df_white, k)
    clusters_center_dewhitened = clusters_center.T * np.array([df.std(), ] * k ).T

    if plot_diagnostics:
        try:
            import matplotlib.pyplot as plt
            clusters, _ = vq(df_white, clusters_center)
            cm = _n_colors_from_colormap(k)
            ax1 = df.T.plot(legend=False, alpha=.1,
                            color=[cm[i] for i in clusters])
            # Add colored cluster centers as lines
            ax1.set_prop_cycle('color', cm)
            ax1.plot(clusters_center_dewhitened, linewidth=3, linestyle='--')
            plt.figure()  # FIXME: works only with weekdays
            day_clusters = pd.DataFrame({y: Load.resample('d').mean().index.weekday,
                                         'clusters': clusters,
                                         'val': 1})
            x_labels = "Mon Tue Wed Thu Fri Sat Sun".split()
            day_clusters.pivot_table(columns=y, index='clusters',
                                     aggfunc='count').T.plot.bar(stacked=True)
            plt.gca().set_xticklabels(x_labels)
        except Exception: #FIXME: specify exception
            print ('Works only with daily profile clustering')

    return clusters_center_dewhitened

Example #21

Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

4 votes

def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, fftsize=n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))

Example #22

Source File: audio_tools.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

4 votes

def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music, 256
        # fruit perhaps due to samplerates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, fftsize=n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))