Python scipy.fftpack.dct() Examples

The following are 30 code examples of scipy.fftpack.dct(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module scipy.fftpack, or try the search function.
Example #1
Source File: audio.py    From representation_mixing with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def run_fft_dct_example():
    """Round-trip a speech sample through STFT + DCT with noisy frame deltas.

    The signal is transformed (STFT, real view, orthonormal DCT on the last
    axis), consecutive-row differences of the coefficients are perturbed with
    Gaussian noise, and the result is integrated with a cumulative sum and
    inverted. The log error ratio is printed and both the original and the
    reconstruction are written as WAV files in the working directory.

    NOTE(review): the printed value is 20*log10(||X - X_r|| / ||X||), i.e. the
    error-to-signal ratio in dB (the negative of the conventional SNR).
    """
    rng = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    signal = d[0]

    # Forward path: STFT -> real-valued view -> orthonormal DCT (last axis).
    spec_real = complex_to_real_view(stft(signal, fftsize=n_fft))
    coeffs = fftpack.dct(spec_real, axis=-1, norm='ortho')

    # Row-to-row differences, perturbed in place with noise scaled to 1% of
    # each column's standard deviation.
    deltas = coeffs[1:] - coeffs[:-1]
    col_std = deltas.std(axis=0, keepdims=True)
    n_rows = deltas.shape[0]
    n_cols = deltas.shape[1]
    deltas += .01 * col_std * rng.randn(n_rows, n_cols)

    # Inverse path: integrate the deltas, inverse DCT, back to complex, ISTFT.
    integrated = np.cumsum(deltas, axis=0)
    spec_back = real_to_complex_view(
        fftpack.idct(integrated, axis=-1, norm='ortho'))
    recon = istft(spec_back, n_fft)[:len(signal)]

    error_ratio = np.linalg.norm(signal - recon) / np.linalg.norm(signal)
    SNR = 20 * np.log10(error_ratio)
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(signal))
    wavfile.write("fftdct_rec.wav", fs, soundsc(recon))
Example #2
Source File: utils.py    From simple-blackbox-attack with MIT License 6 votes vote down vote up
def block_dct(x, block_size=8, masked=False, ratio=0.5):
    """Apply an orthonormal 2-D DCT to each block_size x block_size tile of x.

    :param x: 4-D torch tensor; tiles are taken over dimensions 2 and 3.
    :param block_size: side length of each square tile.
    :param masked: if true, keep only the top-left
        int(block_size * ratio) square of coefficients in every tile.
    :param ratio: fraction of block_size retained per axis when masked.
    :returns: tensor created by torch.zeros(x.size()) filled with the tile
        coefficients. The tile count per axis is int(x.size(2) / block_size)
        for both axes, so anything outside that grid stays zero.
    """
    out = torch.zeros(x.size())
    n_blocks = int(x.size(2) / block_size)

    # Mask selecting the low-frequency corner of every tile.
    keep = int(block_size * ratio)
    corner_mask = np.zeros((x.size(0), x.size(1), block_size, block_size))
    corner_mask[:, :, :keep, :keep] = 1

    for row in range(n_blocks):
        rows = slice(row * block_size, (row + 1) * block_size)
        for col in range(n_blocks):
            cols = slice(col * block_size, (col + 1) * block_size)
            tile = x[:, :, rows, cols].numpy()
            # Separable 2-D DCT: transform dimension 2, then dimension 3.
            coeffs = dct(tile, axis=2, norm='ortho')
            coeffs = dct(coeffs, axis=3, norm='ortho')
            if masked:
                coeffs = coeffs * corner_mask
            out[:, :, rows, cols] = torch.from_numpy(coeffs)
    return out


# applies IDCT to each block of size block_size 
Example #3
Source File: audio_tools.py    From tools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def run_fft_dct_example():
    """Demo: STFT + DCT round trip of a speech sample with noisy deltas.

    Takes the first fetched utterance, computes an STFT and an orthonormal DCT
    of its real view, adds Gaussian noise to the row-to-row coefficient
    differences, integrates them with a cumulative sum and inverts the chain.
    Prints the log error ratio and writes the original and reconstructed audio
    to WAV files in the working directory.

    NOTE(review): the printed quantity is 20*log10(||X - X_r|| / ||X||), which
    is the negative of the usual SNR definition.
    """
    noise_rng = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    audio = d[0]

    # Analysis: STFT -> real view -> DCT over the last axis.
    real_spec = complex_to_real_view(stft(audio, n_fft))
    dct_coeffs = fftpack.dct(real_spec, axis=-1, norm='ortho')

    # First differences between consecutive rows, with 1%-of-std noise added
    # in place.
    diffs = dct_coeffs[1:] - dct_coeffs[:-1]
    per_column_std = diffs.std(axis=0, keepdims=True)
    rows = diffs.shape[0]
    cols = diffs.shape[1]
    diffs += .01 * per_column_std * noise_rng.randn(rows, cols)

    # Synthesis: cumulative sum -> inverse DCT -> complex view -> ISTFT.
    summed = np.cumsum(diffs, axis=0)
    complex_spec = real_to_complex_view(
        fftpack.idct(summed, axis=-1, norm='ortho'))
    rebuilt = istft(complex_spec, n_fft)[:len(audio)]

    ratio = np.linalg.norm(audio - rebuilt) / np.linalg.norm(audio)
    SNR = 20 * np.log10(ratio)
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(audio))
    wavfile.write("fftdct_rec.wav", fs, soundsc(rebuilt))
Example #4
Source File: signal.py    From pyGSTi with Apache License 2.0 6 votes vote down vote up
def amplitudes_at_frequencies(freqInds, timeseries, times=None, transform='dct'):
    """
    Finds the amplitudes in the data at the specified frequency indices.

    Currently only works for the DCT.

    Parameters
    ----------
    freqInds : list or array
        Frequency indices at which to read off amplitudes. Used directly to
        index the transformed series.
    timeseries : dict
        Maps each key to a 1-D time series.
    times : unused
        Accepted for interface compatibility; not read by this function.
    transform : str
        Only 'dct' is supported.

    Returns
    -------
    dict
        Maps each key of `timeseries` to a list of amplitudes, one per entry
        of `freqInds` and in the same order.

    Raises
    ------
    NotImplementedError
        If `transform` is not 'dct' (raised when the first series is
        processed).
    """
    # Locate the entries that refer to the zero-frequency (DC) component.
    # The DC rescaling must follow index 0 wherever it sits in freqInds, not
    # just the first output slot.
    inds = _np.asarray(freqInds)
    if inds.dtype == bool:
        # A boolean mask selects positions; convert to the selected indices.
        inds = _np.flatnonzero(inds)
    is_dc = inds == 0

    amplitudes = {}
    for o in timeseries.keys():
        series = timeseries[o]

        if transform == 'dct':
            temp = _dct(series, norm='ortho')[freqInds] / _np.sqrt(len(series) / 2)
            if is_dc.any():
                # The orthonormal DCT scales the DC term differently from the
                # others; divide it by sqrt(2) to compensate.
                temp[is_dc] = temp[is_dc] / _np.sqrt(2)
            amplitudes[o] = list(temp)

        else:
            raise NotImplementedError("This function only currently works for the DCT!")

    return amplitudes
Example #5
Source File: parameters.py    From entropica_qaoa with Apache License 2.0 6 votes vote down vote up
def plot(self, ax=None, **kwargs):
        """Plot the DCT of ``self.v`` and the DST of the non-empty ``u`` arrays.

        :param ax: axes to draw on; a new figure and axes are created via
            ``plt.subplots()`` when omitted.
        :param kwargs: forwarded to every ``ax.plot`` call.

        A warning is always emitted, pointing to the raw Fourier coefficients
        (``params.v``, ``params.u_singles``, ``params.u_pairs``).
        """
        message = ("Plotting the gammas and x_rotation_angles through DCT and DST. "
                   "If you are interested in v, u_singles and u_pairs you can access "
                   "them via params.v, params.u_singles, params.u_pairs")
        warnings.warn(message)

        if ax is None:
            _, ax = plt.subplots()

        # v is transformed with a DCT, the u arrays with a DST, each evaluated
        # at n_steps points.
        betas = dct(self.v, n=self.n_steps)
        ax.plot(betas, label="betas", marker="s", ls="", **kwargs)

        if not _is_iterable_empty(self.u_singles):
            gammas_singles = dst(self.u_singles, n=self.n_steps)
            ax.plot(gammas_singles,
                    label="gammas_singles", marker="^", ls="", **kwargs)

        if not _is_iterable_empty(self.u_pairs):
            gammas_pairs = dst(self.u_pairs, n=self.n_steps)
            ax.plot(gammas_pairs,
                    label="gammas_pairs", marker="v", ls="", **kwargs)

        ax.set_xlabel("timestep")
        ax.legend()
Example #6
Source File: _parameter_conversions.py    From entropica_qaoa with Apache License 2.0 6 votes vote down vote up
def fourier_extended_to_extended(
        params: FourierExtendedParams) -> ExtendedParams:
    """Convert Fourier-extended parameters into ExtendedParams.

    A deep copy of ``params`` is re-classed to ``ExtendedParams``. Its
    ``betas`` are set to the DCT of ``params.v`` and its ``gammas_singles`` /
    ``gammas_pairs`` to the DST of ``params.u_singles`` / ``params.u_pairs``,
    each along axis 0 with ``n_steps`` output points. The Fourier-only
    attributes are then removed from the copy.

    :param params: the Fourier-extended parameter object; it is not modified.
    :returns: the converted copy.
    """
    converted = deepcopy(params)
    converted.__class__ = ExtendedParams

    converted.betas = dct(params.v, n=converted.n_steps, axis=0)
    converted.gammas_singles = dst(
        params.u_singles, n=converted.n_steps, axis=0)
    converted.gammas_pairs = dst(
        params.u_pairs, n=converted.n_steps, axis=0)

    # Drop the attributes that only exist on the Fourier parametrisation.
    for leftover in ("__u_singles", "__u_pairs", "__v", "q"):
        delattr(converted, leftover)

    return converted


# #############################################################################
# And now all the possible compositions as well:
# Todo: Create this code automatically by traversing the tree?
# ############################################################################# 
Example #7
Source File: base.py    From aenet with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    The filterbank energies returned by ``fbank`` are log-compressed,
    decorrelated with an orthonormal type-2 DCT along axis 1, truncated to
    ``numcep`` coefficients and passed through ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    bank_energies, frame_energy = fbank(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph)

    log_bank = numpy.log(bank_energies)
    cepstra = dct(log_bank, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # Column 0 carries the log frame energy instead of the 0th cepstrum.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
Example #8
Source File: base.py    From aenet with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def mfccVTLP(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, alpha=1.0):
    """Compute MFCC features from an audio signal using the VTLP filterbank.

    Same pipeline as ``mfcc`` (log, orthonormal type-2 DCT along axis 1,
    truncation to ``numcep``, liftering), but the filterbank energies come
    from ``fbankVTLP``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param alpha: forwarded to ``fbankVTLP``; presumably the VTLP warping factor (TODO confirm). Default is 1.0.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    # NOTE(review): the literal False is the 10th positional argument of
    # fbankVTLP; its meaning is not visible from this block.
    bank_energies, frame_energy = fbankVTLP(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq,
        preemph, False, alpha)

    log_bank = numpy.log(bank_energies)
    cepstra = dct(log_bank, type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # Column 0 carries the log frame energy instead of the 0th cepstrum.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
Example #9
Source File: qmf_realtime_class.py    From ASP with GNU General Public License v3.0 6 votes vote down vote up
def DCT4(samples):
    """
        Compute a DCT4 transformation by means of a DCT3.

        The input is interleaved with zeros into a buffer of length 2*N
        (N is a module-level value), transformed with an orthonormal DCT3,
        scaled by sqrt(2), and the first N outputs are returned.

        Arguments   :

            samples : (1D Array) Input samples to be transformed

        Returns     :

            y       :  (1D Array) Transformed output samples

    """

    # Zero-filled float32 buffer of twice the block length; the input goes
    # into the odd positions.
    upsampled = np.zeros(2 * N, dtype=np.float32)
    upsampled[1::2] = samples

    scale = np.sqrt(2)
    transformed = spfft.dct(upsampled, type=3, norm='ortho') * scale

    return transformed[0:N]

#The DST4 transform: 
Example #10
Source File: apply_dct.py    From Contrastive-Predictive-Coding-PyTorch with MIT License 6 votes vote down vote up
def main():
    """Read matrices from an scp file, apply a type-2 DCT, write ark/scp.

    Command-line options: --input-scp, --output-scp, --output-ark and
    --dct-dim (int, passed as ``n`` to the DCT). Output is piped through
    ``copy-feats --compress=true``.
    """
    ## Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    for flag in ('--input-scp', '--output-scp', '--output-ark'):
        parser.add_argument(flag)
    parser.add_argument('--dct-dim', type=int)
    args = parser.parse_args()

    # Pipe specifier handed to ko.open_or_fd for the compressed ark/scp pair.
    pipe_prefix = 'ark:| copy-feats --compress=true ark:- ark,scp:'
    ark_scp_output = pipe_prefix + args.output_ark + ',' + args.output_scp

    with ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt_key, features in ko.read_mat_scp(args.input_scp):
            transformed = fft.dct(features, type=2, n=args.dct_dim)
            ko.write_mat(sink, transformed, key=utt_key)

    print('#################success#################')
Example #11
Source File: forward_pass_v1.py    From Contrastive-Predictive-Coding-PyTorch with MIT License 6 votes vote down vote up
def forward_dct(args, cpc_model, device, data_loader, output_ark, output_scp, dct_dim=24):
    ''' Run the model over data_loader and write DCT-transformed outputs.

    For every (utt_id, data) item the model prediction is moved to the CPU,
    converted to numpy, transformed with a type-2 DCT of length ``dct_dim``
    and written through a ``copy-feats`` pipe to ``output_ark`` /
    ``output_scp``. ``args`` is not read here.
    '''

    logger.info("Starting Forward Passing")
    cpc_model.eval() # inference only, no training

    pipe_prefix = 'ark:| copy-feats --compress=true ark:- ark,scp:'
    ark_scp_output = pipe_prefix + output_ark + ',' + output_scp

    with torch.no_grad(), ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt_id, data in data_loader:
            # Add a channel dimension and move the input to the target device.
            batch = data.float().unsqueeze(1).to(device)
            batch = batch.contiguous()
            hidden = cpc_model.init_hidden(len(batch))
            output, hidden = cpc_model.predict(batch, hidden)
            # kaldi io works on numpy arrays, not torch tensors.
            features = output.squeeze(0).cpu().numpy()
            transformed = fft.dct(features, type=2, n=dct_dim)
            ko.write_mat(sink, transformed, key=utt_id[0])
Example #12
Source File: linalg.py    From alphacsc with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def dctii(x, axes=None):
    """
    Compute a multi-dimensional DCT-II over specified array axes by
    applying the one-dimensional :func:`scipy.fftpack.dct` (type 2,
    normalization mode 'ortho') along each requested axis in turn.

    Parameters
    ----------
    x : array_like
      Input array
    axes : sequence of ints, optional (default None)
      Axes over which to compute the DCT-II. When None, every axis of
      `x` is transformed.

    Returns
    -------
    y : ndarray
      DCT-II of input array
    """

    selected_axes = list(range(x.ndim)) if axes is None else axes
    result = x
    for axis in selected_axes:
        result = fftpack.dct(result, type=2, axis=axis, norm='ortho')
    return result
Example #13
Source File: base.py    From Artificial-Intelligence-with-Python with MIT License 6 votes vote down vote up
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    Steps: filterbank energies from ``fbank``, natural log, orthonormal
    type-2 DCT along axis 1, truncation to ``numcep`` columns, liftering.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    mel_energies, total_energy = fbank(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph)

    coefficients = dct(numpy.log(mel_energies), type=2, axis=1, norm='ortho')
    coefficients = coefficients[:, :numcep]
    coefficients = lifter(coefficients, ceplifter)

    if appendEnergy:
        # Overwrite the first cepstral coefficient with the log frame energy.
        coefficients[:, 0] = numpy.log(total_energy)
    return coefficients
Example #14
Source File: audio_tools.py    From dagbldr with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def run_fft_dct_example():
    """Example: reconstruct speech from noisy DCT-of-STFT frame differences.

    The first fetched utterance is analysed (STFT, real view, orthonormal DCT
    over the last axis). Gaussian noise is added to the consecutive-row
    differences of the coefficients, which are then re-integrated with a
    cumulative sum and inverted. The log error ratio is printed and both
    signals are written to WAV files in the working directory.

    NOTE(review): 20*log10(||X - X_r|| / ||X||) is an error-to-signal ratio,
    so the printed "SNR" has the opposite sign from the usual convention.
    """
    prng = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    original = d[0]

    # Analysis chain.
    stft_real = complex_to_real_view(stft(original, n_fft))
    transformed = fftpack.dct(stft_real, axis=-1, norm='ortho')

    # Differences between neighbouring rows; noise (1% of each column's std)
    # is added in place.
    steps = transformed[1:] - transformed[:-1]
    step_std = steps.std(axis=0, keepdims=True)
    height = steps.shape[0]
    width = steps.shape[1]
    steps += .01 * step_std * prng.randn(height, width)

    # Synthesis chain.
    accumulated = np.cumsum(steps, axis=0)
    stft_complex = real_to_complex_view(
        fftpack.idct(accumulated, axis=-1, norm='ortho'))
    reconstructed = istft(stft_complex, n_fft)[:len(original)]

    residual_ratio = (np.linalg.norm(original - reconstructed)
                      / np.linalg.norm(original))
    SNR = 20 * np.log10(residual_ratio)
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(original))
    wavfile.write("fftdct_rec.wav", fs, soundsc(reconstructed))
Example #15
Source File: audio.py    From representation_mixing with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def dct_compress(X, n_components, window_size=128):
    """
    Compress a 1-D signal with a windowed, orthonormal DCT.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional. It is
        zero-padded at the end up to a multiple of window_size.

    n_components : int or None
        The number of DCT components to keep per window. Setting
        n_components to about .5 * window_size can give compression with
        fairly good reconstruction. None keeps every coefficient.

    window_size : int
        The input X is broken into non-overlapping windows of window_size,
        each of which is then compressed with the DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, n_components)
       A 2D array of non-overlapping DCT coefficients (window_size columns
       when n_components is None). For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    remainder = len(X) % window_size
    if remainder != 0:
        # Pad the tail so the signal splits into whole windows.
        X = np.hstack((X, np.zeros(window_size - remainder)))

    windows = X.reshape((len(X) // window_size, window_size))
    coeffs = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coeffs
    return coeffs[:, :n_components]
Example #16
Source File: celebA_estimators.py    From csgm with MIT License 5 votes vote down vote up
def dct2(image_channel):
    """Return the orthonormal 2-D DCT of a 2-D array.

    The 1-D DCT is applied along the last axis of the transposed input, the
    result is transposed back, and the DCT is applied along the last axis
    again.
    """
    first_pass = fftpack.dct(image_channel.T, norm='ortho')
    return fftpack.dct(first_pass.T, norm='ortho')
Example #17
Source File: audio.py    From asr-study with MIT License 5 votes vote down vote up
def _call(self, signal):
        """Compute MFCC features from an audio signal.

        The parent class' ``_call`` supplies the filterbank features and
        frame energies; these are log-compressed, transformed with an
        orthonormal type-2 DCT along axis 1, truncated to ``self.num_cep``
        columns and liftered.

        Args:
            signal: the audio signal from which to compute features. Should be
            an N*1 array

        Returns:
            A numpy array of size (NUMFRAMES by numcep) containing features.
            Each row holds 1 feature vector. When ``self.d`` is set the
            deltas are appended as extra columns, and with ``self.dd`` the
            delta-deltas as well.
        """
        fbank_feat, energy = super(MFCC, self)._call(signal)

        cepstra = dct(np.log(fbank_feat), type=2, axis=1, norm='ortho')
        cepstra = self._lifter(cepstra[:, :self.num_cep], self.cep_lifter)

        if self.append_energy:
            # replace first cepstral coefficient with log of frame energy
            cepstra[:, 0] = np.log(energy + self.eps)

        if not self.d:
            return cepstra

        # Stack the base features with their deltas (and delta-deltas).
        deltas = sigproc.delta(cepstra, 2)
        columns = [cepstra, deltas]
        if self.dd:
            columns.append(sigproc.delta(deltas, 2))
        return np.hstack(columns)
Example #18
Source File: test_lineardct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_dct_3d():
    """dct_3d must match scipy's type-2 DCT applied over axes 3, 2 and 1."""
    for size in [2, 5, 32]:
        x = np.random.normal(size=(1, size, size, size))

        # Reference: separable 1-D transforms, last axis first.
        expected = x
        for axis in (3, 2, 1):
            expected = fftpack.dct(expected, axis=axis, type=2)

        actual = dct_3d(torch.tensor(x).float()).numpy()
        worst = np.abs(expected - actual).max()
        assert worst < EPS, (expected, actual)
Example #19
Source File: test_lineardct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_dct_2d():
    """dct_2d must match scipy's type-2 DCT applied over axes 2 and 1."""
    for size in [2, 5, 32]:
        x = np.random.normal(size=(1, size, size))

        # Reference: separable 1-D transforms, last axis first.
        expected = x
        for axis in (2, 1):
            expected = fftpack.dct(expected, axis=axis, type=2)

        actual = dct_2d(torch.tensor(x).float()).numpy()
        worst = np.abs(expected - actual).max()
        assert worst < EPS, (expected, actual)
Example #20
Source File: audio.py    From representation_mixing with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def overlap_dct_compress(X, n_components, window_size):
    """
    Window X with ``halfoverlap`` and compress each window with the DCT.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int or None
        number of DCT components to keep; None keeps all of them

    window_size : int
        Size of windows to take (passed to ``halfoverlap``, which per the
        original description overlaps at 50% of window_size)

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    windows = halfoverlap(X, window_size)
    coeffs = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coeffs
    return coeffs[:, :n_components]


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense 
Example #21
Source File: test_lineardct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_idct():
    """idct(dct(x)) must reproduce x for both normalisation modes."""
    for norm_mode in [None, 'ortho']:
        for length in [5, 2, 32, 111]:
            x = np.random.normal(size=(1, length))
            forward = dct(torch.tensor(x).float(), norm=norm_mode)
            round_trip = idct(forward, norm=norm_mode).numpy()
            worst = np.abs(x - round_trip).max()
            assert worst < EPS, x
Example #22
Source File: test_lineardct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_dct1():
    """dct1 must agree with scipy's type-1 DCT on rows and on small cubes."""

    def check(shape):
        # Compare against scipy on a fresh random array of the given shape.
        x = np.random.normal(size=shape)
        expected = fftpack.dct(x, type=1)
        actual = dct1(torch.tensor(x).float()).numpy()
        assert np.abs(expected - actual).max() < EPS, expected

    for length in [2, 5, 32, 111]:
        check((1, length,))

    for ndim in [2, 3, 4]:
        check((2,) * ndim)
Example #23
Source File: test_lineardct.py    From torch-dct with MIT License 5 votes vote down vote up
def dct(x, norm=None):
    """Apply a ``torch_dct.LinearDCT`` of type 'dct', sized ``x.size(1)``, to x.

    Returns the ``.data`` of the layer output.
    """
    layer = torch_dct.LinearDCT(x.size(1), type='dct', norm=norm)
    return layer(x).data
Example #24
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_idct_3d():
    """idct_3d(dct_3d(x)) must reproduce x for all tested shape combinations."""
    sizes = [2, 5, 32]
    shapes = [(1, n1, n2, n3) for n1 in sizes for n2 in sizes for n3 in sizes]
    for shape in shapes:
        x = np.random.normal(size=shape)
        forward = dct.dct_3d(torch.tensor(x))
        round_trip = dct.idct_3d(forward).numpy()
        assert np.abs(x - round_trip).max() < EPS, x
Example #25
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_dct_3d():
    """dct.dct_3d must match scipy's type-2 DCT over axes 3, 2 and 1."""
    sizes = [2, 5, 32]
    shapes = [(1, n1, n2, n3) for n1 in sizes for n2 in sizes for n3 in sizes]
    for shape in shapes:
        x = np.random.normal(size=shape)

        # Reference: separable 1-D transforms, last axis first.
        expected = x
        for axis in (3, 2, 1):
            expected = fftpack.dct(expected, axis=axis, type=2)

        actual = dct.dct_3d(torch.tensor(x)).numpy()
        assert np.abs(expected - actual).max() < EPS, (expected, actual)
Example #26
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_idct_2d():
    """idct_2d(dct_2d(x)) must reproduce x for all tested shape combinations."""
    sizes = [2, 5, 32]
    shapes = [(1, n1, n2) for n1 in sizes for n2 in sizes]
    for shape in shapes:
        x = np.random.normal(size=shape)
        forward = dct.dct_2d(torch.tensor(x))
        round_trip = dct.idct_2d(forward).numpy()
        assert np.abs(x - round_trip).max() < EPS, x
Example #27
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_cuda():
    """Repeat the DCT-I, DCT-II and round-trip checks on 'cuda:0'.

    Does nothing when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return

    device = torch.device('cuda:0')

    def on_gpu(array):
        # Build the input tensor directly on the CUDA device.
        return torch.tensor(array, device=device)

    # DCT-I against scipy: single rows, then small hypercubes.
    for N in [2, 5, 32, 111]:
        x = np.random.normal(size=(1, N,))
        expected = fftpack.dct(x, type=1)
        actual = dct.dct1(on_gpu(x)).cpu().numpy()
        assert np.abs(expected - actual).max() < EPS, expected

    for d in [2, 3, 4]:
        x = np.random.normal(size=(2,) * d)
        expected = fftpack.dct(x, type=1)
        actual = dct.dct1(on_gpu(x)).cpu().numpy()
        assert np.abs(expected - actual).max() < EPS, expected

    for norm in [None, 'ortho']:
        # DCT-II against scipy: single rows, then small hypercubes.
        for N in [2, 3, 5, 32, 111]:
            x = np.random.normal(size=(1, N,))
            expected = fftpack.dct(x, type=2, norm=norm)
            actual = dct.dct(on_gpu(x), norm=norm).cpu().numpy()
            assert np.abs(expected - actual).max() < EPS, (norm, N)

        for d in [2, 3, 4, 11]:
            x = np.random.normal(size=(2,) * d)
            expected = fftpack.dct(x, type=2, norm=norm)
            actual = dct.dct(on_gpu(x), norm=norm).cpu().numpy()
            assert np.abs(expected - actual).max() < EPS, (norm, d)

        # Forward followed by inverse must reproduce the input.
        for N in [5, 2, 32, 111]:
            x = np.random.normal(size=(1, N))
            forward = dct.dct(on_gpu(x), norm=norm)
            round_trip = dct.idct(forward, norm=norm).cpu().numpy()
            assert np.abs(x - round_trip).max() < EPS, x
Example #28
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_idct():
    """dct.idct(dct.dct(x)) must reproduce x for both normalisation modes."""
    for norm_mode in [None, 'ortho']:
        for length in [5, 2, 32, 111]:
            x = np.random.normal(size=(1, length))
            forward = dct.dct(torch.tensor(x), norm=norm_mode)
            round_trip = dct.idct(forward, norm=norm_mode).numpy()
            worst = np.abs(x - round_trip).max()
            assert worst < EPS, x
Example #29
Source File: test_dct.py    From torch-dct with MIT License 5 votes vote down vote up
def test_dct():
    """dct.dct must agree with scipy's type-2 DCT for both normalisations."""
    for norm in [None, 'ortho']:

        def check(shape, label):
            # Compare against scipy on a fresh random array of this shape.
            x = np.random.normal(size=shape)
            expected = fftpack.dct(x, type=2, norm=norm)
            actual = dct.dct(torch.tensor(x), norm=norm).numpy()
            assert np.abs(expected - actual).max() < EPS, (norm, label)

        for N in [2, 3, 5, 32, 111]:
            check((1, N,), N)

        for d in [2, 3, 4, 11]:
            check((2,) * d, d)
Example #30
Source File: sonopy.py    From sonopy with Apache License 2.0 5 votes vote down vote up
def mfcc_spec(audio, sample_rate, window_stride=(160, 80),
              fft_size=512, num_filt=20, num_coeffs=13, return_parts=False):
    """Calculates mel frequency cepstrum coefficient spectrogram

    The power spectrogram from ``power_spec`` is projected onto the filters
    from ``filterbanks``, passed through ``safe_log`` and transformed with an
    orthonormal DCT; the first ``num_coeffs`` columns are kept and column 0 is
    replaced by the ``safe_log`` of the summed power of each row.

    Returns the MFCC array, or the tuple (powers, filters, mels, mfccs) when
    ``return_parts`` is true. An empty power spectrogram yields an empty array
    with min(num_filt, num_coeffs) columns.
    """
    powers = power_spec(audio, window_stride, fft_size)
    if powers.size == 0:
        n_columns = min(num_filt, num_coeffs)
        return np.empty((0, n_columns))

    filters = filterbanks(sample_rate, num_filt, powers.shape[1])

    # Mel energies (condensed spectrogram).
    mel_energies = np.dot(powers, filters.T)
    mels = safe_log(mel_energies)

    # Machine readable spectrogram: keep the leading DCT coefficients.
    mfccs = dct(mels, norm='ortho')[:, :num_coeffs]

    # Replace first band with log energies.
    row_power = np.sum(powers, 1)
    mfccs[:, 0] = safe_log(row_power)

    if not return_parts:
        return mfccs
    return powers, filters, mels, mfccs