Python scipy.fftpack.dct() Examples
The following are 30 code examples of scipy.fftpack.dct().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.fftpack, or try the search function.
Example #1
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 6 votes |
def run_fft_dct_example():
    """Run a lossy STFT -> DCT -> delta-coding round trip on a speech sample.

    The sample is transformed with ``stft``, viewed as real values, and
    passed through an orthonormal DCT along the last axis. Differences
    between consecutive rows are perturbed with seeded Gaussian noise scaled
    to 1% of each column's standard deviation, cumulatively summed, and then
    inverted back to a waveform. The error figure is printed and both the
    input and the reconstruction are written to ``fftdct_orig.wav`` and
    ``fftdct_rec.wav``.
    """
    random_state = np.random.RandomState(1999)
    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]

    # Analysis: STFT -> real view -> orthonormal DCT over the last axis.
    coeffs = fftpack.dct(complex_to_real_view(stft(X, fftsize=n_fft)),
                         axis=-1, norm='ortho')

    # Delta-code along axis 0 and add the seeded perturbation in place.
    deltas = coeffs[1:] - coeffs[:-1]
    delta_std = deltas.std(axis=0, keepdims=True)
    n_rows, n_cols = deltas.shape[0], deltas.shape[1]
    deltas += .01 * delta_std * random_state.randn(n_rows, n_cols)

    # Synthesis: undo the differencing, then invert the DCT and the STFT.
    accumulated = np.cumsum(deltas, axis=0)
    spectrum = real_to_complex_view(
        fftpack.idct(accumulated, axis=-1, norm='ortho'))
    X_r = istft(spectrum, n_fft)[:len(X)]

    # NOTE(review): the ratio is error norm over signal norm, so the printed
    # value has the opposite sign of a conventional SNR in dB - confirm intent.
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
Example #2
Source File: utils.py From simple-blackbox-attack with MIT License | 6 votes |
def block_dct(x, block_size=8, masked=False, ratio=0.5):
    """Apply an orthonormal 2-D DCT to every non-overlapping block of ``x``.

    Args:
        x: 4-D torch tensor; axes 2 and 3 are tiled into
            ``block_size`` x ``block_size`` blocks. It is converted with
            ``.numpy()``, so it must live on the CPU.
        block_size: Side length of each square block.
        masked: When true, only the top-left
            ``int(block_size * ratio)`` x ``int(block_size * ratio)``
            coefficients of each block are kept; the rest are zeroed.
        ratio: Fraction of ``block_size`` retained by the mask.

    Returns:
        A tensor created by ``torch.zeros(x.size())`` holding the block-wise
        DCT coefficients. Trailing rows/columns that do not fill a whole
        block are left at zero.
    """
    z = torch.zeros(x.size())

    # Count blocks per axis so that non-square inputs are tiled correctly
    # (previously the axis-2 count was reused for axis 3).
    n_block_rows = x.size(2) // block_size
    n_block_cols = x.size(3) // block_size

    # Low-frequency mask shared by all blocks.
    kept = int(block_size * ratio)
    mask = np.zeros((x.size(0), x.size(1), block_size, block_size))
    mask[:, :, :kept, :kept] = 1

    for i in range(n_block_rows):
        rows = slice(i * block_size, (i + 1) * block_size)
        for j in range(n_block_cols):
            cols = slice(j * block_size, (j + 1) * block_size)
            submat = x[:, :, rows, cols].numpy()
            # Separable 2-D DCT: transform axis 2, then axis 3.
            submat_dct = dct(dct(submat, axis=2, norm='ortho'),
                             axis=3, norm='ortho')
            if masked:
                submat_dct = submat_dct * mask
            z[:, :, rows, cols] = torch.from_numpy(submat_dct)
    return z


# applies IDCT to each block of size block_size
Example #3
Source File: audio_tools.py From tools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def run_fft_dct_example():
    """Demonstrate a noisy delta-coded DCT round trip of an STFT.

    Steps: fetch a speech sample, take its STFT, apply an orthonormal DCT
    over the last axis of the real view, difference consecutive rows, add
    seeded Gaussian noise (1% of each column's standard deviation),
    cumulatively sum, and invert the DCT and STFT. Prints the error figure
    and writes ``fftdct_orig.wav`` and ``fftdct_rec.wav``.
    """
    random_state = np.random.RandomState(1999)
    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]

    # Forward transforms.
    real_view = complex_to_real_view(stft(X, n_fft))
    coeffs = fftpack.dct(real_view, axis=-1, norm='ortho')

    # Row-to-row differences, perturbed in place.
    diffs = coeffs[1:] - coeffs[:-1]
    col_std = diffs.std(axis=0, keepdims=True)
    height, width = diffs.shape[0], diffs.shape[1]
    diffs += .01 * col_std * random_state.randn(height, width)

    # Inverse transforms.
    summed = np.cumsum(diffs, axis=0)
    inverse = fftpack.idct(summed, axis=-1, norm='ortho')
    X_r = istft(real_to_complex_view(inverse), n_fft)[:len(X)]

    # NOTE(review): numerator is the reconstruction error, denominator the
    # signal, i.e. the negative of the usual SNR in dB - confirm intent.
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
Example #4
Source File: signal.py From pyGSTi with Apache License 2.0 | 6 votes |
def amplitudes_at_frequencies(freqInds, timeseries, times=None, transform='dct'):
    """
    Finds the amplitudes in the data at the specified frequency indices.

    Currently only works for the DCT.

    Parameters
    ----------
    freqInds : list or array of int
        Indices of the DCT modes to extract. May be given in any order.

    timeseries : dict
        Maps each key to a 1-D sequence of data values. Every sequence is
        transformed independently.

    times : optional
        Unused by this function; accepted for interface compatibility.

    transform : str, optional
        The transform to use. Only 'dct' is implemented.

    Returns
    -------
    dict
        For each key of `timeseries`, a list of amplitudes ordered like
        `freqInds`. Each orthonormal DCT coefficient is divided by
        sqrt(len(series) / 2), and the index-0 mode is additionally divided
        by sqrt(2).

    Raises
    ------
    NotImplementedError
        If `transform` is not 'dct' and `timeseries` is non-empty.
    """
    # Locate the zero-frequency entries by *value*, so the extra rescaling is
    # applied to the right output slot(s) wherever index 0 appears.
    inds = _np.asarray(freqInds)
    if inds.dtype == bool:
        # A boolean mask selects modes in positional order.
        inds = _np.flatnonzero(inds)
    zero_mode = inds == 0
    has_zero_mode = bool(_np.any(zero_mode))

    amplitudes = {}
    for o in timeseries.keys():
        if transform != 'dct':
            raise NotImplementedError("This function only currently works for the DCT!")

        series = timeseries[o]
        temp = _dct(series, norm='ortho')[freqInds] / _np.sqrt(len(series) / 2)
        if has_zero_mode:
            temp[zero_mode] = temp[zero_mode] / _np.sqrt(2)
        amplitudes[o] = list(temp)

    return amplitudes
Example #5
Source File: parameters.py From entropica_qaoa with Apache License 2.0 | 6 votes |
def plot(self, ax=None, **kwargs):
    """Plot the DCT of ``v`` and the DST of ``u_singles`` / ``u_pairs``.

    A warning is always emitted to point out that the transformed values,
    not the raw Fourier coefficients, are being drawn.

    Parameters
    ----------
    ax:
        Axes to draw on. When ``None`` a new figure and axes are created
        with ``plt.subplots()``.
    **kwargs:
        Forwarded to every ``ax.plot`` call.
    """
    warnings.warn("Plotting the gammas and x_rotation_angles through DCT "
                  "and DST. If you are interested in v, u_singles and "
                  "u_pairs you can access them via params.v, "
                  "params.u_singles, params.u_pairs")
    if ax is None:
        _, ax = plt.subplots()

    ax.plot(dct(self.v, n=self.n_steps),
            label="betas", marker="s", ls="", **kwargs)

    # The two gamma series share the same recipe; skip any that are empty.
    gamma_series = (
        (self.u_singles, "gammas_singles", "^"),
        (self.u_pairs, "gammas_pairs", "v"),
    )
    for coefficients, label, marker in gamma_series:
        if _is_iterable_empty(coefficients):
            continue
        ax.plot(dst(coefficients, n=self.n_steps),
                label=label, marker=marker, ls="", **kwargs)

    ax.set_xlabel("timestep")
    ax.legend()
Example #6
Source File: _parameter_conversions.py From entropica_qaoa with Apache License 2.0 | 6 votes |
def fourier_extended_to_extended(
        params: FourierExtendedParams) -> ExtendedParams:
    """Convert Fourier-extended parameters into ``ExtendedParams``.

    A deep copy of ``params`` is re-classed to ``ExtendedParams``. Its
    ``betas`` are the DCT of ``params.v`` and its ``gammas_singles`` /
    ``gammas_pairs`` are the DST of ``params.u_singles`` /
    ``params.u_pairs``, each evaluated with ``n_steps`` output points along
    axis 0. The Fourier-only attributes are then removed from the copy.
    """
    converted = deepcopy(params)
    converted.__class__ = ExtendedParams
    steps = converted.n_steps

    converted.betas = dct(params.v, n=steps, axis=0)
    converted.gammas_singles = dst(params.u_singles, n=steps, axis=0)
    converted.gammas_pairs = dst(params.u_pairs, n=steps, axis=0)

    # and clean up
    del converted.__u_singles, converted.__u_pairs, converted.__v, converted.q
    return converted


# #############################################################################
# And now all the possible compositions as well:
# Todo: Create this code automatically by traversing the tree?
# #############################################################################
Example #7
Source File: base.py From aenet with BSD 3-Clause "New" or "Revised" License | 6 votes |
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    The filterbank energies from ``fbank`` are log-compressed, transformed
    with an orthonormal type-2 DCT along axis 1, truncated to ``numcep``
    coefficients and passed through ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    filterbank_energies, frame_energy = fbank(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph)

    cepstra = dct(numpy.log(filterbank_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
Example #8
Source File: base.py From aenet with BSD 3-Clause "New" or "Revised" License | 6 votes |
def mfccVTLP(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
             nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True,
             alpha=1.0):
    """Compute MFCC features from an audio signal using ``fbankVTLP``.

    Identical in structure to a plain MFCC computation, except that the
    filterbank energies come from ``fbankVTLP``, which additionally receives
    a literal ``False`` and ``alpha`` as its last two positional arguments.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :param alpha: forwarded unchanged to ``fbankVTLP``; presumably the VTLP warping factor - confirm against ``fbankVTLP``. Default is 1.0.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    filterbank_energies, frame_energy = fbankVTLP(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq,
        preemph, False, alpha)

    cepstra = dct(numpy.log(filterbank_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)

    if appendEnergy:
        # replace first cepstral coefficient with log of frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
Example #9
Source File: qmf_realtime_class.py From ASP with GNU General Public License v3.0 | 6 votes |
def DCT4(samples):
    """
        Method to create DCT4 transformation using DCT3

        The input is written to the odd positions of a zero buffer of length
        2*N, an orthonormal DCT3 is applied, the result is scaled by sqrt(2)
        and the first N values are returned. ``N`` is read from the
        enclosing scope.

        Arguments   :
            samples : (1D Array) Input samples to be transformed

        Returns     :
            y       :  (1D Array) Transformed output samples
    """
    # Zero-filled float32 buffer; the samples occupy indices 1, 3, 5, ...
    upsampled = np.zeros(2*N, dtype=np.float32)
    upsampled[1::2] = samples

    transformed = spfft.dct(upsampled, type=3, norm='ortho')
    y = transformed * np.sqrt(2)
    return y[0:N]


#The DST4 transform:
Example #10
Source File: apply_dct.py From Contrastive-Predictive-Coding-PyTorch with MIT License | 6 votes |
def main():
    """Apply a type-2 DCT to every matrix of an input scp and write ark/scp.

    Command-line options: ``--input-scp``, ``--output-scp``,
    ``--output-ark`` and ``--dct-dim`` (int, passed as ``n`` to the DCT).
    Output is piped through ``copy-feats --compress=true``.
    """
    ## Settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    for path_flag in ('--input-scp', '--output-scp', '--output-ark'):
        parser.add_argument(path_flag)
    parser.add_argument('--dct-dim', type=int)
    args = parser.parse_args()

    # Write specifier: pipe into copy-feats, which emits the ark and scp.
    pipe_prefix = 'ark:| copy-feats --compress=true ark:- ark,scp:'
    ark_scp_output = pipe_prefix + args.output_ark + ',' + args.output_scp

    with ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for key, mat in ko.read_mat_scp(args.input_scp):
            transformed = fft.dct(mat, type=2, n=args.dct_dim)
            ko.write_mat(sink, transformed, key=key)

    print('#################success#################')
Example #11
Source File: forward_pass_v1.py From Contrastive-Predictive-Coding-PyTorch with MIT License | 6 votes |
def forward_dct(args, cpc_model, device, data_loader, output_ark, output_scp, dct_dim=24):
    ''' forward with dct

    Runs ``cpc_model.predict`` on every batch of ``data_loader`` with
    gradients disabled, applies a type-2 DCT with ``n=dct_dim`` to the
    output, and writes each result under ``utt_id[0]`` through a
    ``copy-feats --compress=true`` pipe to ``output_ark`` / ``output_scp``.
    ``args`` is not used by this function.
    '''
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # not training cdc model

    pipe_prefix = 'ark:| copy-feats --compress=true ark:- ark,scp:'
    ark_scp_output = pipe_prefix + output_ark + ',' + output_scp

    with torch.no_grad(), ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt_id, data in data_loader:
            # add channel dimension
            batch = data.float().unsqueeze(1).to(device).contiguous()
            hidden = cpc_model.init_hidden(len(batch))
            output, hidden = cpc_model.predict(batch, hidden)
            # kaldi io does not accept torch tensor
            mat = output.squeeze(0).cpu().numpy()
            # apply dct
            ko.write_mat(sink, fft.dct(mat, type=2, n=dct_dim), key=utt_id[0])
Example #12
Source File: linalg.py From alphacsc with BSD 3-Clause "New" or "Revised" License | 6 votes |
def dctii(x, axes=None):
    """
    Compute a multi-dimensional DCT-II over specified array axes.

    The transform is built by applying the one-dimensional DCT-II
    :func:`scipy.fftpack.dct` with normalization mode 'ortho' along each
    requested axis in turn.

    Parameters
    ----------
    x : array_like
      Input array
    axes : sequence of ints, optional (default None)
      Axes over which to compute the DCT-II. ``None`` selects every axis
      of `x`.

    Returns
    -------
    y : ndarray
      DCT-II of input array
    """
    selected_axes = range(x.ndim) if axes is None else axes
    y = x
    for axis in selected_axes:
        y = fftpack.dct(y, type=2, axis=axis, norm='ortho')
    return y
Example #13
Source File: base.py From Artificial-Intelligence-with-Python with MIT License | 6 votes |
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    Pipeline: ``fbank`` energies -> natural log -> orthonormal type-2 DCT
    along axis 1 -> keep the first ``numcep`` columns -> ``lifter``.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstrum to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    fb_energies, total_energy = fbank(
        signal, samplerate, winlen, winstep, nfilt, nfft, lowfreq, highfreq, preemph)

    log_energies = numpy.log(fb_energies)
    coefficients = dct(log_energies, type=2, axis=1, norm='ortho')[:, :numcep]
    coefficients = lifter(coefficients, ceplifter)

    if not appendEnergy:
        return coefficients
    # replace first cepstral coefficient with log of frame energy
    coefficients[:, 0] = numpy.log(total_energy)
    return coefficients
Example #14
Source File: audio_tools.py From dagbldr with BSD 3-Clause "New" or "Revised" License | 6 votes |
def run_fft_dct_example():
    """Example: perturb delta-coded DCT coefficients of an STFT and resynthesize.

    The fetched speech sample is analysed with ``stft`` and an orthonormal
    DCT over the last axis; consecutive-row differences receive seeded
    Gaussian noise at 1% of each column's standard deviation and are
    cumulatively summed before the inverse DCT and ``istft``. The error
    figure is printed, and ``fftdct_orig.wav`` / ``fftdct_rec.wav`` are
    written.
    """
    random_state = np.random.RandomState(1999)
    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]

    # Encode.
    dct_frames = fftpack.dct(
        complex_to_real_view(stft(X, n_fft)), axis=-1, norm='ortho')
    frame_deltas = dct_frames[1:] - dct_frames[:-1]
    spread = frame_deltas.std(axis=0, keepdims=True)
    rows, cols = frame_deltas.shape[0], frame_deltas.shape[1]
    frame_deltas += .01 * spread * random_state.randn(rows, cols)

    # Decode.
    rebuilt = np.cumsum(frame_deltas, axis=0)
    rebuilt = fftpack.idct(rebuilt, axis=-1, norm='ortho')
    X_r = istft(real_to_complex_view(rebuilt), n_fft)[:len(X)]

    # NOTE(review): error norm divided by signal norm - the sign is flipped
    # relative to the usual SNR definition; confirm intent.
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r))
Example #15
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def dct_compress(X, n_components, window_size=128):
    """
    Compress using the DCT

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        The input signal to compress. Should be 1-dimensional

    n_components : int or None
        The number of DCT components to keep per window. Setting
        n_components to about .5 * window_size can give compression with
        fairly good reconstruction. ``None`` keeps every component.

    window_size : int
        The input X is zero-padded to a multiple of window_size and broken
        into non-overlapping windows, each of which is compressed with an
        orthonormal DCT.

    Returns
    -------
    X_compressed : ndarray, shape=(num_windows, n_components)
        A 2D array of non-overlapping DCT coefficients; the second dimension
        is window_size when n_components is None. For use with uncompress

    Reference
    ---------
    http://nbviewer.ipython.org/github/craffel/crucialpython/blob/master/week3/stride_tricks.ipynb
    """
    leftover = len(X) % window_size
    if leftover != 0:
        # Zero-pad so the signal divides evenly into windows.
        X = np.hstack((X, np.zeros((window_size - leftover))))

    windows = X.reshape((len(X) // window_size, window_size))
    coefficients = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coefficients
    return coefficients[:, :n_components]
Example #16
Source File: celebA_estimators.py From csgm with MIT License | 5 votes |
def dct2(image_channel):
    """Return the orthonormal 2-D DCT of a 2-D array.

    The 1-D DCT is applied along the last axis of the transposed input,
    the result is transposed back, and the DCT is applied along the last
    axis again.
    """
    first_pass = fftpack.dct(image_channel.T, norm='ortho')
    return fftpack.dct(first_pass.T, norm='ortho')
Example #17
Source File: audio.py From asr-study with MIT License | 5 votes |
def _call(self, signal):
    """Compute MFCC features from an audio signal.

    The parent ``_call`` supplies filterbank features and frame energy.
    The features are log-compressed, transformed with an orthonormal
    type-2 DCT along axis 1, truncated to ``self.num_cep`` columns and
    liftered. Optional first- and second-order deltas are appended as
    extra columns.

    Args:
        signal: the audio signal from which to compute features. Should be
            an N*1 array

    Returns:
        A numpy array of size (NUMFRAMES by numcep) containing features
        (wider when deltas are appended). Each row holds 1 feature vector.
    """
    feat, energy = super(MFCC, self)._call(signal)

    cepstra = dct(np.log(feat), type=2, axis=1, norm='ortho')
    feat = self._lifter(cepstra[:, :self.num_cep], self.cep_lifter)

    if self.append_energy:
        # replace first cepstral coefficient with log of frame energy
        feat[:, 0] = np.log(energy + self.eps)

    if not self.d:
        return feat

    d = sigproc.delta(feat, 2)
    feat = np.hstack([feat, d])

    # NOTE(review): the second-order deltas are derived from `d`, so this
    # branch is only reachable when self.d is set - confirm against the
    # original indentation.
    if self.dd:
        feat = np.hstack([feat, sigproc.delta(d, 2)])

    return feat
Example #18
Source File: test_lineardct.py From torch-dct with MIT License | 5 votes |
def test_dct_3d():
    """Compare ``dct_3d`` with three chained SciPy type-2 DCTs on cubic inputs."""
    for side in [2, 5, 32]:
        x = np.random.normal(size=(1, side, side, side))

        # Reference: default-normalization DCT-II over axes 3, 2 and 1 in turn.
        ref = x
        for axis in (3, 2, 1):
            ref = fftpack.dct(ref, axis=axis, type=2)

        act = dct_3d(torch.tensor(x).float()).numpy()
        assert np.abs(ref - act).max() < EPS, (ref, act)
Example #19
Source File: test_lineardct.py From torch-dct with MIT License | 5 votes |
def test_dct_2d():
    """Compare ``dct_2d`` with two chained SciPy type-2 DCTs on square inputs."""
    for side in [2, 5, 32]:
        x = np.random.normal(size=(1, side, side))

        # Reference: default-normalization DCT-II over axis 2, then axis 1.
        ref = x
        for axis in (2, 1):
            ref = fftpack.dct(ref, axis=axis, type=2)

        act = dct_2d(torch.tensor(x).float()).numpy()
        assert np.abs(ref - act).max() < EPS, (ref, act)
Example #20
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def overlap_dct_compress(X, n_components, window_size):
    """
    Overlap (at 50% of window_size) and compress X.

    The signal is windowed with ``halfoverlap`` and each window is
    transformed with an orthonormal DCT.

    Parameters
    ----------
    X : ndarray, shape=(n_samples,)
        Input signal to compress

    n_components : int or None
        number of DCT components to keep; ``None`` keeps all of them

    window_size : int
        Size of windows to take

    Returns
    -------
    X_dct : ndarray, shape=(n_windows, n_components)
        Windowed and compressed version of X
    """
    windows = halfoverlap(X, window_size)
    coefficients = fftpack.dct(windows, norm='ortho')
    if n_components is None:
        return coefficients
    return coefficients[:, :n_components]


# Evil voice is caused by adding double the zeros before inverse DCT...
# Very cool bug but makes sense
Example #21
Source File: test_lineardct.py From torch-dct with MIT License | 5 votes |
def test_idct():
    """Check that ``idct(dct(x))`` reproduces ``x`` for both norm settings."""
    lengths = [5, 2, 32, 111]
    for norm in [None, 'ortho']:
        for length in lengths:
            x = np.random.normal(size=(1, length))
            forward = dct(torch.tensor(x).float(), norm=norm)
            y = idct(forward, norm=norm).numpy()
            assert np.abs(x - y).max() < EPS, x
Example #22
Source File: test_lineardct.py From torch-dct with MIT License | 5 votes |
def test_dct1():
    """Compare ``dct1`` with SciPy's type-1 DCT on row vectors and 2^d cubes."""
    # Row vectors of several lengths first, then hypercubes of side 2.
    shapes = [(1, N,) for N in [2, 5, 32, 111]]
    shapes += [(2,) * d for d in [2, 3, 4]]

    for shape in shapes:
        x = np.random.normal(size=shape)
        ref = fftpack.dct(x, type=1)
        act = dct1(torch.tensor(x).float()).numpy()
        assert np.abs(ref - act).max() < EPS, ref
Example #23
Source File: test_lineardct.py From torch-dct with MIT License | 5 votes |
def dct(x, norm=None):
    """Apply a ``torch_dct.LinearDCT`` layer of type 'dct' to ``x``.

    The layer is sized from ``x.size(1)`` and built with the given ``norm``;
    the ``.data`` of its output is returned.
    """
    layer = torch_dct.LinearDCT(x.size(1), type='dct', norm=norm)
    return layer(x).data
Example #24
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_idct_3d():
    """Check that ``idct_3d(dct_3d(x))`` reproduces ``x`` for mixed side lengths."""
    sides = [2, 5, 32]
    for depth in sides:
        for height in sides:
            for width in sides:
                x = np.random.normal(size=(1, depth, height, width))
                forward = dct.dct_3d(torch.tensor(x))
                y = dct.idct_3d(forward).numpy()
                assert np.abs(x - y).max() < EPS, x
Example #25
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_dct_3d():
    """Compare ``dct.dct_3d`` with chained SciPy type-2 DCTs for mixed side lengths."""
    sides = [2, 5, 32]
    for depth in sides:
        for height in sides:
            for width in sides:
                x = np.random.normal(size=(1, depth, height, width))

                # Reference: default-normalization DCT-II over axes 3, 2, 1.
                ref = x
                for axis in (3, 2, 1):
                    ref = fftpack.dct(ref, axis=axis, type=2)

                act = dct.dct_3d(torch.tensor(x)).numpy()
                assert np.abs(ref - act).max() < EPS, (ref, act)
Example #26
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_idct_2d():
    """Check that ``idct_2d(dct_2d(x))`` reproduces ``x`` for mixed side lengths."""
    sides = [2, 5, 32]
    for height in sides:
        for width in sides:
            x = np.random.normal(size=(1, height, width))
            forward = dct.dct_2d(torch.tensor(x))
            y = dct.idct_2d(forward).numpy()
            assert np.abs(x - y).max() < EPS, x
Example #27
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_cuda():
    """Run the DCT-I, DCT-II and inverse round-trip checks on ``cuda:0``.

    The body is skipped entirely when ``torch.cuda.is_available()`` is false.
    """
    if not torch.cuda.is_available():
        return

    device = torch.device('cuda:0')

    # DCT-I against SciPy: row vectors, then hypercubes of side 2.
    for N in [2, 5, 32, 111]:
        x = np.random.normal(size=(1, N,))
        ref = fftpack.dct(x, type=1)
        gpu_input = torch.tensor(x, device=device)
        act = dct.dct1(gpu_input).cpu().numpy()
        assert np.abs(ref - act).max() < EPS, ref

    for d in [2, 3, 4]:
        x = np.random.normal(size=(2,) * d)
        ref = fftpack.dct(x, type=1)
        gpu_input = torch.tensor(x, device=device)
        act = dct.dct1(gpu_input).cpu().numpy()
        assert np.abs(ref - act).max() < EPS, ref

    for norm in [None, 'ortho']:
        # DCT-II against SciPy.
        for N in [2, 3, 5, 32, 111]:
            x = np.random.normal(size=(1, N,))
            ref = fftpack.dct(x, type=2, norm=norm)
            gpu_input = torch.tensor(x, device=device)
            act = dct.dct(gpu_input, norm=norm).cpu().numpy()
            assert np.abs(ref - act).max() < EPS, (norm, N)

        for d in [2, 3, 4, 11]:
            x = np.random.normal(size=(2,) * d)
            ref = fftpack.dct(x, type=2, norm=norm)
            gpu_input = torch.tensor(x, device=device)
            act = dct.dct(gpu_input, norm=norm).cpu().numpy()
            assert np.abs(ref - act).max() < EPS, (norm, d)

        # Forward/inverse round trip.
        for N in [5, 2, 32, 111]:
            x = np.random.normal(size=(1, N))
            forward = dct.dct(torch.tensor(x, device=device), norm=norm)
            y = dct.idct(forward, norm=norm).cpu().numpy()
            assert np.abs(x - y).max() < EPS, x
Example #28
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_idct():
    """Check that ``dct.idct(dct.dct(x))`` reproduces ``x`` for both norm settings."""
    lengths = [5, 2, 32, 111]
    for norm in [None, 'ortho']:
        for length in lengths:
            x = np.random.normal(size=(1, length))
            forward = dct.dct(torch.tensor(x), norm=norm)
            y = dct.idct(forward, norm=norm).numpy()
            assert np.abs(x - y).max() < EPS, x
Example #29
Source File: test_dct.py From torch-dct with MIT License | 5 votes |
def test_dct():
    """Compare ``dct.dct`` with SciPy's type-2 DCT for both norm settings."""
    for norm in [None, 'ortho']:
        # (failure tag, input shape): row vectors first, then 2^d hypercubes.
        cases = [(N, (1, N,)) for N in [2, 3, 5, 32, 111]]
        cases += [(d, (2,) * d) for d in [2, 3, 4, 11]]

        for tag, shape in cases:
            x = np.random.normal(size=shape)
            ref = fftpack.dct(x, type=2, norm=norm)
            act = dct.dct(torch.tensor(x), norm=norm).numpy()
            assert np.abs(ref - act).max() < EPS, (norm, tag)
Example #30
Source File: sonopy.py From sonopy with Apache License 2.0 | 5 votes |
def mfcc_spec(audio, sample_rate, window_stride=(160, 80), fft_size=512, num_filt=20, num_coeffs=13, return_parts=False):
    """Calculates mel frequency cepstrum coefficient spectrogram

    The power spectrogram from ``power_spec`` is projected onto
    ``filterbanks``, passed through ``safe_log``, and transformed with an
    orthonormal DCT; the first ``num_coeffs`` columns are kept and column 0
    is replaced by the ``safe_log`` of each row's summed power.

    When ``return_parts`` is true the tuple
    ``(powers, filters, mels, mfccs)`` is returned, otherwise only
    ``mfccs``. If the power spectrogram is empty, an empty array of shape
    ``(0, min(num_filt, num_coeffs))`` is returned.
    """
    powers = power_spec(audio, window_stride, fft_size)
    if powers.size == 0:
        # NOTE(review): this early exit returns a bare array even when
        # return_parts is True - confirm callers do not unpack it.
        return np.empty((0, min(num_filt, num_coeffs)))

    filters = filterbanks(sample_rate, num_filt, powers.shape[1])
    # Mel energies (condensed spectrogram)
    mels = safe_log(np.dot(powers, filters.T))
    # machine readable spectrogram
    mfccs = dct(mels, norm='ortho')[:, :num_coeffs]
    # Replace first band with log energies
    mfccs[:, 0] = safe_log(np.sum(powers, 1))

    return (powers, filters, mels, mfccs) if return_parts else mfccs