Python librosa.stft() Examples
The following are 30
code examples of librosa.stft().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module librosa, or try the search function.
Example #1
Source File: test_rythm.py From audiomate with MIT License | 7 votes |
def test_compute_cleanup_after_one_utterance(self):
    """Check that a Tempogram step gives the same result when it is reused.

    The same frames are pushed through one ``pipeline.Tempogram`` instance
    twice (each time with ``last=True``); both outputs must match the
    tempogram computed directly with librosa.
    """
    wav_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(wav_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED: power STFT -> mel spectrogram (dB) -> onset strength -> tempogram
    power_spec = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sr)
    mel_db = librosa.power_to_db(mel_spec)
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr,
                                          win_length=11, center=True).T

    # ACTUAL: first and second run on the very same step instance
    tgram_step = pipeline.Tempogram(win_length=11)
    for _ in range(2):
        tgrams = tgram_step.process_frames(frames, sr, last=True)
        assert np.allclose(tgrams, exp_tgram)
Example #2
Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License | 6 votes |
def wav_to_spec_batch(wavs, n_fft, hop_length=None):
    """Compute the short-time Fourier transform of every waveform in a batch.

    Args:
        wavs: 2-D array; it is iterated along its first axis and each row is
            transformed on its own.
        n_fft: int > 0, FFT window size (forwarded to ``librosa.stft``).
        hop_length: int > 0, number of audio samples between STFT columns.
            ``None`` is forwarded unchanged so librosa applies its default.

    Returns:
        ``np.array`` built from the list of per-waveform STFT matrices.
    """
    assert (wavs.ndim == 2), 'Single wav uses librosa.stft() directly'
    return np.array([
        librosa.stft(wav, n_fft=n_fft, hop_length=hop_length)
        for wav in wavs
    ])
Example #3
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    """Compare online (chunked) onset-strength output with a librosa reference."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(wav_path, sr=None)

    # EXPECTED: the signal is zero-padded by 1024 samples at the end before the STFT
    padded = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    power_spec = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sr)
    mel_db = librosa.power_to_db(mel_spec)
    exp_onsets = librosa.onset.onset_strength(S=mel_db, center=False).T
    # reshape to a single column so it lines up with the stacked chunks
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL: stream the track through the step and stack all emitted chunks
    track = tracks.FileTrack('idx', wav_path)
    onset_step = pipeline.OnsetStrength()
    chunks = list(onset_step.process_track_online(track, 2048, 1024, chunk_size=5))
    onsets = np.vstack(chunks)

    print(onsets.shape, exp_onsets.shape)
    assert np.allclose(onsets, exp_onsets)
Example #4
Source File: utils.py From Tacotron-pytorch with MIT License | 6 votes |
def _stft(self, x):
    """Return ``librosa.stft`` of ``x`` using this object's STFT settings.

    The FFT size, hop length and window length are read from ``self.n_fft``,
    ``self.hop_length`` and ``self.win_length``.
    """
    stft_settings = dict(
        n_fft=self.n_fft,
        hop_length=self.hop_length,
        win_length=self.win_length,
    )
    return librosa.stft(x, **stft_settings)
Example #5
Source File: test_rythm.py From audiomate with MIT License | 6 votes |
def test_compute(self):
    """Compare ``pipeline.Tempogram`` frame processing with a librosa reference."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(wav_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED: power STFT -> mel spectrogram (dB) -> onset strength -> tempogram
    power_spec = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sr)
    mel_db = librosa.power_to_db(mel_spec)
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr,
                                          win_length=11, center=True).T

    # ACTUAL
    tgrams = pipeline.Tempogram(win_length=11).process_frames(frames, sr, last=True)

    assert np.allclose(tgrams, exp_tgram)
Example #6
Source File: test_rythm.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    """Compare online (chunked) tempogram output with a librosa reference.

    Data: 41523 samples, 16 kHz; yields 40 frames with frame-size 2048 and
    hop-size 1024.
    """
    wav_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(wav_path, sr=None)

    # EXPECTED: the signal is zero-padded by 1024 samples at the end before the STFT
    padded = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    power_spec = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sr)
    mel_db = librosa.power_to_db(mel_spec)
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr,
                                          win_length=4, center=True).T

    # ACTUAL: stream the track through the step and stack all emitted chunks
    track = tracks.FileTrack('idx', wav_path)
    tgram_step = pipeline.Tempogram(win_length=4)
    chunks = list(tgram_step.process_track_online(track, 2048, 1024, chunk_size=5))
    tgrams = np.vstack(chunks)

    assert np.allclose(tgrams, exp_tgram)
Example #7
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute(self):
    """Compare ``pipeline.OnsetStrength`` frame processing with a librosa reference."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(wav_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED: power STFT -> mel spectrogram (dB) -> onset strength
    power_spec = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sr)
    mel_db = librosa.power_to_db(mel_spec)
    exp_onsets = librosa.onset.onset_strength(S=mel_db, center=False).T
    # reshape to a single column so it lines up with the step output
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL
    onsets = pipeline.OnsetStrength().process_frames(frames, sr, last=True)

    assert np.allclose(onsets, exp_onsets)
Example #8
Source File: feature_extraction_functions.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 6 votes |
def apply_noise(y,sr,wavefile):
    """Randomly mix environment noise from ``wavefile`` into the signal ``y``.

    A noise scale is drawn from {0.0, 0.25, 0.5, 0.75}. With scale 0.0 the
    input is returned untouched. Otherwise the noise file is loaded,
    normalised, scaled and length-matched through ``prep_data_vad_noise``,
    trimmed or zero-padded to exactly ``len(y)`` samples, and added.

    Args:
        y: signal samples (numpy array).
        sr: sample rate of ``y``.
        wavefile: path of the environment-noise recording.

    Returns:
        ``y`` itself when no noise is applied, otherwise a new array holding
        the noisy signal. The caller's array is never modified.
    """
    #at random apply varying amounts of environment noise
    rand_scale = random.choice([0.0,0.25,0.5,0.75])
    if not rand_scale > 0.0:
        return y

    total_length = len(y)/sr
    # Load the noise at the signal's own rate. The previous hard-coded 16000
    # also overwrote ``sr``, so for any other rate the noise was mixed in at
    # the wrong speed.
    y_noise, _ = librosa.load(wavefile, sr=sr)
    envnoise_normalized = prep_data_vad_noise.normalize(y_noise)
    envnoise_scaled = prep_data_vad_noise.scale_noise(envnoise_normalized, rand_scale)
    envnoise_matched = prep_data_vad_noise.match_length(envnoise_scaled, sr, total_length)

    # Force the noise to exactly len(y) samples: trim the excess or zero-pad.
    diff = int(len(y) - len(envnoise_matched))
    if diff < 0:
        envnoise_matched = envnoise_matched[:diff]
    elif diff > 0:
        envnoise_matched = np.append(envnoise_matched, np.zeros(diff,))

    # Add into a copy (same dtype as y) instead of ``y += ...`` so the
    # caller's samples are not silently overwritten.
    noisy = np.array(y)
    noisy += envnoise_matched
    return noisy

#collects the actual features, according to the settings assigned
#such as with noise, voice activity detection/beginning silence removal, etc.
#mfcc, fbank, stft, delta, dom_freq
Example #9
Source File: feature_extraction_functions.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 6 votes |
def get_stft(y,sr,window_size=None, window_shift=None):
    """Return the mean-normalised magnitude STFT of ``y``, transposed.

    Args:
        y: signal samples.
        sr: sample rate of ``y``.
        window_size: analysis window; scaled by ``0.001*sr`` (i.e. treated as
            milliseconds). ``None`` uses ``0.025*sr`` samples.
        window_shift: hop between windows, scaled the same way. ``None`` uses
            ``0.010*sr`` samples.

    Returns:
        The transposed magnitude STFT with the per-column mean (plus 1e-8)
        subtracted.
    """
    n_fft = int(0.025*sr) if window_size is None else int(window_size*0.001*sr)
    hop_length = int(0.010*sr) if window_shift is None else int(window_shift*0.001*sr)

    # librosa returns complex numbers, so take the absolute value
    magnitudes = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    stft = np.transpose(magnitudes)
    stft -= (np.mean(stft, axis=0) + 1e-8)
    return stft

#super experimental. I wanted fundamental frequency but this was easier
Example #10
Source File: feat_ext.py From icassp19 with MIT License | 6 votes |
def get_spectrogram(y, n_fft=1024, win_length_samples=0.04, hop_length_samples=0.02, window=scipy.signal.hamming(1024, sym=False), center=True, spectrogram_type='magnitude', params_extract=None):
    """Return the power spectrogram of ``y`` when ``spectrogram_type == 'power'``.

    ``params_extract.get('eps')`` is added to the signal before the STFT; the
    remaining arguments are forwarded to ``librosa.stft``.

    NOTE(review): only the 'power' branch is visible here; any other
    ``spectrogram_type`` (including the default 'magnitude') returns None.
    NOTE(review): ``params_extract`` defaults to None but is dereferenced in
    the 'power' branch, so callers presumably always pass a mapping - confirm.
    """
    if spectrogram_type != 'power':
        return None

    eps = params_extract.get('eps')
    complex_spec = librosa.stft(y + eps,
                                n_fft=n_fft,
                                win_length=win_length_samples,
                                hop_length=hop_length_samples,
                                center=center,
                                window=window)
    return np.abs(complex_spec) ** 2
Example #11
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Randomly time-stretch the STFT stored in ``data``.

    When ``should_apply_transform()`` is false, ``data`` is returned
    unchanged. Otherwise a stretch factor ``1 + scale`` is drawn with
    ``scale`` uniform in ``[-self.max_scale, self.max_scale]`` and
    ``data["stft"]`` is replaced by the phase-vocoder output.

    Args:
        data: mapping with at least the keys ``"stft"`` and ``"hop_length"``.

    Returns:
        The same ``data`` object.
    """
    if not should_apply_transform():
        return data

    stft = data["stft"]
    hop_length = data["hop_length"]
    scale = random.uniform(-self.max_scale, self.max_scale)
    # ``rate`` is passed by keyword: it is keyword-only in librosa >= 0.10,
    # and older releases accept the same parameter name.
    data["stft"] = librosa.core.phase_vocoder(
        stft, rate=1 + scale, hop_length=hop_length
    )
    return data
Example #12
Source File: dsp.py From WaveRNN with MIT License | 5 votes |
def melspectrogram(y):
    """Return the normalised mel spectrogram of ``y``.

    Pipeline: ``stft`` -> magnitude -> ``linear_to_mel`` -> ``amp_to_db`` ->
    ``normalize``.
    """
    magnitudes = np.abs(stft(y))
    mel_db = amp_to_db(linear_to_mel(magnitudes))
    return normalize(mel_db)
Example #13
Source File: dsp.py From WaveRNN with MIT License | 5 votes |
def spectrogram(y):
    """Return the normalised linear spectrogram of ``y``.

    Pipeline: ``stft`` -> magnitude -> ``amp_to_db`` -> subtract
    ``ref_level_db`` -> ``normalize``.
    """
    magnitudes = np.abs(stft(y))
    level_db = amp_to_db(magnitudes) - ref_level_db
    return normalize(level_db)
Example #14
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Compute the STFT of ``data["samples"]`` and store it in ``data``.

    Writes the keys ``"n_fft"``, ``"hop_length"``, ``"stft"`` and
    ``"stft_shape"`` and returns the same ``data`` object.
    """
    samples = data["samples"]

    # record the settings next to the result
    data["n_fft"] = self.n_fft
    data["hop_length"] = self.hop_length

    spec = librosa.stft(samples, n_fft=self.n_fft, hop_length=self.hop_length)
    data["stft"] = spec
    data["stft_shape"] = spec.shape
    return data
Example #15
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def get_energy_rms(stft_matrix):
    """Return the root-mean-square magnitude of every row of ``stft_matrix``.

    For each row the squared magnitudes are summed, divided by
    ``stft_matrix.shape[1]`` and square-rooted.

    Args:
        stft_matrix: numpy array indexed as ``[time, frequency]``.

    Returns:
        A list with one RMS value per row; an empty list for an empty matrix.
    """
    #stft.shape[1] == bandwidths/frequencies
    #stft.shape[0] pertains to the time domain
    if len(stft_matrix) == 0:
        return []
    # One vectorised reduction instead of a Python-level sum per row.
    power = np.abs(stft_matrix) ** 2
    rms = np.sqrt(np.sum(power, axis=1) / stft_matrix.shape[1])
    return list(rms)
Example #16
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def stft2power(stft_matrix):
    """Return the power (squared magnitude) of an STFT matrix.

    Args:
        stft_matrix: non-empty array of STFT values (may be complex).

    Returns:
        A new array holding ``|stft_matrix| ** 2``; the input is not modified.

    Raises:
        TypeError: if ``stft_matrix`` is None or has length 0.
    """
    if stft_matrix is None:
        raise TypeError("STFT Matrix does not exist. Function 'stft2power' needs an existing matrix.")
    if len(stft_matrix) == 0:
        raise TypeError("STFT Matrix is empty. Function 'stft2power' needs a non-empty matrix.")
    # np.abs already allocates a fresh array, so no explicit copy is needed.
    return np.abs(stft_matrix) ** 2
Example #17
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def stft2samps(stft,len_origsamp):
    """Invert a transposed STFT back to samples.

    The input is copied and transposed before being handed to
    ``librosa.istft``; ``len_origsamp`` is forwarded as the ``length`` argument.
    """
    untransposed = np.transpose(stft.copy())
    return librosa.istft(untransposed, length=len_origsamp)
Example #18
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def samps2stft(y, sr):
    """Return the transposed ``librosa.stft`` of ``y``.

    If ``y`` has an odd number of samples the last one is dropped first.
    ``sr`` is accepted but not used.
    """
    if len(y) % 2:
        # odd length: drop the final sample
        y = y[:-1]
    return np.transpose(librosa.stft(y))
Example #19
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def get_speech_samples(samples, sr):
    """Trim ``samples`` to the span between the detected start and end of sound.

    The start and end frame indices from ``sound_index`` are converted to
    fractions of the energy sequence and then mapped onto sample positions.

    Returns:
        ``(trimmed_samples, True)`` on success, or ``(samples, False)`` when
        ``NoSpeechDetected`` is raised, either here or by one of the helpers.
    """
    try:
        n_samples = len(samples)
        energy = get_energy(wave2stft(samples, sr))
        energy_mean = get_energy_mean(energy)
        first = sound_index(energy, energy_mean, start=True)
        last = sound_index(energy, energy_mean, start=False)
        # element [1] of each result is the "found" flag
        if first[1] == False or last[1] == False:
            raise NoSpeechDetected("No speech detected")
        start_sample = int(first[0] / len(energy) * n_samples)
        end_sample = int(last[0] / len(energy) * n_samples)
        return samples[start_sample:end_sample], True
    except NoSpeechDetected:
        return samples, False

######
#noise reduction
Example #20
Source File: prep_noise.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def wave2stft(np_array,sr):
    """Return the transposed STFT of ``np_array``.

    Uses a hop of ``int(0.01*sr)`` samples and an FFT size of
    ``int(0.025*sr)`` samples.
    """
    hop = int(0.01*sr)
    fft_size = int(0.025*sr)
    spec = librosa.stft(np_array, hop_length=hop, n_fft=fft_size)
    return np.transpose(spec)
Example #21
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Remove the ``"stft"`` entry from ``data`` and return ``data``.

    Raises ``KeyError`` when the entry is missing.
    """
    data.pop("stft")
    return data
Example #22
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Invert ``data["stft"]`` and store the result as ``data["istft_samples"]``.

    The output dtype is taken from ``data["samples"]``. Returns the same
    ``data`` object.
    """
    spec = data["stft"]
    out_dtype = data["samples"].dtype
    data["istft_samples"] = librosa.core.istft(spec, dtype=out_dtype)
    return data
Example #23
Source File: feature_extraction_functions.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License | 5 votes |
def get_mel_spectrogram(y,sr,num_mels = None,window_size=None, window_shift=None):
    '''
    Return the mean-normalised mel spectrogram (FBANK) of ``y``, transposed.

    set values: default for mel spectrogram calculation (FBANK)
    - windows of 25ms
    - window shifts of 10ms

    Args:
        y: signal samples.
        sr: sample rate of ``y``.
        num_mels: number of mel bands; ``None`` means 40.
        window_size: analysis window, scaled by ``0.001*sr`` (i.e. treated as
            milliseconds); ``None`` means ``0.025*sr`` samples.
        window_shift: hop between windows, scaled the same way; ``None``
            means ``0.010*sr`` samples.

    Returns:
        The transposed mel spectrogram with the per-column mean (plus 1e-8)
        subtracted.
    '''
    if num_mels is None:
        num_mels = 40
    if window_size is None:
        n_fft = int(0.025*sr)
    else:
        n_fft = int(window_size*0.001*sr)
    if window_shift is None:
        hop_length = int(0.010*sr)
    else:
        hop_length = int(window_shift*0.001*sr)
    # y and sr are passed by keyword: they are keyword-only in librosa >= 0.10,
    # and older releases accept the same parameter names.
    fbank = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=num_mels)
    fbank = np.transpose(fbank)
    fbank -= (np.mean(fbank, axis=0) + 1e-8)
    return fbank

#get stft and adjust settings if you'd like
#note: I have not messed around with the window_size or shift here
#if you change these, you might have to adjust the default number of feature
#columns assigned to stft in the main module (see right below def main())
Example #24
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Compute the STFT of ``data["samples"]`` and store it in ``data``.

    Writes the keys ``"n_fft"``, ``"hop_length"``, ``"stft"`` and
    ``"stft_shape"`` and returns the same ``data`` object.
    """
    samples = data["samples"]

    # record the settings next to the result
    data["n_fft"] = self.n_fft
    data["hop_length"] = self.hop_length

    spec = librosa.stft(samples, n_fft=self.n_fft, hop_length=self.hop_length)
    data["stft"] = spec
    data["stft_shape"] = spec.shape
    return data
Example #25
Source File: audio.py From libfaceid with MIT License | 5 votes |
def _stft_tensorflow(signals):
    """Return the TensorFlow STFT of ``signals``.

    The FFT size, hop length and window length come from
    ``_stft_parameters()``; the end of the signal is not padded.
    """
    n_fft, hop_length, win_length = _stft_parameters()
    return tf.contrib.signal.stft(
        signals,
        frame_length=win_length,
        frame_step=hop_length,
        fft_length=n_fft,
        pad_end=False,
    )
Example #26
Source File: audio.py From libfaceid with MIT License | 5 votes |
def _stft(y):
    """Return ``librosa.stft`` of ``y`` using the values from ``_stft_parameters()``."""
    n_fft, hop_length, win_length = _stft_parameters()
    stft_settings = dict(n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    return librosa.stft(y=y, **stft_settings)
Example #27
Source File: audio.py From libfaceid with MIT License | 5 votes |
def _griffin_lim_tensorflow(S):
    '''TensorFlow implementation of Griffin-Lim.

    Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb

    Runs ``hparams.griffin_lim_iters`` rounds of: STFT of the current signal,
    keep only its phase, re-apply that phase to ``S`` and invert again.
    '''
    with tf.variable_scope('griffinlim'):
        # TensorFlow's stft and istft operate on a batch of spectrograms;
        # create a batch of size 1
        batched = tf.expand_dims(S, 0)
        target = tf.identity(tf.cast(batched, dtype=tf.complex64))
        signal = _istft_tensorflow(target)
        for _ in range(hparams.griffin_lim_iters):
            estimate = _stft_tensorflow(signal)
            # clamp the magnitude at 1e-8 so the division never hits zero
            magnitude = tf.maximum(1e-8, tf.abs(estimate))
            angles = estimate / tf.cast(magnitude, tf.complex64)
            signal = _istft_tensorflow(target * angles)
        return tf.squeeze(signal, 0)
Example #28
Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License | 5 votes |
def spec_to_wav_batch(stft_matrices, hop_length=None):
    """Invert every STFT matrix of a 3-D batch back to a waveform.

    Args:
        stft_matrices: 3-D array; iterated along its first axis. Each matrix
            may hold complex numbers.
        hop_length: forwarded to ``librosa.istft`` (``None`` is passed on as is).

    Returns:
        ``np.array`` built from the list of reconstructed waveforms.
    """
    assert (stft_matrices.ndim == 3), 'Single stft maxtrix uses librosa.istft() directly'
    return np.array([
        librosa.istft(stft_matrix, hop_length=hop_length)
        for stft_matrix in stft_matrices
    ])
Example #29
Source File: audio_transforms.py From nupic.torch with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    """Randomly time-stretch the STFT stored in ``data``.

    When ``should_apply_transform()`` is false, ``data`` is returned
    unchanged. Otherwise a stretch factor ``1 + scale`` is drawn with
    ``scale`` uniform in ``[-self.max_scale, self.max_scale]`` and
    ``data["stft"]`` is replaced by the phase-vocoder output.

    Args:
        data: mapping with at least the keys ``"stft"`` and ``"hop_length"``.

    Returns:
        The same ``data`` object.
    """
    if not should_apply_transform():
        return data

    stft = data["stft"]
    hop_length = data["hop_length"]
    scale = random.uniform(-self.max_scale, self.max_scale)
    # ``rate`` is passed by keyword: it is keyword-only in librosa >= 0.10,
    # and older releases accept the same parameter name.
    data["stft"] = librosa.core.phase_vocoder(
        stft, rate=1 + scale, hop_length=hop_length
    )
    return data
Example #30
Source File: utils.py From tacotron with Apache License 2.0 | 5 votes |
def griffin_lim(spectrogram):
    '''Applies the Griffin-Lim algorithm to estimate a waveform.

    Starting from a copy of ``spectrogram``, runs ``hp.n_iter`` rounds of:
    invert the current estimate, take the STFT of the result, keep only its
    phase and re-apply that phase to ``spectrogram``.

    Args:
        spectrogram: array that is multiplied by the estimated phase on every
            iteration (i.e. treated as the target magnitudes).

    Returns:
        The real part of the final inverted signal.
    '''
    X_best = copy.deepcopy(spectrogram)
    for _ in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        # n_fft / hop_length are passed by keyword: they are keyword-only in
        # librosa >= 0.10, and older releases accept the same names.
        est = librosa.stft(X_t, n_fft=hp.n_fft, hop_length=hp.hop_length, win_length=hp.win_length)
        # clamp the magnitude at 1e-8 so the division never hits zero
        phase = est / np.maximum(1e-8, np.abs(est))
        X_best = spectrogram * phase
    X_t = invert_spectrogram(X_best)
    y = np.real(X_t)
    return y