Python librosa.stft() Examples

The following are 30 code examples of librosa.stft(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.
Example #1
Source File: test_rythm.py    From audiomate with MIT License 7 votes vote down vote up
def test_compute_cleanup_after_one_utterance(self):
        """Run the same Tempogram step twice and compare both outputs to librosa.

        The reference tempogram is computed directly with librosa; the same
        ``pipeline.Tempogram`` instance then processes the frames two times
        with ``last=True`` and each output must match the reference.
        """
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)
        frames = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

        # Reference: power spectrogram -> mel spectrogram in dB -> onset envelope -> tempogram
        power_spec = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_env, sr=sampling_rate, win_length=11, center=True).T

        step = pipeline.Tempogram(win_length=11)

        # Two consecutive passes through the very same step instance
        for _ in range(2):
            actual = step.process_frames(frames, sampling_rate, last=True)
            assert np.allclose(actual, expected)
Example #2
Source File: preprocess.py    From Singing_Voice_Separation_RNN with MIT License 6 votes vote down vote up
def wav_to_spec_batch(wavs, n_fft, hop_length = None):
    """Short-time Fourier transform (STFT) of every wav in a 2-D batch.

    wavs : 2-D array, iterated along its first axis (one wav per entry).
    n_fft : int > 0 [scalar] FFT window size.
    hop_length : int > 0 [scalar] number of audio samples between STFT
        columns. If unspecified, librosa falls back to win_length / 4.

    Returns the per-wav STFT matrices gathered with ``np.array``.
    """
    assert (wavs.ndim == 2), 'Single wav uses librosa.stft() directly'

    return np.array([
        librosa.stft(single_wav, n_fft = n_fft, hop_length = hop_length)
        for single_wav in wavs
    ])
Example #3
Source File: test_onset.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute_online(self):
        """Online onset-strength output for a track must match the librosa reference."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)

        # Reference, computed on the signal with 1024 zero samples appended
        padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
        power_spec = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        reference = librosa.onset.onset_strength(S=mel_db, center=False).T
        reference = reference.reshape(reference.shape[0], 1)

        # Pipeline output, produced chunk by chunk and stacked afterwards
        track = tracks.FileTrack('idx', wav_path)
        step = pipeline.OnsetStrength()
        produced = np.vstack(list(step.process_track_online(track, 2048, 1024, chunk_size=5)))

        print(produced.shape, reference.shape)

        assert np.allclose(produced, reference)
Example #4
Source File: utils.py    From Tacotron-pytorch with MIT License 6 votes vote down vote up
def _stft(self, x):
        """Return ``librosa.stft`` of ``x`` using this object's n_fft, hop_length and win_length."""
        stft_kwargs = {
            'n_fft': self.n_fft,
            'hop_length': self.hop_length,
            'win_length': self.win_length,
        }
        return librosa.stft(x, **stft_kwargs)
Example #5
Source File: test_rythm.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute(self):
        """Tempogram produced by the pipeline step must match the librosa reference."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)
        frames = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

        # Reference: power spectrogram -> mel spectrogram in dB -> onset envelope -> tempogram
        power_spec = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_env, sr=sampling_rate, win_length=11, center=True).T

        # Pipeline output for the same frames
        step = pipeline.Tempogram(win_length=11)
        actual = step.process_frames(frames, sampling_rate, last=True)

        assert np.allclose(actual, expected)
Example #6
Source File: test_rythm.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute_online(self):
        """Online tempogram output for a track must match the librosa reference."""
        # Data: 41523 samples, 16 kHz
        # yields 40 frames with frame-size 2048 and hop-size 1024
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)

        # Reference, computed on the signal with 1024 zero samples appended
        padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
        power_spec = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_env, sr=sampling_rate, win_length=4, center=True).T

        # Pipeline output, produced chunk by chunk and stacked afterwards
        track = tracks.FileTrack('idx', wav_path)
        step = pipeline.Tempogram(win_length=4)
        actual = np.vstack(list(step.process_track_online(track, 2048, 1024, chunk_size=5)))

        assert np.allclose(actual, expected)
Example #7
Source File: test_onset.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute(self):
        """Onset strength produced by the pipeline step must match the librosa reference."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, sampling_rate = librosa.load(wav_path, sr=None)
        frames = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

        # Reference: power spectrogram -> mel spectrogram in dB -> onset envelope as a column
        power_spec = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        reference = librosa.onset.onset_strength(S=mel_db, center=False).T
        reference = reference.reshape(reference.shape[0], 1)

        # Pipeline output for the same frames
        step = pipeline.OnsetStrength()
        produced = step.process_frames(frames, sampling_rate, last=True)

        assert np.allclose(produced, reference)
Example #8
Source File: feature_extraction_functions.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 6 votes vote down vote up
def apply_noise(y,sr,wavefile):
    """Randomly mix scaled environment noise from ``wavefile`` into ``y``.

    A scale is drawn at random from 0.0, 0.25, 0.5 and 0.75. With 0.0 the
    signal is returned untouched. Otherwise the noise file is loaded,
    normalized, scaled, matched to the duration of ``y`` and added to it.

    Parameters
    ----------
    y : samples of the signal; ``len(y)`` and ``y += ...`` are used, so a
        NumPy array argument is modified in place.
    sr : sampling rate of ``y``; also used to load the noise file.
    wavefile : path of the noise recording handed to ``librosa.load``.

    Returns
    -------
    ``y`` (the same object that was passed in).
    """
    #at random apply varying amounts of environment noise
    rand_scale = random.choice([0.0,0.25,0.5,0.75])
    if rand_scale > 0.0:
        total_length = len(y)/sr
        # Load the noise at the signal's own rate. The previous hard-coded
        # 16000 Hz (which also overwrote ``sr``) time-scaled or zero-padded
        # the noise whenever ``y`` used a different sampling rate.
        y_noise, noise_sr = librosa.load(wavefile, sr=sr)
        envnoise_normalized = prep_data_vad_noise.normalize(y_noise)
        envnoise_scaled = prep_data_vad_noise.scale_noise(envnoise_normalized,rand_scale)
        envnoise_matched = prep_data_vad_noise.match_length(envnoise_scaled,noise_sr,total_length)
        # match_length may be off by a few samples: trim or zero-pad so the
        # element-wise addition below lines up exactly.
        diff = int(len(y) - len(envnoise_matched))
        if diff < 0:
            envnoise_matched = envnoise_matched[:diff]
        elif diff > 0:
            envnoise_matched = np.append(envnoise_matched,np.zeros(diff))
        y += envnoise_matched

    return y

#collects the actual features, according to the settings assigned
#such as with noise, voice activity detection/beginning silence removal, etc.
#mfcc, fbank, stft, delta, dom_freq 
Example #9
Source File: feature_extraction_functions.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 6 votes vote down vote up
def get_stft(y,sr,window_size=None, window_shift=None):
    """Return the magnitude STFT of ``y``, transposed and offset per column.

    ``window_size`` and ``window_shift`` are multiplied by ``0.001*sr`` to
    obtain ``n_fft`` and ``hop_length``; when left as None, ``0.025*sr`` and
    ``0.010*sr`` are used instead. After transposing the magnitudes, the
    mean over axis 0 plus 1e-8 is subtracted from every row.
    """
    n_fft = int(0.025*sr) if window_size is None else int(window_size*0.001*sr)
    hop_length = int(0.010*sr) if window_shift is None else int(window_shift*0.001*sr)

    # librosa returns complex numbers, so keep only the absolute value
    magnitudes = np.transpose(np.abs(librosa.stft(y,n_fft=n_fft,hop_length=hop_length)))
    offset = np.mean(magnitudes, axis=0) + 1e-8
    magnitudes -= offset

    return magnitudes

#super experimental. I wanted fundamental frequency but this was easier 
Example #10
Source File: feat_ext.py    From icassp19 with MIT License 6 votes vote down vote up
def get_spectrogram(y,
                    n_fft=1024,
                    win_length_samples=0.04,
                    hop_length_samples=0.02,
                    window=scipy.signal.windows.hamming(1024, sym=False),
                    center=True,
                    spectrogram_type='magnitude',
                    params_extract=None):
    """Return the power spectrogram of ``y`` when ``spectrogram_type == 'power'``.

    For 'power', ``params_extract.get('eps')`` is added to ``y`` and the
    squared magnitude of ``librosa.stft`` is returned. In the code shown
    here no other ``spectrogram_type`` is handled, so any other value
    (including the default 'magnitude') falls through and returns None.

    The default ``window`` is a 1024-point Hamming window built with
    ``sym=False``; it is evaluated once, when the function is defined.
    ``scipy.signal.windows.hamming`` is used because the
    ``scipy.signal.hamming`` alias was removed in SciPy 1.13.

    NOTE(review): ``win_length_samples`` / ``hop_length_samples`` default to
    0.04 / 0.02 yet are forwarded as librosa's ``win_length`` / ``hop_length``
    -- confirm callers always pass sample counts.
    NOTE(review): ``params_extract`` defaults to None, which raises
    AttributeError on the 'power' path; callers must pass a mapping with an
    'eps' entry.
    """
    if spectrogram_type == 'power':
        return np.abs(librosa.stft(y + params_extract.get('eps'),
                                   n_fft=n_fft,
                                   win_length=win_length_samples,
                                   hop_length=hop_length_samples,
                                   center=center,
                                   window=window)) ** 2
Example #11
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Randomly time-stretch ``data["stft"]`` with librosa's phase vocoder.

        Returns ``data`` unchanged when ``should_apply_transform()`` is false.
        Otherwise a scale is drawn uniformly from
        ``[-self.max_scale, self.max_scale]`` and ``data["stft"]`` is replaced
        by the phase-vocoder output computed with rate ``1 + scale`` and
        ``data["hop_length"]``. The same ``data`` object is returned.
        """
        if not should_apply_transform():
            return data

        stft = data["stft"]
        hop_length = data["hop_length"]
        scale = random.uniform(-self.max_scale, self.max_scale)
        # ``rate`` is keyword-only in librosa >= 0.10; passing it by name
        # also works on older releases.
        data["stft"] = librosa.core.phase_vocoder(
            stft, rate=1 + scale, hop_length=hop_length
        )
        return data
Example #12
Source File: dsp.py    From WaveRNN with MIT License 5 votes vote down vote up
def melspectrogram(y):
    """Return ``normalize(amp_to_db(linear_to_mel(|stft(y)|)))``."""
    magnitudes = np.abs(stft(y))
    mel_db = amp_to_db(linear_to_mel(magnitudes))
    return normalize(mel_db)
Example #13
Source File: dsp.py    From WaveRNN with MIT License 5 votes vote down vote up
def spectrogram(y):
    """Return ``normalize(amp_to_db(|stft(y)|) - ref_level_db)``."""
    magnitudes = np.abs(stft(y))
    level_db = amp_to_db(magnitudes) - ref_level_db
    return normalize(level_db)
Example #14
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Store the STFT of ``data["samples"]`` and its parameters in ``data``.

        Writes the keys "n_fft", "hop_length", "stft" and "stft_shape" and
        returns the same ``data`` object.
        """
        audio = data["samples"]
        data["n_fft"] = self.n_fft
        data["hop_length"] = self.hop_length
        spectrum = librosa.stft(
            audio, n_fft=self.n_fft, hop_length=self.hop_length
        )
        data["stft"] = spectrum
        data["stft_shape"] = spectrum.shape
        return data
Example #15
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def get_energy_rms(stft_matrix):
    """Return the root-mean-square magnitude of every row of ``stft_matrix``.

    Per the original notes, ``stft_matrix.shape[0]`` pertains to the time
    domain and ``stft_matrix.shape[1]`` to the bandwidths/frequencies.

    Parameters
    ----------
    stft_matrix : 2-D NumPy array (real or complex).

    Returns
    -------
    list with one RMS value per row.
    """
    # Vectorized form of sqrt(sum(|row|**2) / n_columns) for each row; this
    # replaces a Python-level index loop and built-in sum() over array rows.
    power = np.abs(stft_matrix)**2
    return list(np.sqrt(np.mean(power, axis=1)))
Example #16
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def stft2power(stft_matrix):
    """Return the element-wise power ``|stft_matrix|**2`` as a new array.

    The input is not modified.

    Raises
    ------
    TypeError
        If ``stft_matrix`` is None or has length 0.
    """
    # Guard clauses replace the nested if/else and the unreachable trailing
    # ``return None``; the exception types and messages are unchanged.
    if stft_matrix is None:
        raise TypeError("STFT Matrix does not exist. Function 'stft2power' needs an existing matrix.")
    if len(stft_matrix) == 0:
        raise TypeError("STFT Matrix is empty. Function 'stft2power' needs a non-empty matrix.")
    # np.abs already allocates its result, so no defensive copy is needed.
    return np.abs(stft_matrix)**2
Example #17
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def stft2samps(stft,len_origsamp):
    """Invert ``stft`` to samples via ``librosa.istft``.

    A copy of ``stft`` is transposed before inversion, and
    ``len_origsamp`` is passed as the ``length`` argument.
    """
    transposed = np.transpose(stft.copy())
    return librosa.istft(transposed,length=len_origsamp)
Example #18
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def samps2stft(y, sr):
    """Return the transposed ``librosa.stft`` of ``y`` with default settings.

    When ``y`` has an odd number of samples, the last one is dropped first.
    ``sr`` is accepted but not used here.
    """
    if len(y) % 2:
        y = y[:-1]
    return np.transpose(librosa.stft(y))
Example #19
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def get_speech_samples(samples, sr):
    """Cut ``samples`` down to the span between the detected sound start and end.

    The energy of the STFT is compared against its mean by ``sound_index``
    to find a start and an end position. These are converted to fractions of
    ``len(energy)`` and scaled to sample indices.

    Returns ``(trimmed_samples, True)`` on success. If ``NoSpeechDetected``
    is raised (either index is flagged as not found), returns
    ``(samples, False)``.
    """
    try:
        n_samples = len(samples)
        energy = get_energy(wave2stft(samples,sr))
        threshold = get_energy_mean(energy)
        onset = sound_index(energy,threshold,start=True)
        offset = sound_index(energy,threshold,start=False)
        if onset[1] == False or offset[1] == False:
            raise NoSpeechDetected("No speech detected")
        # relative positions within the energy sequence, scaled to sample indices
        first = int(onset[0]/len(energy)*n_samples)
        last = int(offset[0]/len(energy)*n_samples)
        return samples[first:last], True
    except NoSpeechDetected:
        return samples, False
    
######

#noise reduction 
Example #20
Source File: prep_noise.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def wave2stft(np_array,sr):
    """Return the transposed STFT of ``np_array``.

    Uses ``hop_length=int(0.01*sr)`` and ``n_fft=int(0.025*sr)``.
    """
    hop = int(0.01*sr)
    fft_size = int(0.025*sr)
    return np.transpose(librosa.stft(np_array,hop_length=hop,n_fft=fft_size))
Example #21
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Remove the "stft" entry from ``data`` and return ``data``."""
        data.pop("stft")
        return data
Example #22
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Invert ``data["stft"]`` and store the result under "istft_samples".

        The dtype of ``data["samples"]`` is passed on to ``librosa.core.istft``.
        Returns the same ``data`` object.
        """
        spectrum = data["stft"]
        target_dtype = data["samples"].dtype
        data["istft_samples"] = librosa.core.istft(spectrum, dtype=target_dtype)
        return data
Example #23
Source File: feature_extraction_functions.py    From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License 5 votes vote down vote up
def get_mel_spectrogram(y,sr,num_mels = None,window_size=None, window_shift=None):
    '''
    Return the mel spectrogram (FBANK) of y, transposed and offset per column.

    set values: default for mel spectrogram calculation (FBANK)
    - 40 mel bands when num_mels is None
    - windows of 25ms when window_size is None
    - window shifts of 10ms when window_shift is None

    window_size and window_shift are multiplied by 0.001*sr to obtain n_fft
    and hop_length. After transposing, the mean over axis 0 plus 1e-8 is
    subtracted from every row.
    '''
    if num_mels is None:
        num_mels = 40
    if window_size is None:
        n_fft = int(0.025*sr)
    else:
        n_fft = int(window_size*0.001*sr)
    if window_shift is None:
        hop_length = int(0.010*sr)
    else:
        hop_length = int(window_shift*0.001*sr)

    # y and sr are keyword-only in librosa >= 0.10; passing them by name
    # also works on older releases.
    fbank = librosa.feature.melspectrogram(y=y,sr=sr,n_fft=n_fft,hop_length=hop_length,n_mels=num_mels)
    fbank = np.transpose(fbank)
    fbank -= (np.mean(fbank, axis=0) + 1e-8)

    return fbank

#get stft and adjust settings if you'd like 
#note: I have not messed around with the window_size or shift here
#if you change these, you might have to adjust the default number of feature 
#columns assigned to stft in the main module (see right below def main()) 
Example #24
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Store the STFT of ``data["samples"]`` and its parameters in ``data``.

        Writes the keys "n_fft", "hop_length", "stft" and "stft_shape" and
        returns the same ``data`` object.
        """
        audio = data["samples"]
        data["n_fft"] = self.n_fft
        data["hop_length"] = self.hop_length
        spectrum = librosa.stft(
            audio, n_fft=self.n_fft, hop_length=self.hop_length
        )
        data["stft"] = spectrum
        data["stft_shape"] = spectrum.shape
        return data
Example #25
Source File: audio.py    From libfaceid with MIT License 5 votes vote down vote up
def _stft_tensorflow(signals):
  """Return ``tf.contrib.signal.stft`` of ``signals`` using the values from ``_stft_parameters()``."""
  fft_size, frame_step, frame_length = _stft_parameters()
  return tf.contrib.signal.stft(signals, frame_length, frame_step, fft_size, pad_end=False)
Example #26
Source File: audio.py    From libfaceid with MIT License 5 votes vote down vote up
def _stft(y):
  """Return ``librosa.stft`` of ``y`` using the values from ``_stft_parameters()``."""
  fft_size, hop, window_len = _stft_parameters()
  return librosa.stft(y=y, n_fft=fft_size, hop_length=hop, win_length=window_len)
Example #27
Source File: audio.py    From libfaceid with MIT License 5 votes vote down vote up
def _griffin_lim_tensorflow(S):
  '''TensorFlow implementation of Griffin-Lim
  Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb

  Runs ``hparams.griffin_lim_iters`` rounds of stft/istft and returns the
  final signal with the batch axis squeezed out.
  '''
  with tf.variable_scope('griffinlim'):
    # TensorFlow's stft and istft operate on a batch of spectrograms; create batch of size 1
    batched = tf.expand_dims(S, 0)
    target = tf.identity(tf.cast(batched, dtype=tf.complex64))
    waveform = _istft_tensorflow(target)
    for _ in range(hparams.griffin_lim_iters):
      estimate = _stft_tensorflow(waveform)
      # divide by the magnitude, floored at 1e-8, to keep only the angles
      magnitude = tf.cast(tf.maximum(1e-8, tf.abs(estimate)), tf.complex64)
      unit_angles = estimate / magnitude
      waveform = _istft_tensorflow(target * unit_angles)
    return tf.squeeze(waveform, 0)
Example #28
Source File: preprocess.py    From Singing_Voice_Separation_RNN with MIT License 5 votes vote down vote up
def spec_to_wav_batch(stft_matrices, hop_length = None):
    """Inverse STFT of every matrix in a 3-D batch.

    stft_matrices : 3-D array, iterated along its first axis; every stft
        matrix in it may have complex numbers.
    hop_length : forwarded unchanged to ``librosa.istft``.

    Returns the reconstructed wavs gathered with ``np.array``.
    """
    assert (stft_matrices.ndim == 3), 'Single stft maxtrix uses librosa.istft() directly'

    return np.array([
        librosa.istft(single_matrix, hop_length = hop_length)
        for single_matrix in stft_matrices
    ])
Example #29
Source File: audio_transforms.py    From nupic.torch with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Randomly time-stretch ``data["stft"]`` with librosa's phase vocoder.

        Returns ``data`` unchanged when ``should_apply_transform()`` is false.
        Otherwise a scale is drawn uniformly from
        ``[-self.max_scale, self.max_scale]`` and ``data["stft"]`` is replaced
        by the phase-vocoder output computed with rate ``1 + scale`` and
        ``data["hop_length"]``. The same ``data`` object is returned.
        """
        if not should_apply_transform():
            return data

        stft = data["stft"]
        hop_length = data["hop_length"]
        scale = random.uniform(-self.max_scale, self.max_scale)
        # ``rate`` is keyword-only in librosa >= 0.10; passing it by name
        # also works on older releases.
        data["stft"] = librosa.core.phase_vocoder(
            stft, rate=1 + scale, hop_length=hop_length
        )
        return data
Example #30
Source File: utils.py    From tacotron with Apache License 2.0 5 votes vote down vote up
def griffin_lim(spectrogram):
    '''Applies the Griffin-Lim algorithm to ``spectrogram``.

    Starting from a deep copy of ``spectrogram``, each of the ``hp.n_iter``
    rounds inverts the current estimate with ``invert_spectrogram``, takes
    its STFT, keeps only the phase (the STFT divided by its magnitude,
    floored at 1e-8) and multiplies that phase back onto ``spectrogram``.

    Returns the real part of the final inverted signal.
    '''
    X_best = copy.deepcopy(spectrogram)
    for _ in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        # n_fft and hop_length are keyword-only in librosa >= 0.10; passing
        # them by name also works on older releases.
        est = librosa.stft(X_t, n_fft=hp.n_fft, hop_length=hp.hop_length, win_length=hp.win_length)
        phase = est / np.maximum(1e-8, np.abs(est))
        X_best = spectrogram * phase
    X_t = invert_spectrogram(X_best)
    y = np.real(X_t)

    return y