Python librosa.power_to_db() Examples

The following are 30 code examples of librosa.power_to_db(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.
Example #1
Source File: test_rythm.py    From audiomate with MIT License 7 votes vote down vote up
def test_compute_cleanup_after_one_utterance(self):
        """Process the same frames twice with ``last=True`` and expect the
        librosa reference tempogram both times."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, rate = librosa.load(wav_path, sr=None)
        frames = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

        # Reference: power STFT -> mel -> dB -> onset envelope -> tempogram.
        power = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel = librosa.feature.melspectrogram(S=power, n_mels=128, sr=rate)
        mel_db = librosa.power_to_db(mel)
        onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_env, sr=rate, win_length=11, center=True).T

        step = pipeline.Tempogram(win_length=11)

        # Two identical passes over the same step instance: the second pass
        # must not be affected by anything left over from the first.
        for _ in range(2):
            actual = step.process_frames(frames, rate, last=True)
            assert np.allclose(actual, expected)
Example #2
Source File: test_onset.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute(self):
        """Compare ``pipeline.OnsetStrength`` on pre-framed audio with the
        onset envelope computed directly by librosa."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, rate = librosa.load(wav_path, sr=None)
        frames = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

        # Reference: power STFT -> mel -> dB -> onset envelope, as a column.
        power = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel = librosa.feature.melspectrogram(S=power, n_mels=128, sr=rate)
        mel_db = librosa.power_to_db(mel)
        envelope = librosa.onset.onset_strength(S=mel_db, center=False).T
        expected = envelope.reshape(envelope.shape[0], 1)

        # Value under test.
        step = pipeline.OnsetStrength()
        actual = step.process_frames(frames, rate, last=True)

        assert np.allclose(actual, expected)
Example #3
Source File: data.py    From magenta with Apache License 2.0 6 votes vote down vote up
def wav_to_spec(wav_audio, hparams):
  """Convert wav file contents into the representation named by hparams.

  `hparams.spec_type` selects the transform: 'raw' (framed samples), 'cqt'
  or 'mel'. For 'cqt' and 'mel' the result is additionally passed through
  `librosa.power_to_db` when `hparams.spec_log_amplitude` is truthy.

  Raises:
    ValueError: if `hparams.spec_type` is not one of the supported values.
  """
  kind = hparams.spec_type

  if kind == 'raw':
    # Raw framed samples are returned directly; dB scaling is never applied.
    return _wav_to_framed_samples(wav_audio, hparams)

  if kind == 'cqt':
    spec = _wav_to_cqt(wav_audio, hparams)
  elif kind == 'mel':
    spec = _wav_to_mel(wav_audio, hparams)
  else:
    raise ValueError('Invalid spec_type: {}'.format(hparams.spec_type))

  if not hparams.spec_log_amplitude:
    return spec
  return librosa.power_to_db(spec)
Example #4
Source File: spec_augment_tensorflow.py    From Speech-Transformer with MIT License 6 votes vote down vote up
def visualization_tensor_spectrogram(mel_spectrogram, title):
    """Evaluate a mel-spectrogram tensor and plot its first entry.

    # Arguments:
      mel_spectrogram(Tensor): tensor to evaluate; the slice `[0, :, :, 0]`
        of the evaluated array is plotted.
      title(String): plot figure's title
    """
    # `Tensor.eval()` needs a session. The session is closed again as soon as
    # the values have been fetched, so repeated calls do not accumulate open
    # sessions.
    sess = tf.InteractiveSession()
    try:
        mel_spectrogram = mel_spectrogram.eval()
    finally:
        sess.close()

    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), y_axis='mel', fmax=8000,
                             x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #5
Source File: preprocessing.py    From speechT with Apache License 2.0 6 votes vote down vote up
def calc_power_spectrogram(audio_data, samplerate, n_mels=128, n_fft=512, hop_length=160):
  """
  Calculate a normalized log-mel power spectrogram from raw audio data

  Args:
    audio_data: numpyarray of raw audio wave
    samplerate: the sample rate of the `audio_data`
    n_mels: the number of mels to generate
    n_fft: the window size of the fft
    hop_length: the hop length for the window

  Returns: the spectrogram in the form [time, n_mels]

  """
  # The audio is passed as `y=`: recent librosa releases reject positional
  # arguments here, while the keyword form works on old and new versions.
  spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=samplerate, n_mels=n_mels, n_fft=n_fft,
                                               hop_length=hop_length)

  # convert to log scale (dB), relative to the maximum of the spectrogram
  log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

  # normalize
  normalized_spectrogram = normalize(log_spectrogram)

  # transpose so that time becomes the first axis
  return normalized_spectrogram.T
Example #6
Source File: test_onset.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute_online(self):
        """Compare chunked online onset-strength processing of a track with
        the librosa reference computed on the zero-padded signal."""
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, rate = librosa.load(wav_path, sr=None)

        # Reference: append 1024 zeros, then power STFT -> mel -> dB -> onsets.
        padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
        power = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel = librosa.feature.melspectrogram(S=power, n_mels=128, sr=rate)
        mel_db = librosa.power_to_db(mel)
        envelope = librosa.onset.onset_strength(S=mel_db, center=False).T
        expected = envelope.reshape(envelope.shape[0], 1)

        # Value under test: process the track online in chunks of 5 frames.
        track = tracks.FileTrack('idx', wav_path)
        step = pipeline.OnsetStrength()
        pieces = list(step.process_track_online(track, 2048, 1024, chunk_size=5))
        actual = np.vstack(pieces)

        print(actual.shape, expected.shape)

        assert np.allclose(actual, expected)
Example #7
Source File: test_rythm.py    From audiomate with MIT License 6 votes vote down vote up
def test_compute_online(self):
        """Compare chunked online tempogram processing of a track with the
        librosa reference computed on the zero-padded signal."""
        # Data: 41523 samples, 16 kHz
        # yields 40 frames with frame-size 2048 and hop-size 1024
        wav_path = resources.sample_wav_file('wav_1.wav')
        samples, rate = librosa.load(wav_path, sr=None)

        # Reference: append 1024 zeros, then power STFT -> mel -> dB ->
        # onset envelope -> tempogram.
        padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
        power = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
        mel = librosa.feature.melspectrogram(S=power, n_mels=128, sr=rate)
        mel_db = librosa.power_to_db(mel)
        onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
        expected = librosa.feature.tempogram(onset_envelope=onset_env, sr=rate, win_length=4, center=True).T

        # Value under test: process the track online in chunks of 5 frames.
        track = tracks.FileTrack('idx', wav_path)
        step = pipeline.Tempogram(win_length=4)
        pieces = list(step.process_track_online(track, 2048, 1024, chunk_size=5))
        actual = np.vstack(pieces)

        assert np.allclose(actual, expected)
Example #8
Source File: plotting.py    From DeepSpectrum with GNU General Public License v3.0 6 votes vote down vote up
def plot_mel_spectrogram(audio_data,
                         sr,
                         nfft=None,
                         melbands=64,
                         delta=None,
                         **kwargs):
    """Build a dB-scaled mel spectrogram of the audio and hand it to the plotter.

    `kwargs` must contain 'ylim'; its 'scale' entry is overwritten with 'mel'
    before the remaining keyword arguments are forwarded to `_create_plot`.
    When `delta` is truthy, the delta of that order is taken before the mel
    filtering.
    """
    spec = y_limited_spectrogram(audio_data, sr=sr, nfft=nfft, ylim=kwargs['ylim'])
    kwargs['scale'] = 'mel'

    if delta:
        spec = librosa.feature.delta(spec, order=delta)

    power = np.abs(spec)**2
    mel = librosa.feature.melspectrogram(S=power, sr=sr, n_mels=melbands)
    # dB relative to the maximum, with no clipping of the dynamic range.
    mel_db = librosa.power_to_db(mel, ref=np.max, top_db=None)
    return _create_plot(mel_db, sr, nfft, **kwargs)
Example #9
Source File: onset.py    From audiomate with MIT License 6 votes vote down vote up
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        """Return the onset strength of the frames in ``chunk``, one row per
        frame, with the chunk's left/right context frames removed."""
        # Mel spectrogram in dB, computed from the power spectrum of the frames.
        frame_power = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
        mel_spec = librosa.feature.melspectrogram(S=frame_power, n_mels=self.n_mels, sr=sampling_rate)
        mel_db = librosa.power_to_db(np.abs(mel_spec))

        # Onset strength envelope.
        strengths = librosa.onset.onset_strength(S=mel_db, center=False)

        # Reshape so the first axis indexes frames.
        n_frames = strengths.shape[0]
        strengths = strengths.T.reshape(n_frames, -1)

        # Drop the context frames on both sides.
        first = chunk.left_context
        last = strengths.shape[0] - chunk.right_context
        return strengths[first:last]
Example #10
Source File: feature_extraction.py    From Sound-Recognition-Tutorial with Apache License 2.0 6 votes vote down vote up
def extract_mfcc(y, sr, size=3):
    """
    extract MFCC feature (MFCC, delta and delta-delta stacked along axis 0)
    :param y: np.ndarray [shape=(n,)], real-valued the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: MFCC feature
    :raises ValueError: if 'y' is shorter than the requested crop
    """
    # crop length in samples; int() keeps the slice valid for a float size/sr
    crop_len = int(size * sr)
    if len(y) < crop_len:
        raise ValueError(
            'audio has {} samples, shorter than the requested crop of {}'.format(len(y), crop_len))

    # peak normalization; a silent signal is left as is instead of being
    # divided by zero (which would turn every sample into NaN)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y * (1 / peak)

    # random crop
    start = random.randint(0, len(y) - crop_len)
    y = y[start: start + crop_len]

    # extract log mel spectrogram, then MFCC and its first/second deltas
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(melspectrogram), n_mfcc=20)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta_delta = librosa.feature.delta(mfcc_delta)
    mfcc_comb = np.concatenate([mfcc, mfcc_delta, mfcc_delta_delta], axis=0)

    return mfcc_comb
Example #11
Source File: feature_extraction.py    From Sound-Recognition-Tutorial with Apache License 2.0 6 votes vote down vote up
def extract_logmel(y, sr, size=3):
    """
    extract log mel spectrogram feature
    :param y: the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: log-mel spectrogram feature
    :raises ValueError: if 'y' is shorter than the requested crop
    """
    # crop length in samples; int() keeps the slice valid for a float size/sr
    crop_len = int(size * sr)
    if len(y) < crop_len:
        raise ValueError(
            'audio has {} samples, shorter than the requested crop of {}'.format(len(y), crop_len))

    # peak normalization; a silent signal is left as is instead of being
    # divided by zero (which would turn every sample into NaN)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y * (1 / peak)

    # random crop
    start = random.randint(0, len(y) - crop_len)
    y = y[start: start + crop_len]

    # extract log mel spectrogram with 60 mel bands
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024, n_mels=60)
    logmelspec = librosa.power_to_db(melspectrogram)

    return logmelspec
Example #12
Source File: create_patches.py    From tartarus with MIT License 6 votes vote down vote up
def prepare_testset(dataset_name):
    """Convert the pickled spectrograms of a dataset's test items to dB.

    Reads the track ids from ``items_index_test_<dataset_name>.tsv``, loads
    each track's pickled spectrogram, converts ``abs(spec) ** 2`` to dB
    relative to its maximum, transposes it and pickles the result into the
    ``spectro_<dataset_name>_testset`` folder. Tracks that cannot be processed
    are reported on stdout and skipped.
    """
    spec_folder = common.SPECTRO_PATH + SPECTRO_FOLDER + "/"
    test_folder = common.DATA_DIR + '/spectro_%s_testset/' % dataset_name
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)

    index_path = common.DATASETS_DIR + '/items_index_test_%s.tsv' % dataset_name
    with open(index_path) as index_file:
        items = index_file.read().splitlines()

    for t, track_id in enumerate(items):
        if MSD:
            # sub-folders are named after characters 2, 3 and 4 of the track id
            msd_folder = track_id[2] + "/" + track_id[3] + "/" + track_id[4] + "/"
        else:
            msd_folder = ""
        spec_path = spec_folder + msd_folder + track_id + ".pk"
        try:
            # NOTE(security): pickle.load can execute arbitrary code; only use
            # this on spectrogram files produced by a trusted pipeline.
            with open(spec_path, "rb") as spec_file:
                spec = pickle.load(spec_file)
            spec = librosa.power_to_db(np.abs(spec) ** 2, ref=np.max).T
            with open(test_folder + track_id + ".pk", "wb") as out_file:
                pickle.dump(spec, out_file)
            if t % 1000 == 0:
                # progress indicator
                print(t)
        except Exception:
            # Best effort: report the track and continue with the next one.
            # Catching Exception (not a bare except) lets Ctrl-C still abort.
            print("no exist %s" % spec_path)
Example #13
Source File: singlelayer.py    From EUSIPCO2017 with GNU Affero General Public License v3.0 6 votes vote down vote up
def compute_spectrograms(filename):
    """Yield consecutive, non-overlapping spectrogram segments of an audio file.

    The file is loaded as mono at 12 kHz. Each yielded array spans
    SEGMENT_DUR columns of the dB-scaled spectrogram; a trailing remainder
    shorter than SEGMENT_DUR is not yielded. Raises Exception when the audio
    is shorter than three seconds.
    """
    target_rate = 12000
    fft_size = 512
    hop = 256

    samples, _ = librosa.load(filename, sr=target_rate, mono=True)
    min_samples = target_rate * 3
    if len(samples) < min_samples:
        # less than 3 seconds of audio cannot be processed
        raise Exception("Audio duration is too short")

    mel = librosa.feature.melspectrogram(y=samples, sr=target_rate, hop_length=hop,
                                         n_fft=fft_size, n_mels=N_MEL_BANDS)
    # The mel spectrogram is squared before the dB conversion (reference 1.0).
    log_mel = librosa.power_to_db(mel ** 2, ref=1.0)

    # Walk the time axis with a stride of one segment.
    n_columns = log_mel.shape[1]
    for begin in range(0, n_columns - SEGMENT_DUR + 1, SEGMENT_DUR):
        end = begin + SEGMENT_DUR
        yield log_mel[:, begin:end]
Example #14
Source File: melgram.py    From mxnet-audio with MIT License 5 votes vote down vote up
def melgram_v1(audio_file_path, to_file):
    """Save the mel spectrogram of an audio file as an image without axes.

    The spectrogram is shown in dB relative to its maximum and written to
    `to_file`; the pylab figure is closed afterwards.
    """
    samples, rate = librosa.load(audio_file_path)

    # Hide the axes and let the plot area fill the whole figure.
    pylab.axis('off')
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])

    mel_power = librosa.feature.melspectrogram(y=samples, sr=rate)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    librosa.display.specshow(mel_db)

    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #15
Source File: audio_transforms.py    From htmpapers with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Add a dB-scaled mel spectrogram computed from the STFT in ``data``.

        Reads ``data['stft']``, ``data['sample_rate']`` and ``data['n_fft']``,
        stores the result under ``data['mel_spectrogram']`` and returns the
        same dict.
        """
        stft = data['stft']
        sample_rate = data['sample_rate']
        n_fft = data['n_fft']
        # Keyword arguments: recent librosa releases reject positional
        # arguments to filters.mel; keywords work on old and new versions.
        mel_basis = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, n_mels=self.n_mels)
        # Project the power spectrum onto the mel filter bank.
        s = np.dot(mel_basis, np.abs(stft)**2.0)
        data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
        return data
Example #16
Source File: audio_transforms.py    From htmpapers with GNU Affero General Public License v3.0 5 votes vote down vote up
def __call__(self, data):
        """Add a dB-scaled mel spectrogram computed from the samples in ``data``.

        Reads ``data['samples']`` and ``data['sample_rate']``, stores the
        result under ``data['mel_spectrogram']`` and returns the same dict.
        """
        samples = data['samples']
        sample_rate = data['sample_rate']
        # The audio is passed as `y=`: recent librosa releases reject
        # positional arguments here; the keyword form works on all versions.
        s = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=self.n_mels)
        data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
        return data
Example #17
Source File: features.py    From dcase_util with MIT License 5 votes vote down vote up
def extract(self, y):
        """Compute MFCCs for an audio signal.

        Parameters
        ----------
        y : numpy.ndarray [shape=(n,)]
            Audio signal

        Returns
        -------
        numpy.ndarray
            MFCC matrix computed with ``n_mfcc=self.n_mfcc``; the first
            coefficient row is dropped when ``self.omit_zeroth`` is set.

        """

        spectrogram_args = dict(
            y=y,
            n_fft=self.n_fft,
            win_length_samples=self.win_length_samples,
            hop_length_samples=self.hop_length_samples,
            spectrogram_type=self.spectrogram_type,
            center=True,
            window=self.window,
        )
        spectrogram = self.get_spectrogram(**spectrogram_args)

        # Mel filtering followed by dB scaling gives the input for the MFCCs.
        log_mel = librosa.power_to_db(numpy.dot(self.mel_basis, spectrogram))
        coefficients = librosa.feature.mfcc(S=log_mel, n_mfcc=self.n_mfcc)

        if not self.omit_zeroth:
            return coefficients

        # Remove first coefficient
        return coefficients[1:, :]
Example #18
Source File: melgram.py    From keras-audio with MIT License 5 votes vote down vote up
def melgram_v1(audio_file_path, to_file):
    """Save the mel spectrogram of an audio file as an image without axes.

    The spectrogram is shown in dB relative to its maximum and written to
    `to_file`; the pylab figure is closed afterwards.
    """
    samples, rate = librosa.load(audio_file_path)

    # Hide the axes and let the plot area fill the whole figure.
    pylab.axis('off')
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])

    mel_power = librosa.feature.melspectrogram(y=samples, sr=rate)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    librosa.display.specshow(mel_db)

    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #19
Source File: Plot.py    From Wave-U-Net with MIT License 5 votes vote down vote up
def draw_spectrogram(example_wav="musb_005_angela thomas wade_audio_model_without_context_cut_28234samples_61002samples_93770samples_126538.wav"):
    """Plot the dB power spectrogram of a wav file and save it as a PDF.

    Red vertical marks are drawn at the top and bottom of the image at four
    fixed sample positions. The figure is written to
    "spectrogram_example.pdf" in the current working directory.
    """
    y, sr = Utils.load(example_wav, sr=None)
    # STFT parameters are given by keyword: recent librosa releases reject
    # positional n_fft/hop_length/win_length; keywords work on all versions.
    spec = np.abs(librosa.stft(y, n_fft=512, hop_length=256, win_length=512))
    norm_spec = librosa.power_to_db(spec**2)
    # Marker positions: sample indices divided by the hop length (256).
    black_time_frames = np.array([28234, 61002, 93770, 126538]) / 256.0

    fig, ax = plt.subplots()
    img = ax.imshow(norm_spec)
    # Short red marks in rows 0-10 and rows 246-256 of the image.
    plt.vlines(black_time_frames, [0, 0, 0, 0], [10, 10, 10, 10], colors="red", lw=2, alpha=0.5)
    plt.vlines(black_time_frames, [256, 256, 256, 256], [246, 246, 246, 246], colors="red", lw=2, alpha=0.5)

    # Colorbar in its own narrow axis to the right of the image.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    plt.colorbar(img, cax=cax)

    # One x tick per second: second i is at column i * sr / 256.
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.set_major_locator(ticker.FixedLocator(([i * sr / 256. for i in range(len(y)//sr + 1)])))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(([str(i) for i in range(len(y)//sr + 1)])))

    # y ticks every 2 kHz (labels 0, 2, ..., 10), placed by scaling the
    # frequency against sr/2 over 256 rows.
    ax.yaxis.set_major_locator(ticker.FixedLocator(([float(i) * 2000.0 / (sr/2.0) * 256. for i in range(6)])))
    ax.yaxis.set_major_formatter(ticker.FixedFormatter([str(i*2) for i in range(6)]))

    ax.set_xlabel("t (s)")
    ax.set_ylabel('f (KHz)')

    fig.set_size_inches(7., 3.)
    fig.savefig("spectrogram_example.pdf", bbox_inches='tight')
Example #20
Source File: spectrogram.py    From cocktail-party with MIT License 5 votes vote down vote up
def signal_to_mel_spectrogram(self, audio_signal, log=True, get_phase=False):
        """Compute a mel spectrogram from channel 0 of ``audio_signal``.

        The STFT magnitude is projected through ``self._MEL_FILTER`` and then
        squared; with ``log`` set, the result is converted to dB. Returns the
        spectrogram, or a ``(spectrogram, phase)`` tuple when ``get_phase``
        is set.
        """
        samples = audio_signal.get_data(channel_index=0)
        stft = librosa.core.stft(samples, n_fft=self._N_FFT, hop_length=self._HOP_LENGTH)
        magnitude, phase = librosa.core.magphase(stft)

        # Mel-filter the magnitude first, then square the filtered values.
        mel = np.dot(self._MEL_FILTER, magnitude) ** 2
        if log:
            mel = librosa.power_to_db(mel)

        return (mel, phase) if get_phase else mel
Example #21
Source File: spec_augment_pytorch.py    From Speech-Transformer with MIT License 5 votes vote down vote up
def visualization_spectrogram(mel_spectrogram, title):
    """Plot the first mel spectrogram of a batch in dB.
    # Arguments:
      mel_spectrogram(ndarray): batch of mel spectrograms; entry 0 is shown.
      title(String): plot figure's title
    """
    first = mel_spectrogram[0, :, :]
    first_db = librosa.power_to_db(first, ref=np.max)

    # Render with librosa's specshow on a mel frequency axis.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(first_db, y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #22
Source File: feature_extraction.py    From deepsaber with GNU General Public License v3.0 5 votes vote down vote up
def extract_features_mel(y, sr, hop,mel_dim=100):
    """Return the mel spectrogram of `y` (with `mel_dim` bands and hop length
    `hop`) in dB relative to its maximum."""
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, hop_length=hop)
    return librosa.power_to_db(mel_power, ref=np.max)
Example #23
Source File: spec_augment_tensorflow.py    From Speech-Transformer with MIT License 5 votes vote down vote up
def visualization_spectrogram(mel_spectrogram, title):
    """Plot the first mel spectrogram of a 4-D batch in dB.
    # Arguments:
      mel_spectrogram(ndarray): batch of mel spectrograms; the slice
        [0, :, :, 0] is shown.
      title(String): plot figure's title
    """
    first = mel_spectrogram[0, :, :, 0]
    first_db = librosa.power_to_db(first, ref=np.max)

    # Render with librosa's specshow on a mel frequency axis.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(first_db, y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #24
Source File: audio.py    From MelNet with MIT License 5 votes vote down vote up
def pre_spec(self, x):
        """Convert ``x`` to dB, subtract the configured reference level
        (``self.hp.audio.ref_level_db``) and pass the result to
        ``self.normalize``."""
        decibels = librosa.power_to_db(x)
        shifted = decibels - self.hp.audio.ref_level_db
        return self.normalize(shifted)
Example #25
Source File: feature_extraction.py    From deepsaber with GNU General Public License v3.0 5 votes vote down vote up
def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=(1024, 2048, 4096), mel_dim=100):
    """Stack dB mel spectrograms of `y` computed at several FFT sizes.

    One mel spectrogram (with `mel_dim` bands and hop length `hop`) is
    computed per entry of `nffts`, converted to dB relative to its own
    maximum, and the results are stacked along a new axis 1.

    The default for `nffts` is an immutable tuple so the default value can
    never be shared and mutated between calls; any iterable of FFT sizes is
    accepted. An empty `nffts` raises ValueError from `np.stack`.
    """
    per_resolution = [
        librosa.power_to_db(
            librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, n_fft=nfft, hop_length=hop),
            ref=np.max,
        )
        for nfft in nffts
    ]
    return np.stack(per_resolution, axis=1)
Example #26
Source File: spec_augment_pytorch.py    From SpecAugment with Apache License 2.0 5 votes vote down vote up
def visualization_spectrogram(mel_spectrogram, title):
    """Plot the first mel spectrogram of a batch in dB.
    # Arguments:
      mel_spectrogram(ndarray): batch of mel spectrograms; entry 0 is shown.
      title(String): plot figure's title
    """
    first = mel_spectrogram[0, :, :]
    first_db = librosa.power_to_db(first, ref=np.max)

    # Render with librosa's specshow on a mel frequency axis.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(first_db, y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #27
Source File: audio.py    From argus-freesound with MIT License 5 votes vote down vote up
def audio_to_melspectrogram(audio):
    """Return the dB-scaled mel spectrogram of `audio` as a float32 array.

    All spectrogram parameters (sampling rate, number of mel bands, hop
    length, FFT size and frequency range) are taken from `config`.
    """
    # The audio is passed as `y=`: recent librosa releases reject positional
    # arguments here, while the keyword form works on old and new versions.
    spectrogram = librosa.feature.melspectrogram(y=audio,
                                                 sr=config.sampling_rate,
                                                 n_mels=config.n_mels,
                                                 hop_length=config.hop_length,
                                                 n_fft=config.n_fft,
                                                 fmin=config.fmin,
                                                 fmax=config.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram
Example #28
Source File: spectral.py    From audiomate with MIT License 5 votes vote down vote up
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        """Return the MFCCs of the frames in ``chunk`` (transposed result of
        ``librosa.feature.mfcc`` with ``n_mfcc=self.n_mfcc``)."""
        frame_power = np.abs(stft_from_frames(chunk.data.T)) ** 2

        # power spectrum -> mel bands -> dB -> cepstral coefficients
        mel_spec = librosa.feature.melspectrogram(S=frame_power, n_mels=self.n_mels, sr=sampling_rate)
        mel_db = librosa.power_to_db(mel_spec)
        coefficients = librosa.feature.mfcc(S=mel_db, n_mfcc=self.n_mfcc)

        return coefficients.T
Example #29
Source File: magnitude_scaling.py    From audiomate with MIT License 5 votes vote down vote up
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
        """Convert the chunk's data to dB using this step's ``ref``, ``amin``
        and ``top_db`` settings; the data is transposed for the conversion
        and transposed back afterwards."""
        transposed = chunk.data.T
        decibels = librosa.power_to_db(transposed, ref=self.ref, amin=self.amin, top_db=self.top_db)
        return decibels.T
Example #30
Source File: test_spectral.py    From audiomate with MIT License 5 votes vote down vote up
def test_compute(self):
        """Compare ``pipeline.MFCC`` on pre-framed samples with MFCCs computed
        directly by librosa; the results must match exactly."""
        signal = np.arange(8096).astype(np.float32)

        # Reference: power STFT -> mel -> dB -> MFCC.
        power = np.abs(librosa.core.stft(signal, n_fft=2048, hop_length=512, center=False)) ** 2
        mel_power = librosa.feature.melspectrogram(S=power, sr=16000, n_mels=128)
        mel_db = librosa.power_to_db(mel_power)
        expected = librosa.feature.mfcc(S=mel_db, n_mfcc=13).T

        # Value under test.
        frames = librosa.util.frame(signal, frame_length=2048, hop_length=512).T
        step = pipeline.MFCC(n_mfcc=13, n_mels=128)
        actual = step.process_frames(frames, sampling_rate=16000)

        assert np.array_equal(expected, actual)