Python librosa.amplitude_to_db() Examples

The following are 30 code examples of librosa.amplitude_to_db(), drawn from open-source projects; the originating project and source file are noted above each example. You may also want to check out all available functions and classes of the librosa module, or try the search function.
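Before the examples, here is a minimal, self-contained sketch of what the function computes: amplitude_to_db(S, ref=np.max) returns 20 * log10(S / S.max()), clipped so that no value falls more than top_db (default 80) below the peak.

import numpy as np
import librosa

# One second of a 440 Hz tone as a stand-in signal.
sr = 22050
y = np.sin(2 * np.pi * 440 * np.arange(sr) / sr)

# Magnitude spectrogram.
S = np.abs(librosa.stft(y, n_fft=1024, hop_length=256))

# dB relative to peak: 20 * log10(S / S.max()), floored at peak - top_db.
S_db = librosa.amplitude_to_db(S, ref=np.max)

assert S_db.max() == 0.0    # the peak maps to 0 dB
assert S_db.min() >= -80.0  # default top_db=80 bounds the dynamic range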
Example #1
Source File: audio.py    From parallel-wavenet-vocoder with MIT License
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, max_db=None, min_db=None,
                   time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if max_db is not None and min_db is not None else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db 
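normalize_db is a project-local helper that is not shown in this snippet. As a purely hypothetical stand-in (assuming it clips the dB values to [min_db, max_db] and rescales them to [0, 1]; the actual implementation in parallel-wavenet-vocoder may differ):

import numpy as np

def normalize_db(db, max_db, min_db):
    # Hypothetical sketch: clip to [min_db, max_db], then rescale to [0, 1].
    db = np.clip(db, min_db, max_db)
    return (db - min_db) / (max_db - min_db)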
Example #2
Source File: features.py    From msaf with MIT License
def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        mfcc: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        S = librosa.feature.melspectrogram(self._audio,
                                           sr=self.sr,
                                           n_fft=self.n_fft,
                                           hop_length=self.hop_length,
                                           n_mels=self.n_mels)
        log_S = librosa.amplitude_to_db(S, ref=self.ref_power)
        mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=self.n_mfcc).T
        return mfcc 
Example #3
Source File: attn_visualize.py    From KoSpeech with Apache License 2.0
def parse_audio(audio_path):
    sound = load_audio(audio_path, DEL_SILENCE)

    spectrogram = librosa.feature.melspectrogram(sound, SAMPLE_RATE, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH)
    spectrogram = librosa.amplitude_to_db(spectrogram, ref=np.max)

    if NORMALIZE:
        mean = np.mean(spectrogram)
        std = np.std(spectrogram)
        spectrogram -= mean
        spectrogram /= std

    spectrogram = spectrogram[:, ::-1]  # reverse the time axis

    spectrogram = torch.FloatTensor(np.ascontiguousarray(np.swapaxes(spectrogram, 0, 1)))

    return spectrogram 
Example #4
Source File: feature.py    From KoSpeech with Apache License 2.0
def __init__(self, sample_rate=16000, n_mels=80, window_size=20, stride=10, feature_extract_by='librosa'):
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        self.n_fft = int(sample_rate * 0.001 * window_size)
        self.hop_length = int(sample_rate * 0.001 * stride)
        self.feature_extract_by = feature_extract_by.lower()

        if self.feature_extract_by == 'torchaudio':
            self.transforms = torchaudio.transforms.MelSpectrogram(
                sample_rate=sample_rate,
                win_length=self.n_fft,  # window length in samples (window_size is given in ms)
                hop_length=self.hop_length,
                n_fft=self.n_fft,
                n_mels=n_mels
            )
            self.amplitude_to_db = torchaudio.transforms.AmplitudeToDB() 
Example #5
Source File: feature.py    From KoSpeech with Apache License 2.0
def __call__(self, signal):
        if self.feature_extract_by == 'torchaudio':
            melspectrogram = self.transforms(torch.FloatTensor(signal))
            melspectrogram = self.amplitude_to_db(melspectrogram)
            melspectrogram = melspectrogram.numpy()

        elif self.feature_extract_by == 'librosa':
            melspectrogram = librosa.feature.melspectrogram(
                y=signal,
                sr=self.sample_rate,
                n_mels=self.n_mels,
                n_fft=self.n_fft,
                hop_length=self.hop_length
            )
            melspectrogram = librosa.amplitude_to_db(melspectrogram, ref=np.max)

        else:
            raise ValueError("Unsupported library : {0}".format(self.feature_extract_by))

        return melspectrogram 
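One subtlety in this example: both torchaudio.transforms.MelSpectrogram and librosa.feature.melspectrogram return a power (squared-magnitude) spectrogram by default. torchaudio's AmplitudeToDB defaults to stype='power' (10 * log10), which matches that input, while librosa.amplitude_to_db always applies 20 * log10, so the librosa branch effectively doubles the dB scale. If the two scales need to agree, librosa.power_to_db is the matching conversion; the identity below (a quick sketch, not from the source) shows the relationship:

import numpy as np
import librosa

y = np.random.randn(16000).astype(np.float32)  # stand-in signal
A = np.abs(librosa.stft(y))                    # magnitude spectrogram

# amplitude_to_db on a magnitude equals power_to_db on the squared magnitude.
np.testing.assert_allclose(
    librosa.amplitude_to_db(A, top_db=None),
    librosa.power_to_db(A**2, top_db=None),
    rtol=1e-5,
)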
Example #6
Source File: audio.py    From deep-voice-conversion with MIT License
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None,
                   time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db 
Example #7
Source File: test_audio.py    From emlearn with MIT License
def test_melfilter_librosa():
    filename = librosa.util.example_audio_file()
    y, sr = librosa.load(filename, offset=1.0, duration=0.3)
    n_fft = 1024
    hop_length = 256
    fmin = 500
    fmax = 5000
    n_mels = 16

    spec = numpy.abs(librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    spec1 = spec[:,0]

    ref = librosa.feature.melspectrogram(S=spec1, sr=sr, norm=None, htk=True, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    out = eml_audio.melfilter(spec1, sr, n_fft, n_mels, fmin, fmax)

    fig, (ref_ax, out_ax) = plt.subplots(2)
    def specshow(d, ax):
        s = librosa.amplitude_to_db(d, ref=numpy.max)
        librosa.display.specshow(s, ax=ax, x_axis='time')
    specshow(ref.reshape(-1, 1), ax=ref_ax)
    specshow(out.reshape(-1, 1), ax=out_ax)
    fig.savefig('melfilter.librosa.png')

    assert ref.shape == out.shape
    numpy.testing.assert_allclose(ref, out, rtol=0.01) 
Example #8
Source File: feature_extract.py    From voice-vector with MIT License
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None, time_first=True, **kwargs):

    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db 
Example #9
Source File: audio.py    From voice-vector with MIT License
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None, time_first=True, **kwargs):

    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db 
Example #10
Source File: utils.py    From nussl with MIT License
def visualize_spectrogram(audio_signal, ch=0, do_mono=False, x_axis='time', 
                          y_axis='linear',  **kwargs):
    """
    Wrapper around `librosa.display.specshow` for usage with AudioSignals.
    
    Args:
        audio_signal (AudioSignal): AudioSignal to plot
        ch (int, optional): Which channel to plot. Defaults to 0.
        do_mono (bool, optional): Make the AudioSignal mono. Defaults to False.
        x_axis (str, optional): x_axis argument to librosa.display.specshow. Defaults to 'time'.
        y_axis (str, optional): y_axis argument to librosa.display.specshow. Defaults to 'linear'.
        kwargs: Additional keyword arguments to librosa.display.specshow.
    """
    import librosa.display

    if do_mono:
        audio_signal = audio_signal.to_mono(overwrite=False)
    
    data = librosa.amplitude_to_db(np.abs(audio_signal.stft()), ref=np.max)
    librosa.display.specshow(data[..., ch], x_axis=x_axis, y_axis=y_axis, 
        sr=audio_signal.sample_rate, hop_length=audio_signal.stft_params.hop_length,
        **kwargs) 
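Assuming nussl is installed, calling this wrapper might look like the following ('mix.wav' is a placeholder path):

from nussl import AudioSignal
import matplotlib.pyplot as plt

signal = AudioSignal('mix.wav')  # placeholder input file
plt.figure(figsize=(10, 4))
visualize_spectrogram(signal, do_mono=True, y_axis='log')  # the wrapper defined above
plt.tight_layout()
plt.show()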
Example #11
Source File: audio.py    From Multilingual_Text_to_Speech with MIT License
def spectrogram(y, mel=False):
    """Convert waveform to log-magnitude spectrogram."""
    if hp.use_preemphasis: y = preemphasis(y)
    wf = ms_to_frames(hp.stft_window_ms)
    hf = ms_to_frames(hp.stft_shift_ms)
    S = np.abs(librosa.stft(y, n_fft=hp.num_fft, hop_length=hf, win_length=wf))
    if mel: S = librosa.feature.melspectrogram(S=S, sr=hp.sample_rate, n_mels=hp.num_mels)
    return amplitude_to_db(S) 
Example #12
Source File: audio.py    From Multilingual_Text_to_Speech with MIT License
def amplitude_to_db(x):
    """Convert amplitude to decibels."""
    return librosa.amplitude_to_db(x, ref=np.max, top_db=None) 
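With ref=np.max and top_db=None the mapping is invertible up to the discarded peak value: librosa.db_to_amplitude undoes the log, and multiplying by the original maximum restores the scale. A quick round-trip sketch:

import numpy as np
import librosa

x = np.abs(np.random.randn(64)) + 1e-3  # stand-in magnitudes

x_db = librosa.amplitude_to_db(x, ref=np.max, top_db=None)
x_rec = librosa.db_to_amplitude(x_db) * x.max()  # reapply the peak used as ref

np.testing.assert_allclose(x_rec, x, rtol=1e-6)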
Example #13
Source File: utils.py    From PyTorch_Speaker_Verification with BSD 3-Clause "New" or "Revised" License
def mfccs_and_spec(wav_file, wav_process=False, calc_mfccs=False, calc_mag_db=False):
    sound_file, _ = librosa.core.load(wav_file, sr=hp.data.sr)
    window_length = int(hp.data.window*hp.data.sr)
    hop_length = int(hp.data.hop*hp.data.sr)
    duration = hp.data.tisv_frame * hp.data.hop + hp.data.window
    
    # Cut silence and fix length
    if wav_process:
        sound_file, index = librosa.effects.trim(sound_file, frame_length=window_length, hop_length=hop_length)
        length = int(hp.data.sr * duration)
        sound_file = librosa.util.fix_length(sound_file, length)
        
    spec = librosa.stft(sound_file, n_fft=hp.data.nfft, hop_length=hop_length, win_length=window_length)
    mag_spec = np.abs(spec)
    
    mel_basis = librosa.filters.mel(hp.data.sr, hp.data.nfft, n_mels=hp.data.nmels)
    mel_spec = np.dot(mel_basis, mag_spec)
    
    mag_db = librosa.amplitude_to_db(mag_spec)
    #db mel spectrogram
    mel_db = librosa.amplitude_to_db(mel_spec).T
    
    mfccs = None
    if calc_mfccs:
        mfccs = np.dot(librosa.filters.dct(40, mel_db.shape[0]), mel_db).T
    
    return mfccs, mel_db, mag_db 
Example #14
Source File: data_tools.py    From Speech-enhancement with MIT License
def audio_to_magnitude_db_and_phase(n_fft, hop_length_fft, audio):
    """This function takes an audio and convert into spectrogram,
       it returns the magnitude in dB and the phase"""

    stftaudio = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length_fft)
    stftaudio_magnitude, stftaudio_phase = librosa.magphase(stftaudio)

    stftaudio_magnitude_db = librosa.amplitude_to_db(
        stftaudio_magnitude, ref=np.max)

    return stftaudio_magnitude_db, stftaudio_phase 
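A matching inverse (a hedged sketch, not taken from the source project): convert the dB magnitude back to linear scale, reapply the phase, and invert the STFT. Because the forward pass used ref=np.max, the absolute peak level is lost and the output comes back normalized.

import librosa

def magnitude_db_and_phase_to_audio(hop_length_fft, magnitude_db, phase):
    """Sketch of the inverse of audio_to_magnitude_db_and_phase."""
    magnitude = librosa.db_to_amplitude(magnitude_db, ref=1.0)
    # magphase returns unit phasors, so magnitude * phase is the complex STFT.
    return librosa.istft(magnitude * phase, hop_length=hop_length_fft)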
Example #15
Source File: plotting.py    From DeepSpectrum with GNU General Public License v3.0
def plot_spectrogram(audio_data, sr, nfft=None, delta=None, **kwargs):
    spectrogram = librosa.stft(audio_data,
                               n_fft=nfft,
                               hop_length=int(nfft / 2),
                               center=False)
    if delta:
        spectrogram = librosa.feature.delta(spectrogram, order=delta)
    spectrogram = librosa.amplitude_to_db(np.abs(spectrogram), ref=np.max, top_db=None)  # take the magnitude; the STFT (and its delta) is complex
    return _create_plot(spectrogram, sr, nfft, **kwargs) 
Example #16
Source File: datautils.py    From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis,:,:,np.newaxis]     # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis,np.newaxis,:,:]
    '''
    return melgram 
Example #17
Source File: audio.py    From deep-voice-conversion with MIT License
def amp2db(amp):
    return librosa.amplitude_to_db(amp) 
Example #18
Source File: audio.py    From Multilingual_Text_to_Speech with MIT License
def linear_to_mel(S):
    """Convert linear to mel spectrogram (this does not return the same spec. as mel_spec. method due to the db->amplitude conversion)."""
    S = db_to_amplitude(S)
    S = librosa.feature.melspectrogram(S=S, sr=hp.sample_rate, n_mels=hp.num_mels)
    return amplitude_to_db(S) 
Example #19
Source File: audio.py    From parallel-wavenet-vocoder with MIT License
def amp2db(amp):
    return librosa.amplitude_to_db(amp) 
Example #20
Source File: preprocessing.py    From rnnt-speech-recognition with MIT License
def plot_spec(spec, sr, transcription, name):

    spec_db = librosa.amplitude_to_db(spec, ref=np.max)

    plt.figure(figsize=(12,4))
    librosa.display.specshow(spec_db, sr=sr,
        x_axis='time', y_axis='mel',
        hop_length=int(sr * 0.01))  # specshow expects an integer hop length in samples
    plt.colorbar(format='%+02.0f dB')
    plt.savefig('figs/{}.png'.format(name))
    plt.clf() 
Example #21
Source File: feat_ext.py    From LIVE_SER with Apache License 2.0
def extract_log_spectrogram_frame(self, frames, file=None, sr=16000, n_fft=512, hop_length=512):

        #spec = librosa.feature.logfsgram(y=frames, sr=sr, S=None, n_fft=n_fft, hop_length=hop_length)
        spec = np.abs(librosa.stft(frames, n_fft=n_fft))
        log_spec = librosa.amplitude_to_db(spec**2)
        log_spec = log_spec.T

        if file is not None:
            np.savetxt(file, log_spec, fmt='%.8e', delimiter=';', newline='\n', header='', footer='')

        return log_spec 
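Note that spec**2 above is a power spectrogram, so amplitude_to_db here applies 20 * log10 to squared magnitudes, twice the conventional power-dB scale; librosa.power_to_db(spec**2) would give the standard 10 * log10. A quick check of the factor-of-two relationship (illustrative values, not from the source):

import numpy as np
import librosa

mag = np.abs(np.random.randn(257, 10)) + 1e-2  # stand-in magnitudes

db_standard = librosa.power_to_db(mag**2, top_db=None)     # 20 * log10(mag)
db_doubled = librosa.amplitude_to_db(mag**2, top_db=None)  # 40 * log10(mag)

np.testing.assert_allclose(db_doubled, 2 * db_standard, atol=1e-8)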
Example #22
Source File: features.py    From msaf with MIT License
def compute_features(self):
        """Actual implementation of the features.

        Returns
        -------
        cqt: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        linear_cqt = np.abs(librosa.cqt(
            self._audio, sr=self.sr, hop_length=self.hop_length,
            n_bins=self.n_bins, norm=self.norm, filter_scale=self.filter_scale)
                            ) ** 2
        cqt = librosa.amplitude_to_db(linear_cqt, ref=self.ref_power).T
        return cqt 
Example #23
Source File: audio.py    From BirdCLEF-Baseline with MIT License
def stft(sig, rate, shape=(128, 256), fmin=500, fmax=15000, normalize=True):

    # shape = (height, width) in pixels

    # STFT-Spec parameters
    N_FFT = int((rate * shape[0] * 2) / abs(fmax - fmin)) + 1
    P_MIN = int(float(N_FFT / 2) / rate * fmin) + 1
    P_MAX = int(float(N_FFT / 2) / rate * fmax) + 1    
    HOP_LEN = len(sig) // (shape[1] - 1)

    # Librosa stft-spectrum
    spec = librosa.core.stft(sig, hop_length=HOP_LEN, n_fft=N_FFT, window='hamm')

    # Convert power spec to dB scale (compute dB relative to peak power)
    spec = librosa.amplitude_to_db(librosa.core.magphase(spec)[0], ref=np.max, top_db=80)

    # Trim to desired shape using cutoff frequencies
    spec = spec[P_MIN:P_MAX, :shape[1]]

    # Flip spectrum vertically (only for better visualization, low freq. at bottom)
    spec = spec[::-1, ...]    

    # Normalize values between 0 and 1
    if normalize:
        spec -= spec.min()
        if spec.max() != 0:
            spec /= spec.max()
        else:
            spec = np.clip(spec, 0, 1)    
    
    return spec.astype('float32') 
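The N_FFT / P_MIN / P_MAX arithmetic is set up so the frequency trim leaves exactly shape[0] rows. A quick numeric check with assumed example parameters:

# Assumed example values: rate=44100, shape=(128, 256), fmin=500, fmax=15000.
rate, height = 44100, 128
fmin, fmax = 500, 15000

N_FFT = int((rate * height * 2) / abs(fmax - fmin)) + 1  # 779
P_MIN = int(float(N_FFT / 2) / rate * fmin) + 1          # 5
P_MAX = int(float(N_FFT / 2) / rate * fmax) + 1          # 133

assert P_MAX - P_MIN == height  # the [P_MIN:P_MAX] slice keeps 128 rows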
Example #24
Source File: audio.py    From BirdCLEF-Baseline with MIT License
def melspec(sig, rate, shape=(128, 256), fmin=500, fmax=15000, normalize=True, preemphasis=0.95):

    # shape = (height, width) in pixels

    # Mel-Spec parameters
    SAMPLE_RATE = rate
    N_FFT = shape[0] * 8 # = window length
    N_MELS = shape[0]
    HOP_LEN = len(sig) // (shape[1] - 1)    
    FMAX = fmax
    FMIN = fmin

    # Preemphasis as in python_speech_features by James Lyons
    if preemphasis:
        sig = np.append(sig[0], sig[1:] - preemphasis * sig[:-1])

    # Librosa mel-spectrum
    melspec = librosa.feature.melspectrogram(y=sig, sr=SAMPLE_RATE, hop_length=HOP_LEN, n_fft=N_FFT, n_mels=N_MELS, fmax=FMAX, fmin=FMIN, power=1.0)
    
    # Convert power spec to dB scale (compute dB relative to peak power)
    melspec = librosa.amplitude_to_db(melspec, ref=np.max, top_db=80)

    # Flip spectrum vertically (only for better visualization, low freq. at bottom)
    melspec = melspec[::-1, ...]

    # Trim to desired shape if too large
    melspec = melspec[:shape[0], :shape[1]]

    # Normalize values between 0 and 1
    if normalize:
        melspec -= melspec.min()
        if melspec.max() != 0:
            melspec /= melspec.max()
        else:
            melspec = np.clip(melspec, 0, 1)

    return melspec.astype('float32') 
Example #25
Source File: mel.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the Mel spectrogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_mels)
                The Mel spectrogram
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                     n_fft=self.n_fft,
                                     hop_length=self.hop_length,
                                     n_mels=self.n_mels,
                                     fmax=self.fmax))

        mel = fix_length(mel, n_frames)

        if self.log:
            mel = amplitude_to_db(mel, ref=np.max)

        # Type convert
        mel = to_dtype(mel, self.dtype)

        return {'mag': mel.T[self.idx]} 
Example #26
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        cqtm, phase = [], []

        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=(self.n_octaves * self.over_sample * 12),
                    bins_per_octave=(self.over_sample * 12))

            C = fix_length(C, n_frames)

            C, P = magphase(C)
            if self.log:
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        cqtm = to_dtype(np.asarray(cqtm), self.dtype)
        phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)} 
Example #27
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'dphase': to_dtype(dphase, self.dtype)} 
Example #28
Source File: cqt.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)} 
Example #29
Source File: fft.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the STFT magnitude and phase differential.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        D = stft(y, hop_length=self.hop_length,
                 n_fft=self.n_fft)

        D = fix_length(D, n_frames)

        mag, phase = magphase(D)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'dphase': to_dtype(phase, self.dtype)} 
Example #30
Source File: fft.py    From pumpp with ISC License
def transform_audio(self, y):
        '''Compute the STFT magnitude and phase.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude

            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        D = stft(y, hop_length=self.hop_length,
                 n_fft=self.n_fft)

        D = fix_length(D, n_frames)

        mag, phase = magphase(D)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        return {'mag': to_dtype(mag.T[self.idx], self.dtype),
                'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}