Python librosa.to_mono() Examples

The following are 15 code examples of librosa.to_mono(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.

Example #1

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def test_pitch_shift():
    """Check that `pitch_shift` agrees with a direct librosa pitch-shift call."""
    shift_amount = 4
    step_size = 24
    pitch_shift_audio = pitch_shift(mono_audio, shift_amount, step_size=step_size)
    # `sr` and `n_steps` are passed by name: librosa >= 0.10 made them
    # keyword-only, and the same names are accepted by earlier releases.
    expected_samples = librosa.effects.pitch_shift(
        librosa.to_mono(mono_audio.raw_samples),
        sr=mono_audio.sample_rate,
        n_steps=shift_amount,
        bins_per_octave=step_size,
    )
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        pitch_shift_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Example #2

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def harmonic_separation(audio, margin=3.0):
    """
    Extract the harmonic component of `audio` with librosa's `harmonic` effect.

    The samples are folded to mono before separation, and the result is
    wrapped in a new Audio object that keeps the input's sample rate.

    Parameters
    ----------
    audio : Audio
        The Audio object to act on.

    margin : float
        The larger the margin, the larger the separation.
        The default is `3.0`.

    Returns
    -------
    Audio
        A new Audio object built from the separated samples.
    """
    mono_samples = librosa.to_mono(audio.raw_samples)
    separated = librosa.effects.harmonic(mono_samples, margin=margin)
    return Audio(raw_samples=separated, sample_rate=audio.sample_rate)

Example #3

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def percussive_separation(audio, margin=3.0):
    """
    Extract the percussive component of `audio` with librosa's `percussive` effect.

    The samples are folded to mono before separation, and the result is
    wrapped in a new Audio object that keeps the input's sample rate.

    Parameters
    ----------
    audio : Audio
        The Audio object to act on.

    margin : float
        The larger the margin, the larger the separation.
        The default is `3.0`.

    Returns
    -------
    Audio
        A new Audio object built from the separated samples.
    """
    mono_samples = librosa.to_mono(audio.raw_samples)
    separated = librosa.effects.percussive(mono_samples, margin=margin)
    return Audio(raw_samples=separated, sample_rate=audio.sample_rate)

Example #4

Source File: audio.py From audiomate with MIT License

6 votes

def process_buffer(buffer, n_channels):
    """
    Join the read blocks into one array and downmix to mono if needed.

    Args:
        buffer (list): A list of blocks of samples.
        n_channels (int): The number of channels of the input data.

    Returns:
        np.array: The concatenated samples. When ``n_channels`` is greater
        than one, the result of ``librosa.to_mono`` on the per-channel view.
    """
    merged = np.concatenate(buffer)

    # Single-channel data needs no further work.
    if n_channels <= 1:
        return merged

    # Group every `n_channels` consecutive values into one frame, then flip
    # to one row per channel, which is the layout handed to to_mono.
    per_channel = merged.reshape((-1, n_channels)).T
    return librosa.to_mono(per_channel)

Example #5

Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License

6 votes

def load_wavs(filenames, sr):
    """Load two-channel files and split each into mixture and channels.

    Each file is loaded with ``librosa.load(..., mono=False)`` at rate ``sr``
    and must come back as a 2-D array with exactly two rows.

    Returns three parallel lists: the mono mixtures, the first rows
    (channel 0) and the second rows (channel 1).
    """
    mixtures, first_sources, second_sources = [], [], []

    for path in filenames:
        stereo, _ = librosa.load(path, sr=sr, mono=False)
        assert (stereo.ndim == 2) and (stereo.shape[0] == 2), 'Require wav to have two channels'
        # Doubling cancels the averaging done by to_mono.
        mixtures.append(librosa.to_mono(stereo) * 2)
        first_sources.append(stereo[0, :])
        second_sources.append(stereo[1, :])

    return mixtures, first_sources, second_sources

Example #6

Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License

6 votes

def get_random_wav(filename, sr, duration):
    """Load a two-channel file and return a random excerpt of it.

    The file is loaded with ``librosa.load(..., mono=False)`` at rate ``sr``
    and must come back as a 2-D array with exactly two rows. The signal is
    passed through ``pad_wav`` and an excerpt is then drawn with
    ``sample_range``, both called with the same ``sr`` and ``duration``.

    Returns a tuple ``(mono, src1, src2)``: the ``librosa.to_mono`` downmix of
    the excerpt, its first row and its second row.
    """
    wav, _ = librosa.load(filename, sr=sr, mono=False)
    assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'

    # Sample from the padded signal. Previously `wav_pad` was computed and then
    # discarded, with the excerpt drawn from the unpadded `wav` instead.
    # NOTE(review): presumably pad_wav extends clips shorter than `duration`
    # seconds - confirm against its definition.
    wav_pad = pad_wav(wav=wav, sr=sr, duration=duration)
    wav_sample = sample_range(wav=wav_pad, sr=sr, duration=duration)

    wav_sample_mono = librosa.to_mono(wav_sample)
    wav_sample_src1 = wav_sample[0, :]
    wav_sample_src2 = wav_sample[1, :]

    return wav_sample_mono, wav_sample_src1, wav_sample_src2

Example #7

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_time_stretch():
    """Check that `time_stretch` agrees with a direct librosa time-stretch call."""
    stretch_amount = 1.5
    time_stretch_audio = time_stretch(mono_audio, stretch_amount)
    # `rate` is passed by name: librosa >= 0.10 made it keyword-only, and the
    # same name is accepted by earlier releases.
    expected_samples = librosa.effects.time_stretch(
        librosa.to_mono(mono_audio.raw_samples), rate=stretch_amount
    )
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        time_stretch_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Example #8

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_harmonic():
    """Check that `harmonic_separation` agrees with a direct librosa call."""
    actual_audio = harmonic_separation(mono_audio)

    # Reference result: the same librosa effect applied by hand.
    mono_samples = librosa.to_mono(mono_audio.raw_samples)
    expected_samples = librosa.effects.harmonic(mono_samples, margin=3.0)
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    matches = np.allclose(
        actual_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
    assert matches

Example #9

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_percussive():
    """Check that `percussive_separation` agrees with a direct librosa call."""
    actual_audio = percussive_separation(mono_audio)

    # Reference result: the same librosa effect applied by hand.
    mono_samples = librosa.to_mono(mono_audio.raw_samples)
    expected_samples = librosa.effects.percussive(mono_samples, margin=3.0)
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    matches = np.allclose(
        actual_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
    assert matches

Example #10

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def pitch_shift(audio, steps, step_size=12):
    """
    Wraps librosa's `pitch_shift` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ----------
    audio : Audio
        The Audio object to act on.

    steps : float
        The pitch shift amount.
        The default unit is semitones, as set by `step_size`.

    step_size : float > 0
        The number of equal-tempered steps per octave.
        The default is semitones, as set by `step_size=12`.
        Quarter-tones, for example, would be `step_size=24`.

    Returns
    -------
    Audio
        A new Audio object holding the shifted samples at the input's
        sample rate.
    """
    # `sr` and `n_steps` are passed by name: librosa >= 0.10 made them
    # keyword-only, and the same names are accepted by earlier releases.
    shifted = librosa.effects.pitch_shift(
        librosa.to_mono(audio.raw_samples),
        sr=audio.sample_rate,
        n_steps=steps,
        bins_per_octave=step_size,
    )
    shifted_audio = Audio(raw_samples=shifted, sample_rate=audio.sample_rate)

    return shifted_audio

Example #11

Source File: audio.py From amen with BSD 2-Clause "Simplified" License

4 votes

def __init__(
        self,
        file_path=None,
        raw_samples=None,
        convert_to_mono=False,
        sample_rate=44100,
        analysis_sample_rate=22050,
    ):
        """
        Audio constructor.
        Loads audio from a file path (via librosa) or takes it from
        `raw_samples`, then prepares the analysis samples, features and timings.

        Parameters
        ----------

        file_path: string
            path to the audio file to load; takes precedence over
            `raw_samples` when truthy

        raw_samples: np.array
            samples to use for audio output; only used when `file_path`
            is not given

        convert_to_mono: boolean
            (optional) converts the file to mono on loading

        sample_rate: number > 0 [scalar]
            (optional) sample rate to pass to librosa.

        analysis_sample_rate: number > 0 [scalar]
            (optional) sample rate of the mono signal stored in
            `analysis_samples`.

        Returns
        -------
        An Audio object

        Raises
        ------
        ValueError
            If neither `file_path` nor `raw_samples` is provided.
        """

        if file_path:
            y, sr = librosa.load(file_path, mono=convert_to_mono, sr=sample_rate)
        elif raw_samples is not None:
            # This assumes that we're passing in raw_samples
            # directly from another Audio's raw_samples.
            y = raw_samples
            sr = sample_rate
        else:
            # Without this guard `y` and `sr` stay unbound and the failure
            # surfaces below as an UnboundLocalError.
            raise ValueError("Either file_path or raw_samples must be provided")

        self.file_path = file_path
        self.sample_rate = float(sr)
        self.analysis_sample_rate = float(analysis_sample_rate)
        # NOTE(review): this is the array rank, not a channel count - a
        # (1, n) array reports 2. Confirm that is intended.
        self.num_channels = y.ndim
        self.duration = librosa.get_duration(y=y, sr=sr)

        # Rates are passed by name: librosa >= 0.10 made them keyword-only,
        # and the same names are accepted by earlier releases.
        self.analysis_samples = librosa.resample(
            librosa.to_mono(y),
            orig_sr=sr,
            target_sr=self.analysis_sample_rate,
            res_type='kaiser_best',
        )
        # Output samples are always stored with at least two dimensions.
        self.raw_samples = np.atleast_2d(y)

        self.zero_indexes = self._create_zero_indexes()
        self.features = self._create_features()
        self.timings = self._create_timings()

Example #12

Source File: pncc.py From PNCC with MIT License

4 votes

def pncc(audio_wave, n_fft=512, sr=16000, winlen=0.020, winstep=0.010,
         n_mels=128, n_pncc=13, weight_N=4, power=2):
    """Compute PNCC-style coefficients for an audio signal.

    Pipeline, as implemented below: pre-emphasis filter, mono downmix,
    rectangular-window STFT magnitude raised to `power`, projection onto a
    mel filter bank, a chain of helper stages defined elsewhere in this
    module, and a final DCT projection.

    Parameters
    ----------
    audio_wave : array-like
        Input signal. It is transposed before being passed to `to_mono`.
    n_fft : int
        FFT size used for the STFT and the mel filter bank.
    sr : int
        Sampling rate; also scales `winlen` / `winstep` into sample counts.
    winlen : float
        Window length in seconds (`win_length = int(sr * winlen)`).
    winstep : float
        Hop size in seconds (`hop_length = int(sr * winstep)`).
    n_mels : int
        Number of mel bands; also passed as `L` to the smoothing and
        normalization helpers.
    n_pncc : int
        Number of DCT basis rows used in the final projection.
    weight_N : int
        Not used by this function.
    power : number
        Exponent applied to both the STFT magnitude and the mel filter bank.

    Returns
    -------
    np.ndarray
        Product of the nonlinearity output with the transposed DCT basis.
    """
    # First-order pre-emphasis with coefficients [1.0, -0.97].
    pre_emphasis_signal = scipy.signal.lfilter([1.0, -0.97], 1, audio_wave)
    mono_wave = to_mono(pre_emphasis_signal.T)
    stft_pre_emphasis_signal = np.abs(stft(mono_wave,
                                           n_fft=n_fft,
                                           hop_length=int(sr * winstep),
                                           win_length=int(sr * winlen),
                                           window=np.ones(int(sr * winlen)),
                                           center=False)) ** power

    # `sr` is passed by name: librosa >= 0.10 made the arguments of
    # `filters.mel` keyword-only, and the name is accepted by earlier releases.
    # NOTE(review): assumes `filters` is `librosa.filters` - confirm the import.
    mel_filter = np.abs(filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)) ** power
    power_stft_signal = np.dot(stft_pre_emphasis_signal.T, mel_filter.T)

    medium_time_power = medium_time_power_calculation(power_stft_signal)

    lower_envelope = asymmetric_lawpass_filtering(
        medium_time_power, 0.999, 0.5)

    subtracted_lower_envelope = medium_time_power - lower_envelope

    rectified_signal = halfwave_rectification(subtracted_lower_envelope)

    floor_level = asymmetric_lawpass_filtering(rectified_signal)

    temporal_masked_signal = temporal_masking(rectified_signal)

    final_output = switch_excitation_or_non_excitation(
        temporal_masked_signal, floor_level, lower_envelope,
        medium_time_power)

    spectral_weight_smoothing = weight_smoothing(
        final_output, medium_time_power, L=n_mels)

    transfer_function = time_frequency_normalization(
        power_stft_signal,
        spectral_weight_smoothing)

    normalized_power = mean_power_normalization(
        transfer_function, final_output, L=n_mels)

    power_law_nonlinearity = power_function_nonlinearity(normalized_power)

    # NOTE(review): `filters.dct` may not exist in recent librosa releases -
    # confirm against the pinned version.
    dct = np.dot(power_law_nonlinearity, filters.dct(
        n_pncc, power_law_nonlinearity.shape[1]).T)

    return dct

Example #13

Source File: audio_io.py From synvae with MIT License

4 votes

def wav_data_to_samples(wav_data, sample_rate):
  """Read PCM-formatted WAV data and return a NumPy array of samples.

  Uses scipy to read and librosa to process WAV data. Multi-channel audio
  (any 2-D result from scipy) is converted to mono.

  Args:
    wav_data: WAV audio data to read.
    sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.

  Returns:
    A numpy array of audio samples, single-channel (mono) and sampled at the
    specified rate, in float32 format.

  Raises:
    AudioIOReadError: If scipy is unable to read the WAV data.
    AudioIOError: If the sample format is unsupported or audio processing
        fails.
  """
  try:
    # Read the samples at the file's native rate and channel layout; the
    # conversion to mono and to `sample_rate` happens further down.
    native_sr, y = scipy.io.wavfile.read(six.BytesIO(wav_data))
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOReadError(e)

  if y.dtype == np.int16:
    # Convert to float32.
    y = int16_samples_to_float32(y)
  elif y.dtype != np.float32:
    raise AudioIOError(
        'WAV file not 16-bit or 32-bit float PCM, unsupported')
  try:
    # scipy returns multi-channel data as (n_samples, n_channels); librosa
    # wants channels first. Downmix for any channel count, not just stereo,
    # so the resample below never runs along the channel axis.
    if y.ndim == 2:
      y = librosa.to_mono(y.T)
    if native_sr != sample_rate:
      # Rates are passed by name: librosa >= 0.10 made them keyword-only,
      # and the same names are accepted by earlier releases.
      y = librosa.resample(y, orig_sr=native_sr, target_sr=sample_rate)
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOError(e)
  return y

Example #14

Source File: sox.py From muda with ISC License

4 votes

def __sox(y, sr, *args):
    """Execute sox

    The audio is written to a temporary WAV file, processed by the `sox`
    command line tool into a second temporary file, and read back. Both
    temporary files are removed afterwards, whether or not processing
    succeeds.

    Parameters
    ----------
    y : np.ndarray
        Audio time series

    sr : int > 0
        Sampling rate of `y`

    *args
        Additional arguments to sox

    Returns
    -------
    y_out : np.ndarray
        `y` after sox transformation

    Raises
    ------
    subprocess.CalledProcessError
        If sox exits with a non-zero status.
    """

    assert sr > 0

    fdesc, infile = tempfile.mkstemp(suffix=".wav")
    os.close(fdesc)
    fdesc, outfile = tempfile.mkstemp(suffix=".wav")
    os.close(fdesc)

    try:
        # Dump the audio. This sits inside the try block so the temporary
        # files are removed even when the write fails. The transpose mirrors
        # the one applied after reading below; it is a no-op for 1-D input.
        # NOTE(review): assumes 2-D `y` is (channels, samples) - confirm.
        psf.write(infile, y.T, sr)

        arguments = ["sox", infile, outfile, "-q"]
        arguments.extend(args)

        subprocess.check_call(arguments)

        # The rate reported by the output file is not used.
        y_out, _ = psf.read(outfile)
        y_out = y_out.T
        # A 1-D input gets a mono result back.
        if y.ndim == 1:
            y_out = librosa.to_mono(y_out)

    finally:
        os.unlink(infile)
        os.unlink(outfile)

    return y_out

Example #15

Source File: background.py From muda with ISC License

4 votes

def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    """Slice a fragment of audio from a file.

    This uses pysoundfile to efficiently seek without
    loading the entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should start

    stop : int
        The sample index of `filename` at which the audio fragment should stop (e.g. y = audio[start:stop])

    n_samples : int > 0
        The number of samples in the returned fragment, after resampling

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Raises
    ------
    ValueError
        If the source file is shorter than the requested length.
        NOTE(review): no explicit length check is visible in this function;
        confirm where (or whether) this is raised.

    """

    with psf.SoundFile(str(filename), mode="r") as soundf:
        n_target = stop - start

        soundf.seek(start)

        # Transpose what soundfile returns before handing it to librosa.
        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample from the file's native rate to the requested rate. Rates
        # are passed by name: librosa >= 0.10 made them keyword-only, and the
        # same names are accepted by earlier releases.
        y = librosa.resample(y, orig_sr=soundf.samplerate, target_sr=sr)

        # Clip to the target length exactly (`size` is likewise keyword-only
        # in librosa >= 0.10).
        y = librosa.util.fix_length(y, size=n_samples)

        return y