Python scipy.io.wavfile.read() Examples

The following are 30 code examples of scipy.io.wavfile.read(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.io.wavfile , or try the search function

Example #1

Source File: vggish_input.py From MAX-Audio-Classifier with Apache License 2.0

7 votes

def wavfile_to_examples(wav_file):
    """Convenience wrapper around waveform_to_examples() for a common WAV format.

    Args:
      wav_file: String path to a file, or a file-like object. The file
      is assumed to contain WAV audio data with signed 16-bit PCM samples.

    Returns:
      See waveform_to_examples.
    """
    try:
        wav_file = BytesIO(wav_file)
        sr, wav_data = wavfile.read(wav_file)
    except IOError:
        print("Error reading WAV file!")
        print("The specified WAV file type is not supported by scipy.io.wavfile.read()")
        sys.exit(1)

    if wav_data.dtype != np.int16:
        raise TypeError('Bad sample type: %r' % wav_data.dtype)
    samples = wav_data / 32768.0  # Convert to [-1.0, +1.0]
    return waveform_to_examples(samples, sr)

Example #2

Source File: feat_extract.py From persephone with Apache License 2.0

7 votes

def fbank(wav_path, flat=True):
    """ Currently grabs log Mel filterbank, deltas and double deltas."""

    (rate, sig) = wav.read(wav_path)
    if len(sig) == 0:
        logger.warning("Empty wav: {}".format(wav_path))
    fbank_feat = python_speech_features.logfbank(sig, rate, nfilt=40)
    energy = extract_energy(rate, sig)
    feat = np.hstack([energy, fbank_feat])
    delta_feat = python_speech_features.delta(feat, 2)
    delta_delta_feat = python_speech_features.delta(delta_feat, 2)
    all_feats = [feat, delta_feat, delta_delta_feat]
    if not flat:
        all_feats = np.array(all_feats)
        # Make time the first dimension for easy length normalization padding
        # later.
        all_feats = np.swapaxes(all_feats, 0, 1)
        all_feats = np.swapaxes(all_feats, 1, 2)
    else:
        all_feats = np.concatenate(all_feats, axis=1)

    # Log Mel Filterbank, with delta, and double delta
    feat_fn = wav_path[:-3] + "fbank.npy"
    np.save(feat_fn, all_feats)

Example #3

Source File: test_wavfile.py From Computable with MIT License

6 votes

def test_read_1():
    for mmap in [False, True]:
        warn_ctx = WarningManager()
        warn_ctx.__enter__()
        try:
            warnings.simplefilter('ignore', wavfile.WavFileWarning)
            rate, data = wavfile.read(datafile('test-44100-le-1ch-4bytes.wav'),
                                      mmap=mmap)
        finally:
            warn_ctx.__exit__()

        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.int32))
        assert_equal(data.shape, (4410,))

        del data

Example #4

Source File: audio.py From signaltrain with GNU General Public License v3.0

6 votes

def triangle(t, randfunc=np.random.rand, t0_fac=None): # ramp up then down
    height = (0.4 * randfunc() + 0.4) * np.random.choice([-1,1])
    width = randfunc()/4 * t[-1]     # half-width actually
    t0 = 2*width + 0.4 * randfunc()*t[-1] if t0_fac is None else t0_fac*t[-1]
    x = height * (1 - np.abs(t-t0)/width)
    x[np.where(t < (t0-width))] = 0
    x[np.where(t > (t0+width))] = 0
    amp_n = (0.1*randfunc()+0.02)   # add noise
    return x + amp_n*pinknoise(t.shape[0])


# Prelude to read_audio_file
# Tried lots of ways of doing this.. most are slow.
#signal, rate = librosa.load(filename, sr=sr, mono=True, res_type='kaiser_fast') # Librosa's reader is incredibly slow. do not use
#signal, rate = torchaudio.load(filename)#, normalization=True)   # Torchaudio's reader is pretty fast but normalization is a problem
#signal = signal.numpy().flatten()
#reader = io_methods.AudioIO   # Stylios' file reader. Haven't gotten it working yet
#signal, rate = reader.audioRead(filename, mono=True)
#signal, rate = sf.read('existing_file.wav')

Example #5

Source File: audio.py From anica with MIT License

6 votes

def get_data():
    """The data files come from the TSP dataset 16k set"""
    _, man = wavfile.read('./wav_data/MA02_04.wav')
    rate, woman = wavfile.read('./wav_data/FA01_03.wav')
    man = man.astype('float32')
    woman = woman.astype('float32')
    man_max = np.max(man)
    woman_max = np.max(woman)
    man /= man_max
    woman /= woman_max
    shortest = min(len(man), len(woman))
    woman = woman[:shortest]
    man = man[:shortest]
    np.random.seed(101)
    noise = np.random.uniform(-1, 1, len(man))
    sources = np.stack((woman, man, noise))
    A = np.random.uniform(-1, 1, (3, 3))
    linear_mix = np.dot(A, sources)
    pnl_mix = linear_mix.copy()
    pnl_mix[0] = np.tanh(pnl_mix[0])
    pnl_mix[1] = (pnl_mix[1] + pnl_mix[1]**3) / 2
    pnl_mix[2] = np.exp(pnl_mix[2])
    return linear_mix, pnl_mix, A, sources

Example #6

Source File: audio.py From signaltrain with GNU General Public License v3.0

6 votes

def __init__(self, path, sr=44100, ):
        super(FileEffect, self).__init__()
        print("  FileEffect: path = ",path)
        if (path is None) or (not glob.glob(path+"/Train/target*")) \
            or (not glob.glob(path+"/Val/target*")) or ((not glob.glob(path+"/effect_info.ini"))):
            print(f"Error: can't file target output files or effect_info.ini in path = {path}")
            sys.exit(1)   # Yea, this is fatal

        self.sr = sr
        # read the effect info config file  "effect_info.ini"
        config = configparser.ConfigParser()
        config.read(path+'/effect_info.ini')
        self.name = config['effect']['name']+"(files)"   # tack on "(files)" to the effect name
        #TODO: note that use of 'eval' below could be a potential security issue
        self.knob_names = eval(config.get("effect","knob_names"))
        self.knob_ranges = np.array(eval(config.get("effect","knob_ranges")))
        try:
            self.is_inverse = (True == bool(config['effect']['inverse']) )
            self.name = "De-"+self.name
        except:
            pass   # Ignore errors we don't require that 'inverse' be defined anywhere in the file

Example #7

Source File: highest_peak_method.py From Automated_Music_Transcription with MIT License

6 votes

def detect_MIDI_notes(self):
        """
            The algorithm for calculating midi notes from a given wav file.
        """

        (framerate, sample) = wav.read(self.wav_file)
        # We need to change the 2 channels into one because STFT works only
        # for 1 channel. We could also do STFT for each channel separately.
        monoChannel = sample.mean(axis=1)
        duration = getDuration(self.wav_file)
        midi_notes = []

        # Consider only files with a duration longer than 0.2 seconds.
        if duration > 0.18:
            frequency_power = self.calculateFFT(duration, framerate, monoChannel)
            filtered_frequencies = [f for (f, p) in frequency_power]
            #self.plot_power_spectrum(frequency_power)
            #self.plot_power_spectrum_dB(frequency_power)
            f0_candidates = self.get_pitch_candidates_remove_highest_peak(frequency_power)
            midi_notes = self.matchWithMIDINotes(f0_candidates)
        return midi_notes

Example #8

Source File: mfcc_pca_feature.py From Speech_Signal_Processing_and_Classification with MIT License

6 votes

def mainParkinson():
	general_feature_list = []
	general_label_list = []
	folder = raw_input('Give the name of the folder that you want to read data: ')
	if(folder == 'PD'):
		healthyCases = os.listdir('/home/gionanide/Theses_2017-2018_2519/Gkagkos/Audio_Files/PD')
		for x in healthyCases:
			wav = '/'+folder+'/'+str(x)
			mfcc_features,inputWav = mfcc_features_extraction(wav)
			mean_features(mfcc_features,inputWav,folder,general_feature_list,general_label_list)
		folder = raw_input('Give the name of the folder that you want to read data: ')
		if(folder == 'HC'):
			parkinsonCases = os.listdir('/home/gionanide/Theses_2017-2018_2519/Gkagkos/Audio_Files/HC')
			for x in parkinsonCases:
				wav = '/'+folder+'/'+str(x)
				mfcc_features,inputWav = mfcc_features_extraction(wav)
				mean_features(mfcc_features,inputWav,folder,general_feature_list,general_label_list)
		#print general_feature_list, general_label_list
		#writeFeatures(general_feature_list,general_label_list,wav,folder)
		classifyPHC(general_feature_list,general_label_list)

Example #9

Source File: helpers_legacy.py From opensauce-python with Apache License 2.0

6 votes

def wavread(fn):
    """Emulate the parts of the matlab wavread function that we need.

        y, Fs = wavread(fn)

    y is the vector of audio samples, Fs is the frame rate.

    Matlab's wavread is used by voicesauce to read in the wav files for
    processing.  As a consequence, all the translated algorithms assume the
    data from the wav file is in matlab form, which in this case means a double
    precision float between -1 and 1.  The corresponding scipy function returns
    the actual integer PCM values from the file, which range between -32768 and
    32767.  (matlab's wavread *can* return the integers, but does not by
    default and voicesauce uses the default).  Consequently, after reading the
    data using scipy's io.wavfile, we convert to float by dividing each integer
    by 32768.

    """
    # For reference, I figured this out from:
    # http://mirlab.org/jang/books/audiosignalprocessing/matlab4waveRead.asp?title=4-2%20Reading%20Wave%20Files
    # XXX: if we need to handle 8 bit files we'll need to detect them and
    # special case them here.
    Fs, y = wavfile.read(fn)
    return y/numpy.float64(32768.0), Fs

Example #10

Source File: io.py From pydiogment with BSD 3-Clause "New" or "Revised" License

6 votes

def read_file(filename):
    """
    Read wave file as mono.

    Args:
        - filename (str) : wave file / path.

    Returns:
        tuple of sampling rate and audio data.
    """
    fs, sig = read(filename=filename)
    if (sig.ndim == 1):
        samples = sig
    else:
        samples = sig[:, 0]
    return fs, samples

Example #11

Source File: create_spectrograms.py From Spoken-language-identification with MIT License

6 votes

def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    samplerate, samples = wav.read(audiopath)
    samples = samples[:, channel]
    s = stft(samples, binsize)

    sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
    sshow = sshow[2:, :]
    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
    timebins, freqbins = np.shape(ims)
    
    ims = np.transpose(ims)
    # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
    ims = ims[0:256, :] # 0-11khz, ~10s interval
    #print "ims.shape", ims.shape
    
    image = Image.fromarray(ims) 
    image = image.convert('L')
    image.save(name)

Example #12

Source File: utils.py From sklearn-audio-transfer-learning with ISC License

6 votes

def wavefile_to_waveform(wav_file, features_type):
    data, sr = sf.read(wav_file)
    if features_type == 'vggish':
        tmp_name = str(int(np.random.rand(1)*1000000)) + '.wav'
        sf.write(tmp_name, data, sr, subtype='PCM_16')
        sr, wav_data = wavfile.read(tmp_name)
        os.remove(tmp_name)
        # sr, wav_data = wavfile.read(wav_file) # as done in VGGish Audioset
        assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype
        data = wav_data / 32768.0  # Convert to [-1.0, +1.0]
  
    # at least one second of samples, if not repead-pad
    src_repeat = data
    while (src_repeat.shape[0] < sr): 
        src_repeat = np.concatenate((src_repeat, data), axis=0)
        data = src_repeat[:sr]

    return data, sr

Example #13

Source File: mgca.py From Speech_Signal_Processing_and_Classification with MIT License

5 votes

def readWavFile(wav):
	#given a path from the keyboard to read a .wav file
	#wav = raw_input('Give me the path of the .wav file you want to read: ')
	inputWav = 'PATH_TO_WAV'+wav
	return inputWav

#reading the .wav file (signal file) and extract the information we need

Example #14

Source File: mfcc.py From Speech_Signal_Processing_and_Classification with MIT License

5 votes

def main():
	folder = raw_input('Give the name of the folder that you want to read data: ')
	amount = raw_input('Give the number of samples in the specific folder: ')
	for x in range(1,int(amount)):
		wav = '/'+folder+'/'+str(x)+'.wav'
		print wav
		mfcc_features,inputWav = mfcc_features_extraction(wav)
		mean_features(mfcc_features,inputWav)

Example #15

Source File: mfcc.py From Speech_Signal_Processing_and_Classification with MIT License

5 votes

def readWavFile(wav):
	#given a path from the keyboard to read a .wav file
	#wav = raw_input('Give me the path of the .wav file you want to read: ')
	inputWav = 'PATH_TO_WAV'+wav
	return inputWav,wav

Example #16

Source File: main.py From uncaptcha with MIT License

5 votes

def get_numbers(audio_file, parent_dir):
    global AMP_THRESHOLD
    mp3_file = audio_file + ".mp3"
    wav_file = audio_file + ".wav"
    print("converting from " + mp3_file + " to " + wav_file)
    os.system("echo 'y' | ffmpeg -i "+mp3_file+" "+wav_file + "&> /dev/null")
    # split audio file on silence
    os.system("sox -V3 "+wav_file+" "+audio_file+"_.wav silence -l 0 1 0.5 0.1% : newfile : restart &> /dev/null")
    files = [f for f in os.listdir(parent_dir) if "_0" in f]
    audio_filenames = []
    # remove audio files that are only silence
    for f in files:
        _, snd = wavfile.read(TASK_PATH + "/" + f)
        amp = max(snd)
        print(f + ":" + str(amp))
        if amp > AMP_THRESHOLD: # skip this file
            audio_filenames.append(parent_dir+f)
        else:
            os.system("rm " + parent_dir+f)
    # run speech recognition on the individual numbers
    # num_str = ""
    # for f in sorted(audio_filenames):
    #     print f
    #     num_str += str(audio.getNum(f))
    # print(num_str)
    return audio.getNums(TASK_PATH, audio_filenames)

Example #17

Source File: utilities.py From speech-emotion-recognition with MIT License

5 votes

def get_feature_vector_from_mfcc(file_path: str, flatten: bool,
                                 mfcc_len: int = 39) -> np.ndarray:
    """
    Make feature vector from MFCC for the given wav file.

    Args:
        file_path (str): path to the .wav file that needs to be read.
        flatten (bool) : Boolean indicating whether to flatten mfcc obtained.
        mfcc_len (int): Number of cepestral co efficients to be consider.

    Returns:
        numpy.ndarray: feature vector of the wav file made from mfcc.
    """
    fs, signal = wav.read(file_path)
    s_len = len(signal)
    # pad the signals to have same size if lesser than required
    # else slice them
    if s_len < mean_signal_length:
        pad_len = mean_signal_length - s_len
        pad_rem = pad_len % 2
        pad_len //= 2
        signal = np.pad(signal, (pad_len, pad_len + pad_rem),
                        'constant', constant_values=0)
    else:
        pad_len = s_len - mean_signal_length
        pad_len //= 2
        signal = signal[pad_len:pad_len + mean_signal_length]
    mel_coefficients = mfcc(signal, fs, num_cepstral=mfcc_len)
    if flatten:
        # Flatten the data
        mel_coefficients = np.ravel(mel_coefficients)
    return mel_coefficients

Example #18

Source File: mgca.py From Speech_Signal_Processing_and_Classification with MIT License

5 votes

def initialize(inputWav):
	rate , signal  = wav.read(readWavFile(inputWav)) # returns a wave_read object , rate: sampling frequency 
	sig = wave.open(readWavFile(inputWav))
	# signal is the numpy 2D array with the date of the .wav file
	# len(signal) number of samples
	sampwidth = sig.getsampwidth()
	print 'The sample rate of the audio is: ',rate
	print 'Sampwidth: ',sampwidth	
	return signal ,  rate 

#implementation of the low-pass filter

Example #19

Source File: prepare_data.py From se_relativisticgan with MIT License

5 votes

def prepare_sliced_data1d(opts):
    wavfolder = opts['wavfolder']
    window_size = opts['window_size']
    stride = opts['stride']
    minlength = opts['minlength']
    filenames = opts['filenames']
    
    full_sliced = [] # initialize empty list
    dfi = []
    dfi_begin = 0
    with open(filenames) as f:
        wav_files = f.read().splitlines() # to get rid of the \n while using readlines()
    print ("**** Reading from " + wavfolder)
    print ("**** The folder has " + str(len(wav_files)) + " files.")
    for ind, wav_file in enumerate(wav_files):
        if ind % 10 == 0 :
            print("Processing " + str(ind) + " of " + str(len(wav_files)) + " files.")
        wavfilename = os.path.join(wavfolder, wav_file)
        sliced = read_and_slice1d(wavfilename, window_size, minlength, stride=stride)
        full_sliced.append(sliced)
        dfi.append(np.array([[dfi_begin, dfi_begin + sliced.shape[0]]]))
        dfi_begin += sliced.shape[0]

    full_slicedstack = np.vstack(full_sliced)
    dfistack = np.vstack(dfi)

    return full_slicedstack, dfistack.astype('int')

Example #20

Source File: mfcc_pca_feature.py From Speech_Signal_Processing_and_Classification with MIT License

5 votes

def mainMaleFemale():
	folder = raw_input('Give the name of the folder that you want to read data: ')
	amount = raw_input('Give the number of samples in the specific folder: ')
	for x in range(1,int(amount)+1):
		wav = '/'+folder+'/'+str(x)+'.wav'
		print wav
		mfcc_features,inputWav = mfcc_features_extraction(wav)
		mean_features(mfcc_features,inputWav,folder)

Example #21

Source File: run_audio_attack.py From Black-Box-Audio with MIT License

5 votes

def load_wav(input_wav_file):
    # Load the inputs that we're given
    fs, audio = wav.read(input_wav_file)
    assert fs == 16000
    print('source dB', db(audio))
    return audio

Example #22

Source File: least_squares_first_peaks_2.py From Automated_Music_Transcription with MIT License

5 votes

def Pertusa_Inesta_Algorithm(self):
        (framerate, sample) = wav.read(self.wav_file)
        monoChannel = sample.mean(axis=1)
        stft = self.STFT(monoChannel, framerate, self.HAN_WINDOW, self.HOP_SIZE)
        (stft_bin_freqs, stft_magnitudes) = self.get_stft_bin_freqs(stft, framerate)
        print stft_bin_freqs
        #print self.get_candidates_with_partials(stft_bin_freqs[0], stft_magnitudes[0])

        '''
        midi_notes = []
        for freqs in stftBinFrequencies:
            midi_notes.append(self.matchWithMIDINotes(sorted(freqs.keys())))
        print midi_notes[0]
        return midi_notes
        '''

Example #23

Source File: audio_read.py From audfprint with MIT License

5 votes

def read_data(self, timeout=10.0):
        """Read blocks of raw PCM data from the file."""
        # Read from stdout in a separate thread and consume data from
        # the queue.
        start_time = time.time()
        while True:
            # Wait for data to be available or a timeout.
            data = None
            try:
                data = self.stdout_reader.queue.get(timeout=timeout)
                if data:
                    yield data
                else:
                    # End of file.
                    break
            except queue.Empty:
                # Queue read timed out.
                end_time = time.time()
                if not data:
                    if end_time - start_time >= timeout:
                        # Nothing interesting has happened for a while --
                        # FFmpeg is probably hanging.
                        raise ValueError('ffmpeg output: {}'.format(
                                ''.join(self.stderr_reader.queue.queue)
                        ))
                    else:
                        start_time = end_time
                        # Keep waiting.
                        continue

Example #24

Source File: audio_read.py From audfprint with MIT License

5 votes

def __init__(self, filename, channels=None, sample_rate=None, block_size=4096):
        if not os.path.isfile(filename):
            raise ValueError(filename + " not found.")
        popen_args = ['ffmpeg', '-i', filename, '-f', 's16le']
        self.channels = channels
        self.sample_rate = sample_rate
        if channels:
            popen_args.extend(['-ac', str(channels)])
        if sample_rate:
            popen_args.extend(['-ar', str(sample_rate)])
        popen_args.append('-')
        self.proc = subprocess.Popen(
                popen_args,
                stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )

        # Start another thread to consume the standard output of the
        # process, which contains raw audio data.
        self.stdout_reader = QueueReaderThread(self.proc.stdout, block_size)
        self.stdout_reader.start()

        # Read relevant information from stderr.
        try:
            self._get_info()
        except ValueError:
            raise ValueError("Error reading header info from " + filename)

        # Start a separate thread to read the rest of the data from
        # stderr. This (a) avoids filling up the OS buffer and (b)
        # collects the error output for diagnosis.
        self.stderr_reader = QueueReaderThread(self.proc.stderr)
        self.stderr_reader.start()

Example #25

Source File: audio_read.py From audfprint with MIT License

5 votes

def run(self):
        while True:
            data = self.fh.read(self.blocksize)
            if not self.discard:
                self.queue.put(data)
            if not data:
                # Stream closed (EOF).
                break

Example #26

Source File: audio_read.py From audfprint with MIT License

5 votes

def wavread(filename):
  """Read in audio data from a wav file.  Return d, sr."""
  # Read in wav file.
  samplerate, wave_data = wav.read(filename)
  # Normalize short ints to floats in range [-1..1).
  data = np.asfarray(wave_data) / 32768.0
  return data, samplerate

Example #27

Source File: demo.py From audiomentations with MIT License

5 votes

def load_wav_file(sound_file_path):
    sample_rate, sound_np = wavfile.read(sound_file_path)
    if sample_rate != SAMPLE_RATE:
        raise Exception(
            "Unexpected sample rate {} (expected {})".format(sample_rate, SAMPLE_RATE)
        )

    if sound_np.dtype != np.float32:
        assert sound_np.dtype == np.int16
        sound_np = (sound_np / 32767).astype(np.float32)  # ends up roughly between -1 and 1

    return sound_np

Example #28

Source File: feat_extract.py From persephone with Apache License 2.0

5 votes

def mfcc(wav_path):
    """ Grabs MFCC features with energy and derivates. """

    (rate, sig) = wav.read(wav_path)
    feat = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    delta_feat = python_speech_features.delta(feat, 2)
    all_feats = [feat, delta_feat]
    all_feats = np.array(all_feats)
    # Make time the first dimension for easy length normalization padding later.
    all_feats = np.swapaxes(all_feats, 0, 1)
    all_feats = np.swapaxes(all_feats, 1, 2)

    feat_fn = wav_path[:-3] + "mfcc13_d.npy"
    np.save(feat_fn, all_feats)

Example #29

Source File: test_wavfile.py From Computable with MIT License

5 votes

def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile)

Example #30

Source File: test_wavfile.py From Computable with MIT License

5 votes

def test_read_fail():
    for mmap in [False, True]:
        fp = open(datafile('example_1.nc'))
        assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
        fp.close()