Python scipy.io.wavfile.read() Examples
The following are 30 code examples of scipy.io.wavfile.read().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.io.wavfile, or try the search function.
Example #1
Source File: vggish_input.py From MAX-Audio-Classifier with Apache License 2.0 | 7 votes |
def wavfile_to_examples(wav_file):
    """Decode in-memory WAV data and pass it to waveform_to_examples().

    Args:
      wav_file: The raw contents of a WAV file as a bytes-like object; it is
        wrapped in a BytesIO before being decoded. The audio is expected to
        hold signed 16-bit PCM samples.

    Returns:
      See waveform_to_examples.

    Raises:
      TypeError: If the decoded samples are not int16.

    If reading raises IOError, two messages are printed and the process exits
    with status 1.
    """
    try:
        sample_rate, pcm = wavfile.read(BytesIO(wav_file))
    except IOError:
        print("Error reading WAV file!")
        print("The specified WAV file type is not supported by scipy.io.wavfile.read()")
        sys.exit(1)

    if pcm.dtype != np.int16:
        raise TypeError('Bad sample type: %r' % pcm.dtype)

    # Scale the 16-bit integers to floats in [-1.0, +1.0].
    return waveform_to_examples(pcm / 32768.0, sample_rate)
Example #2
Source File: feat_extract.py From persephone with Apache License 2.0 | 7 votes |
def fbank(wav_path, flat=True):
    """Extract log Mel filterbank features with deltas and save them next to the wav.

    The static features are the output of ``extract_energy`` stacked in front
    of a 40-filter log filterbank; first and second order deltas are computed
    from them. The result is written with ``np.save`` to ``wav_path`` with its
    last three characters replaced by ``fbank.npy``. Nothing is returned.

    Args:
        wav_path: Path of the wav file to read.
        flat: When true, the three feature streams are concatenated along
            axis 1. Otherwise they are stacked and rearranged so that time
            is the first axis and the stream index is the last.
    """
    rate, sig = wav.read(wav_path)
    if not len(sig):
        logger.warning("Empty wav: {}".format(wav_path))

    log_mel = python_speech_features.logfbank(sig, rate, nfilt=40)
    static = np.hstack([extract_energy(rate, sig), log_mel])
    first_order = python_speech_features.delta(static, 2)
    second_order = python_speech_features.delta(first_order, 2)
    streams = [static, first_order, second_order]

    if flat:
        features = np.concatenate(streams, axis=1)
    else:
        # Make time the first dimension for easy length normalization
        # padding later: (stream, time, coeff) -> (time, coeff, stream).
        features = np.transpose(np.array(streams), (1, 2, 0))

    # Log Mel Filterbank, with delta, and double delta
    np.save(wav_path[:-3] + "fbank.npy", features)
Example #3
Source File: test_wavfile.py From Computable with MIT License | 6 votes |
def test_read_1():
    """Read the 4-byte mono fixture with and without mmap and check what comes back."""
    fixture = 'test-44100-le-1ch-4bytes.wav'
    for use_mmap in (False, True):
        # The warning manager is driven by hand (no ``with`` statement),
        # exactly as the helper is used elsewhere in this test.
        guard = WarningManager()
        guard.__enter__()
        try:
            warnings.simplefilter('ignore', wavfile.WavFileWarning)
            rate, data = wavfile.read(datafile(fixture), mmap=use_mmap)
        finally:
            guard.__exit__()

        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.int32))
        assert_equal(data.shape, (4410,))

        # Drop the reference before the next pass.
        del data
Example #4
Source File: audio.py From signaltrain with GNU General Public License v3.0 | 6 votes |
def triangle(t, randfunc=np.random.rand, t0_fac=None):
    """Return a triangular pulse (ramp up, then down) on grid `t`, plus pink noise.

    Args:
        t: Array of time values; ``t[-1]`` is used as the scale for the
            pulse width and position.
        randfunc: Zero-argument callable supplying random numbers. It is
            called for the height, the half-width, the centre (only when
            `t0_fac` is None) and the noise amplitude, in that order.
        t0_fac: Optional fraction of ``t[-1]`` at which to centre the pulse.
            When None the centre is drawn at random.

    Returns:
        The pulse, zeroed outside one half-width of its centre, with
        ``pinknoise(t.shape[0])`` added at a random amplitude.
    """
    end_time = t[-1]

    peak = (0.4 * randfunc() + 0.4) * np.random.choice([-1, 1])
    half_width = randfunc() / 4 * end_time  # half-width actually
    if t0_fac is None:
        center = 2 * half_width + 0.4 * randfunc() * end_time
    else:
        center = t0_fac * end_time

    pulse = peak * (1 - np.abs(t - center) / half_width)
    pulse[t < (center - half_width)] = 0
    pulse[t > (center + half_width)] = 0

    noise_amp = 0.1 * randfunc() + 0.02  # add noise
    return pulse + noise_amp * pinknoise(t.shape[0])

# Prelude to read_audio_file
# Tried lots of ways of doing this.. most are slow.
#signal, rate = librosa.load(filename, sr=sr, mono=True, res_type='kaiser_fast') # Librosa's reader is incredibly slow. do not use
#signal, rate = torchaudio.load(filename)#, normalization=True) # Torchaudio's reader is pretty fast but normalization is a problem
#signal = signal.numpy().flatten()
#reader = io_methods.AudioIO # Stylios' file reader. Haven't gotten it working yet
#signal, rate = reader.audioRead(filename, mono=True)
#signal, rate = sf.read('existing_file.wav')
Example #5
Source File: audio.py From anica with MIT License | 6 votes |
def get_data():
    """Build linear and post-nonlinear mixtures of two speech clips and noise.

    The data files come from the TSP dataset 16k set. Both clips are read
    from ``./wav_data``, converted to float32, divided by their own maximum
    and cut to the length of the shorter one. The random generator is seeded
    with 101 before the uniform noise source and the 3x3 mixing matrix are
    drawn.

    Returns:
        Tuple ``(linear_mix, pnl_mix, A, sources)`` where ``sources`` stacks
        (woman, man, noise), ``linear_mix`` is ``A`` applied to the sources,
        and ``pnl_mix`` applies tanh, a cubic blend and exp to rows 0, 1
        and 2 of ``linear_mix`` respectively.
    """
    _, man = wavfile.read('./wav_data/MA02_04.wav')
    _, woman = wavfile.read('./wav_data/FA01_03.wav')

    man = man.astype('float32')
    woman = woman.astype('float32')
    man /= np.max(man)
    woman /= np.max(woman)

    common_len = min(len(man), len(woman))
    woman, man = woman[:common_len], man[:common_len]

    np.random.seed(101)
    noise = np.random.uniform(-1, 1, len(man))
    sources = np.stack((woman, man, noise))
    A = np.random.uniform(-1, 1, (3, 3))
    linear_mix = np.dot(A, sources)

    # One distinct nonlinearity per mixture row.
    pnl_mix = np.stack((
        np.tanh(linear_mix[0]),
        (linear_mix[1] + linear_mix[1] ** 3) / 2,
        np.exp(linear_mix[2]),
    ))
    return linear_mix, pnl_mix, A, sources
Example #6
Source File: audio.py From signaltrain with GNU General Public License v3.0 | 6 votes |
def __init__(self, path, sr=44100, ):
    """Configure the effect from a directory of files and its ``effect_info.ini``.

    Args:
        path: Directory that must contain ``Train/target*`` and
            ``Val/target*`` files plus ``effect_info.ini``. If it is None or
            any of these is missing, an error is printed and the process
            exits with status 1.
        sr: Sample rate stored on the instance as ``self.sr``.

    The ``[effect]`` section of the ini file supplies ``name``,
    ``knob_names`` and ``knob_ranges``. An optional ``inverse`` entry is read
    as a boolean: only a true value sets ``self.is_inverse`` to True and
    prefixes the name with "De-". Previously any ``inverse`` entry at all,
    including ``inverse = False``, did both.
    """
    super(FileEffect, self).__init__()
    print(" FileEffect: path = ",path)

    required = ("/Train/target*", "/Val/target*", "/effect_info.ini")
    if (path is None) or not all(glob.glob(path + pattern) for pattern in required):
        print(f"Error: can't file target output files or effect_info.ini in path = {path}")
        sys.exit(1)   # Yea, this is fatal

    self.sr = sr

    # read the effect info config file "effect_info.ini"
    config = configparser.ConfigParser()
    config.read(path + '/effect_info.ini')
    self.name = config['effect']['name'] + "(files)"   # tack on "(files)" to the effect name

    # SECURITY: eval() runs whatever expression the ini file contains, so
    # only load effect_info.ini files from a trusted source.
    # TODO: switch to a literal-only parser once the file format is confirmed.
    self.knob_names = eval(config.get("effect", "knob_names"))
    self.knob_ranges = np.array(eval(config.get("effect", "knob_ranges")))

    # 'inverse' is optional; when it is absent nothing is set here.
    if config.has_option('effect', 'inverse'):
        try:
            inverse = config.getboolean('effect', 'inverse')
        except (ValueError, configparser.Error):
            # Unrecognised spelling: fall back to "any non-empty text
            # counts as true" rather than failing on a best-effort option.
            inverse = bool(config.get('effect', 'inverse', raw=True))
        self.is_inverse = inverse
        if inverse:
            self.name = "De-" + self.name
Example #7
Source File: highest_peak_method.py From Automated_Music_Transcription with MIT License | 6 votes |
def detect_MIDI_notes(self):
    """Calculate the MIDI notes found in ``self.wav_file``.

    The audio is reduced to a single channel, passed through
    ``calculateFFT``, and the resulting pitch candidates are matched against
    MIDI notes.

    Returns:
        The result of ``matchWithMIDINotes`` for the detected candidates, or
        an empty list when the duration reported by ``getDuration`` is
        0.18 seconds or less.
    """
    (framerate, sample) = wav.read(self.wav_file)

    # The STFT works on one channel only, so multi-channel audio is averaged
    # down to mono (the channels could also be analysed separately). A mono
    # file is returned as a 1-D array with no channel axis to average over,
    # so it is only converted to float.
    if sample.ndim > 1:
        monoChannel = sample.mean(axis=1)
    else:
        monoChannel = sample.astype(float)

    duration = getDuration(self.wav_file)

    # Consider only files with a duration longer than 0.18 seconds.
    if duration <= 0.18:
        return []

    frequency_power = self.calculateFFT(duration, framerate, monoChannel)
    #self.plot_power_spectrum(frequency_power)
    #self.plot_power_spectrum_dB(frequency_power)
    f0_candidates = self.get_pitch_candidates_remove_highest_peak(frequency_power)
    return self.matchWithMIDINotes(f0_candidates)
Example #8
Source File: mfcc_pca_feature.py From Speech_Signal_Processing_and_Classification with MIT License | 6 votes |
def mainParkinson():
    """Prompt for the 'PD' and 'HC' folders, extract their features and classify.

    The user is asked for a folder name twice. The first answer is processed
    only if it is 'PD' and the second only if it is 'HC'; any other answer
    skips that folder. Every file listed in the matching directory is run
    through ``mfcc_features_extraction`` and ``mean_features``, which receive
    the shared feature and label lists. Finally ``classifyPHC`` is called
    with both lists.
    """
    # NOTE(review): the listing this was recovered from had lost its
    # indentation; the two prompts are assumed to be sequential rather than
    # nested -- confirm against the original file.
    prompt = 'Give the name of the folder that you want to read data: '
    audio_dirs = (
        ('PD', '/home/gionanide/Theses_2017-2018_2519/Gkagkos/Audio_Files/PD'),
        ('HC', '/home/gionanide/Theses_2017-2018_2519/Gkagkos/Audio_Files/HC'),
    )

    general_feature_list = []
    general_label_list = []

    for expected, directory in audio_dirs:
        folder = raw_input(prompt)
        if folder != expected:
            continue
        for entry in os.listdir(directory):
            wav_path = '/' + folder + '/' + str(entry)
            mfcc_features, inputWav = mfcc_features_extraction(wav_path)
            mean_features(mfcc_features, inputWav, folder,
                          general_feature_list, general_label_list)

    #print general_feature_list, general_label_list
    #writeFeatures(general_feature_list,general_label_list,wav,folder)
    classifyPHC(general_feature_list, general_label_list)
Example #9
Source File: helpers_legacy.py From opensauce-python with Apache License 2.0 | 6 votes |
def wavread(fn):
    """Emulate the parts of the matlab wavread function that we need.

        y, Fs = wavread(fn)

    y is the vector of audio samples, Fs is the frame rate.

    Matlab's wavread is what voicesauce uses to load wav files, so the
    translated algorithms expect matlab-style data: double precision floats
    between -1 and 1. scipy's reader instead returns the integer PCM values
    stored in the file (-32768 to 32767 for 16-bit audio), so every sample
    is divided by 32768 after reading.
    """
    # For reference, I figured this out from:
    # http://mirlab.org/jang/books/audiosignalprocessing/matlab4waveRead.asp?title=4-2%20Reading%20Wave%20Files
    # XXX: if we need to handle 8 bit files we'll need to detect them and
    # special case them here.
    frame_rate, pcm = wavfile.read(fn)
    full_scale = numpy.float64(32768.0)
    return pcm / full_scale, frame_rate
Example #10
Source File: io.py From pydiogment with BSD 3-Clause "New" or "Revised" License | 6 votes |
def read_file(filename):
    """
    Read wave file as mono.

    Args:
        - filename (str) : wave file / path.

    Returns:
        tuple of sampling rate and audio data. One-dimensional data is
        returned as read; otherwise only the first column (channel) is kept.
    """
    fs, sig = read(filename=filename)
    return fs, (sig if sig.ndim == 1 else sig[:, 0])
Example #11
Source File: create_spectrograms.py From Spoken-language-identification with MIT License | 6 votes |
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    """Render a log-scaled spectrogram of one audio channel to an image file.

    Args:
        audiopath: Path of the wav file to read.
        binsize: Window size handed to ``stft``.
        plotpath: Accepted for compatibility; not used.
        colormap: Accepted for compatibility; not used.
        channel: Column to analyse when the file has more than one channel.
            Ignored for mono files, which have no channel axis.
        name: Path the image is saved to.
        alpha: Passed through to ``logscale_spec``.
        offset: Accepted for compatibility; not used.

    Nothing is returned; the image is written to `name`.
    """
    samplerate, samples = wav.read(audiopath)

    # Mono files come back as a 1-D array; indexing a channel column on them
    # would raise IndexError, so only multi-channel data is sliced.
    if samples.ndim > 1:
        samples = samples[:, channel]

    s = stft(samples, binsize)
    sshow, _freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
    sshow = sshow[2:, :]
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel
    ims = np.transpose(ims)
    # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
    ims = ims[0:256, :]  # 0-11khz, ~10s interval

    image = Image.fromarray(ims).convert('L')
    image.save(name)
Example #12
Source File: utils.py From sklearn-audio-transfer-learning with ISC License | 6 votes |
def wavefile_to_waveform(wav_file, features_type):
    """Load an audio file as a waveform holding at least one second of samples.

    Args:
        wav_file: Path handed to ``sf.read``.
        features_type: When equal to 'vggish' the audio is round-tripped
            through a temporary 16-bit PCM wav file and rescaled by 1/32768,
            as done in VGGish Audioset. Any other value uses the data from
            ``sf.read`` unchanged.

    Returns:
        Tuple ``(data, sr)``. Clips shorter than ``sr`` samples are
        repeat-padded along axis 0 to exactly ``sr`` samples.

    Raises:
        ValueError: If the file contains no samples (this previously looped
            forever).
    """
    import tempfile

    data, sr = sf.read(wav_file)

    if features_type == 'vggish':
        # Use a uniquely named temporary file and always clean it up, even
        # if writing or re-reading fails. The previous random file name in
        # the working directory could collide and was leaked on error.
        fd, tmp_name = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        try:
            sf.write(tmp_name, data, sr, subtype='PCM_16')
            sr, wav_data = wavfile.read(tmp_name)
        finally:
            os.remove(tmp_name)
        # sr, wav_data = wavfile.read(wav_file) # as done in VGGish Audioset
        assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype
        data = wav_data / 32768.0  # Convert to [-1.0, +1.0]

    n_samples = data.shape[0]
    if n_samples == 0:
        raise ValueError('No audio samples found in %r' % (wav_file,))

    # At least one second of samples; if not, repeat-pad.
    # NOTE(review): the listing this was recovered from had lost its
    # indentation; truncation to `sr` samples is assumed to apply only to
    # clips that needed padding, so longer clips are returned whole -- confirm.
    if n_samples < sr:
        repeats = -(-sr // n_samples)  # ceiling division
        data = np.concatenate([data] * repeats, axis=0)[:sr]

    return data, sr
Example #13
Source File: mgca.py From Speech_Signal_Processing_and_Classification with MIT License | 5 votes |
def readWavFile(wav):
    """Return the location of a .wav file by prefixing `wav` with 'PATH_TO_WAV'."""
    # The name used to be typed in at the keyboard:
    #wav = raw_input('Give me the path of the .wav file you want to read: ')
    return 'PATH_TO_WAV' + wav

#reading the .wav file (signal file) and extract the information we need
Example #14
Source File: mfcc.py From Speech_Signal_Processing_and_Classification with MIT License | 5 votes |
def main():
    """Prompt for a folder and a sample count, then extract MFCC features.

    For every sample number from 1 up to and including the entered count,
    the path ``/<folder>/<n>.wav`` is printed and passed to
    ``mfcc_features_extraction``; its results go to ``mean_features``.
    """
    folder = raw_input('Give the name of the folder that you want to read data: ')
    amount = raw_input('Give the number of samples in the specific folder: ')

    # The prompt asks for the number of samples and numbering starts at 1,
    # so the upper bound has to be inclusive; range(1, amount) silently
    # skipped the last sample.
    for x in range(1, int(amount) + 1):
        wav = '/' + folder + '/' + str(x) + '.wav'
        print(wav)
        mfcc_features, inputWav = mfcc_features_extraction(wav)
        mean_features(mfcc_features, inputWav)
Example #15
Source File: mfcc.py From Speech_Signal_Processing_and_Classification with MIT License | 5 votes |
def readWavFile(wav):
    """Return ``('PATH_TO_WAV' + wav, wav)``: the prefixed location and the name as given."""
    # The name used to be typed in at the keyboard:
    #wav = raw_input('Give me the path of the .wav file you want to read: ')
    return 'PATH_TO_WAV' + wav, wav
Example #16
Source File: main.py From uncaptcha with MIT License | 5 votes |
def get_numbers(audio_file, parent_dir):
    """Convert an mp3 to wav, split it on silence and recognise the pieces.

    Args:
        audio_file: Path of the audio file without its extension; ``.mp3``
            is read and ``.wav`` is written next to it.
        parent_dir: Directory prefix (joined by plain concatenation) that is
            listed for the split pieces, i.e. entries containing "_0".

    Returns:
        The result of ``audio.getNums`` for the pieces whose peak sample
        exceeds ``AMP_THRESHOLD``. Quieter pieces are deleted from disk.
    """
    import subprocess

    def _run_quiet(args):
        """Run a command without a shell and with all streams silenced."""
        # No shell is involved, so file names are never re-parsed as shell
        # syntax, and the command always finishes before the next step.
        # (With /bin/sh, the old "&>" suffix could put ffmpeg in the
        # background.) A missing executable stays a silent failure.
        with open(os.devnull, 'r+') as devnull:
            try:
                return subprocess.call(args, stdin=devnull, stdout=devnull, stderr=devnull)
            except OSError:
                return None

    mp3_file = audio_file + ".mp3"
    wav_file = audio_file + ".wav"
    print("converting from " + mp3_file + " to " + wav_file)
    # "-y" overwrites an existing output, replacing the piped-in 'y' answer.
    _run_quiet(["ffmpeg", "-y", "-i", mp3_file, wav_file])

    # split audio file on silence
    _run_quiet(["sox", "-V3", wav_file, audio_file + "_.wav",
                "silence", "-l", "0", "1", "0.5", "0.1%",
                ":", "newfile", ":", "restart"])

    audio_filenames = []
    # remove audio files that are only silence
    for f in os.listdir(parent_dir):
        if "_0" not in f:
            continue
        _, snd = wavfile.read(TASK_PATH + "/" + f)
        amp = snd.max()
        print(f + ":" + str(amp))
        if amp > AMP_THRESHOLD:
            # loud enough: keep this piece
            audio_filenames.append(parent_dir + f)
        else:
            try:
                os.remove(parent_dir + f)
            except OSError:
                # Best effort, like the shell "rm" it replaces.
                pass

    # run speech recognition on the individual numbers
    # num_str = ""
    # for f in sorted(audio_filenames):
    #     print f
    #     num_str += str(audio.getNum(f))
    # print(num_str)
    return audio.getNums(TASK_PATH, audio_filenames)
Example #17
Source File: utilities.py From speech-emotion-recognition with MIT License | 5 votes |
def get_feature_vector_from_mfcc(file_path: str, flatten: bool, mfcc_len: int = 39) -> np.ndarray:
    """
    Make feature vector from MFCC for the given wav file.

    The signal is first brought to exactly ``mean_signal_length`` samples:
    shorter signals are zero-padded on both sides (the extra sample of an odd
    deficit goes at the end), longer ones are cropped around their middle.

    Args:
        file_path (str): path to the .wav file that needs to be read.
        flatten (bool) : Boolean indicating whether to flatten mfcc obtained.
        mfcc_len (int): Number of cepstral coefficients to be considered.

    Returns:
        numpy.ndarray: feature vector of the wav file made from mfcc.
    """
    fs, signal = wav.read(file_path)

    target_len = mean_signal_length
    surplus = len(signal) - target_len
    if surplus < 0:
        left, odd = divmod(-surplus, 2)
        signal = np.pad(signal, (left, left + odd), 'constant', constant_values=0)
    else:
        start = surplus // 2
        signal = signal[start:start + target_len]

    coefficients = mfcc(signal, fs, num_cepstral=mfcc_len)
    return np.ravel(coefficients) if flatten else coefficients
Example #18
Source File: mgca.py From Speech_Signal_Processing_and_Classification with MIT License | 5 votes |
def initialize(inputWav): rate , signal = wav.read(readWavFile(inputWav)) # returns a wave_read object , rate: sampling frequency sig = wave.open(readWavFile(inputWav)) # signal is the numpy 2D array with the date of the .wav file # len(signal) number of samples sampwidth = sig.getsampwidth() print 'The sample rate of the audio is: ',rate print 'Sampwidth: ',sampwidth return signal , rate #implementation of the low-pass filter
Example #19
Source File: prepare_data.py From se_relativisticgan with MIT License | 5 votes |
def prepare_sliced_data1d(opts):
    """Slice every wav file named in a listing and stack the slices.

    Args:
        opts: Mapping with the keys 'wavfolder' (directory of the wav
            files), 'window_size', 'stride', 'minlength' (all passed to
            ``read_and_slice1d``) and 'filenames' (path of a text file with
            one wav file name per line).

    Returns:
        Tuple ``(slices, index)``: all slices stacked vertically, and an
        integer array with one ``[begin, end)`` row per file giving that
        file's row range within ``slices``.
    """
    wavfolder = opts['wavfolder']
    window_size = opts['window_size']
    stride = opts['stride']
    minlength = opts['minlength']
    filenames = opts['filenames']

    with open(filenames) as listing:
        # splitlines() drops the trailing "\n" that readlines() would keep
        wav_files = listing.read().splitlines()
    total = len(wav_files)

    print ("**** Reading from " + wavfolder)
    print ("**** The folder has " + str(total) + " files.")

    slices = []
    spans = []
    cursor = 0
    for ind, wav_file in enumerate(wav_files):
        if ind % 10 == 0:
            print("Processing " + str(ind) + " of " + str(total) + " files.")
        sliced = read_and_slice1d(os.path.join(wavfolder, wav_file),
                                  window_size, minlength, stride=stride)
        slices.append(sliced)
        count = sliced.shape[0]
        spans.append(np.array([[cursor, cursor + count]]))
        cursor += count

    return np.vstack(slices), np.vstack(spans).astype('int')
Example #20
Source File: mfcc_pca_feature.py From Speech_Signal_Processing_and_Classification with MIT License | 5 votes |
def mainMaleFemale():
    """Prompt for a folder and a sample count, then extract features per sample.

    Samples are numbered from 1 up to and including the entered count. Each
    path ``/<folder>/<n>.wav`` is printed, run through
    ``mfcc_features_extraction``, and the results are passed to
    ``mean_features`` together with the folder name.
    """
    folder = raw_input('Give the name of the folder that you want to read data: ')
    amount = raw_input('Give the number of samples in the specific folder: ')

    for offset in range(int(amount)):
        sample_no = offset + 1
        wav_path = '/' + folder + '/' + str(sample_no) + '.wav'
        print(wav_path)
        mfcc_features, inputWav = mfcc_features_extraction(wav_path)
        mean_features(mfcc_features, inputWav, folder)
Example #21
Source File: run_audio_attack.py From Black-Box-Audio with MIT License | 5 votes |
def load_wav(input_wav_file):
    """Load the given wav file, require a 16 kHz sample rate, and return its samples.

    The value of ``db(audio)`` is printed as a side effect. An AssertionError
    is raised when the sample rate is not 16000.
    """
    sample_rate, samples = wav.read(input_wav_file)
    assert sample_rate == 16000
    print('source dB', db(samples))
    return samples
Example #22
Source File: least_squares_first_peaks_2.py From Automated_Music_Transcription with MIT License | 5 votes |
def Pertusa_Inesta_Algorithm(self):
    """Compute the STFT of ``self.wav_file`` and print its bin frequencies.

    The audio is reduced to one channel, transformed with ``self.STFT``
    using ``self.HAN_WINDOW`` and ``self.HOP_SIZE``, and the frequencies
    returned by ``self.get_stft_bin_freqs`` are printed. Nothing is returned;
    the MIDI matching step is still disabled (see the comments below).
    """
    (framerate, sample) = wav.read(self.wav_file)

    # Multi-channel audio is averaged down to one channel. A mono file comes
    # back as a 1-D array with no channel axis, so averaging over axis 1
    # would fail; it is only converted to float.
    if sample.ndim > 1:
        monoChannel = sample.mean(axis=1)
    else:
        monoChannel = sample.astype(float)

    stft = self.STFT(monoChannel, framerate, self.HAN_WINDOW, self.HOP_SIZE)
    (stft_bin_freqs, stft_magnitudes) = self.get_stft_bin_freqs(stft, framerate)
    print(stft_bin_freqs)
    #print self.get_candidates_with_partials(stft_bin_freqs[0], stft_magnitudes[0])

    # Disabled MIDI matching step, kept for reference:
    # midi_notes = []
    # for freqs in stftBinFrequencies:
    #     midi_notes.append(self.matchWithMIDINotes(sorted(freqs.keys())))
    # print midi_notes[0]
    # return midi_notes
Example #23
Source File: audio_read.py From audfprint with MIT License | 5 votes |
def read_data(self, timeout=10.0):
    """Read blocks of raw PCM data from the file.

    Blocks are taken from ``self.stdout_reader.queue``, which a separate
    reader thread fills, and yielded one by one. An empty block marks the
    end of the stream and ends the generator.

    Args:
        timeout: Seconds to wait for each block.

    Raises:
        ValueError: If no block arrives within `timeout` seconds and at
            least that long has passed since the last reference time. The
            message carries whatever ``self.stderr_reader`` has queued.
    """
    last_progress = time.time()
    while True:
        # Wait for data to be available or a timeout.
        block = None
        try:
            block = self.stdout_reader.queue.get(timeout=timeout)
            if not block:
                # End of file.
                break
            yield block
        except queue.Empty:
            # Queue read timed out.
            now = time.time()
            if not block:
                if now - last_progress >= timeout:
                    # Nothing interesting has happened for a while --
                    # FFmpeg is probably hanging.
                    raise ValueError('ffmpeg output: {}'.format(
                        ''.join(self.stderr_reader.queue.queue)
                    ))
                # Keep waiting, measuring from this moment on.
                last_progress = now
                continue
Example #24
Source File: audio_read.py From audfprint with MIT License | 5 votes |
def __init__(self, filename, channels=None, sample_rate=None, block_size=4096):
    """Start an ffmpeg process that decodes `filename` to raw 16-bit PCM on stdout.

    Args:
        filename: Path of the audio file; must be an existing file.
        channels: If truthy, passed to ffmpeg as ``-ac``.
        sample_rate: If truthy, passed to ffmpeg as ``-ar``.
        block_size: Block size given to the stdout reader thread.

    Raises:
        ValueError: If `filename` is not a file, or if ``_get_info`` fails
            to read the header information.
    """
    if not os.path.isfile(filename):
        raise ValueError(filename + " not found.")

    self.channels = channels
    self.sample_rate = sample_rate

    command = ['ffmpeg', '-i', filename, '-f', 's16le']
    if channels:
        command += ['-ac', str(channels)]
    if sample_rate:
        command += ['-ar', str(sample_rate)]
    command += ['-']  # write the decoded audio to stdout

    self.proc = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )

    # A dedicated thread consumes the process's standard output, which
    # carries the raw audio data.
    self.stdout_reader = QueueReaderThread(self.proc.stdout, block_size)
    self.stdout_reader.start()

    # Read relevant information from stderr.
    try:
        self._get_info()
    except ValueError:
        raise ValueError("Error reading header info from " + filename)

    # A second thread reads the rest of stderr. This (a) avoids filling up
    # the OS buffer and (b) collects the error output for diagnosis.
    self.stderr_reader = QueueReaderThread(self.proc.stderr)
    self.stderr_reader.start()
Example #25
Source File: audio_read.py From audfprint with MIT License | 5 votes |
def run(self):
    """Read ``self.fh`` in blocks of ``self.blocksize`` until it is exhausted.

    Each block, including the final empty one that marks the end of the
    stream, is put on ``self.queue`` unless ``self.discard`` is set.
    """
    more = True
    while more:
        chunk = self.fh.read(self.blocksize)
        if not self.discard:
            self.queue.put(chunk)
        # An empty read means the stream is closed (EOF).
        more = bool(chunk)
Example #26
Source File: audio_read.py From audfprint with MIT License | 5 votes |
def wavread(filename):
    """Read in audio data from a wav file. Return d, sr.

    Args:
        filename: Path or file object accepted by the wav reader.

    Returns:
        Tuple ``(data, samplerate)`` where ``data`` holds the samples as
        float64 values divided by 32768.
    """
    # Read in wav file.
    samplerate, wave_data = wav.read(filename)
    # Normalize short ints to floats in range [-1..1).
    # np.asfarray was removed in NumPy 2.0; an explicit float64 conversion
    # gives the same result on every NumPy version.
    data = np.asarray(wave_data, dtype=np.float64) / 32768.0
    return data, samplerate
Example #27
Source File: demo.py From audiomentations with MIT License | 5 votes |
def load_wav_file(sound_file_path):
    """Load a wav file recorded at ``SAMPLE_RATE`` and return float32 samples.

    float32 data is returned as read. int16 data is divided by 32767, so it
    ends up roughly between -1 and 1, and converted to float32.

    Raises:
        Exception: If the file's sample rate differs from ``SAMPLE_RATE``.
        AssertionError: If the samples are neither float32 nor int16.
    """
    sample_rate, sound_np = wavfile.read(sound_file_path)
    if sample_rate != SAMPLE_RATE:
        raise Exception(
            "Unexpected sample rate {} (expected {})".format(sample_rate, SAMPLE_RATE)
        )

    if sound_np.dtype == np.float32:
        return sound_np

    assert sound_np.dtype == np.int16
    return (sound_np / 32767).astype(np.float32)
Example #28
Source File: feat_extract.py From persephone with Apache License 2.0 | 5 votes |
def mfcc(wav_path):
    """Grab MFCC features with energy and derivatives and save them next to the wav.

    The MFCCs (with energy appended) and their deltas are stacked and
    rearranged so that time is the first axis and the stream index the last.
    The array is written with ``np.save`` to `wav_path` with its last three
    characters replaced by ``mfcc13_d.npy``. Nothing is returned.
    """
    rate, sig = wav.read(wav_path)
    static = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    deltas = python_speech_features.delta(static, 2)

    # Make time the first dimension for easy length normalization padding
    # later: (stream, time, coeff) -> (time, coeff, stream).
    stacked = np.transpose(np.array([static, deltas]), (1, 2, 0))

    np.save(wav_path[:-3] + "mfcc13_d.npy", stacked)
Example #29
Source File: test_wavfile.py From Computable with MIT License | 5 votes |
def _check_roundtrip(realfile, rate, dtype, channels):
    """Write random audio with wavfile.write and check wavfile.read returns it.

    Args:
        realfile: Use a temporary file on disk when true (removed
            afterwards), otherwise an in-memory BytesIO.
        rate: Sample rate to write and expect back.
        dtype: numpy dtype of the samples. Float kinds are written as drawn;
            other kinds are scaled by 128 before the cast.
        channels: Number of channels; 1 produces a one-dimensional array.
    """
    if realfile:
        handle, target = tempfile.mkstemp(suffix='.wav')
        os.close(handle)
    else:
        target = BytesIO()

    try:
        samples = np.random.rand(100, channels)
        if channels == 1:
            samples = samples[:, 0]
        # The range of a float type should stay within [-1, 1]; anything
        # else is scaled up first.
        scaled = samples if dtype.kind == 'f' else samples * 128
        samples = scaled.astype(dtype)

        wavfile.write(target, rate, samples)

        for use_mmap in (False, True):
            rate2, data2 = wavfile.read(target, mmap=use_mmap)
            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(samples, data2)
            del data2
    finally:
        if realfile:
            os.unlink(target)
Example #30
Source File: test_wavfile.py From Computable with MIT License | 5 votes |
def test_read_fail():
    """Reading a file that is not a wav must raise ValueError, with and without mmap."""
    for mmap in [False, True]:
        # Open in binary mode: a wav reader works on bytes, and a text-mode
        # handle can fail for reasons unrelated to the file format.
        fp = open(datafile('example_1.nc'), 'rb')
        try:
            assert_raises(ValueError, wavfile.read, fp, mmap=mmap)
        finally:
            # Close the handle even when the assertion itself fails.
            fp.close()