Python scipy.io.wavfile.read() Examples

The following are code examples for showing how to use scipy.io.wavfile.read(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: multilabel-cnn   Author: hollygrimm   File: utils.py    Apache License 2.0 7 votes vote down vote up
def load_audio(filename, b_normalize=True):
    """Load the audiofile at the provided filename using scipy.io.wavfile.

    Optionally normalizes the audio to the maximum value.

    Parameters
    ----------
    filename : str
        File to load.
    b_normalize : bool, optional
        Normalize to the maximum value.

    Returns
    -------
    s : np.ndarray
        The samples: float32, peak-normalized and mean-centered when
        `b_normalize` is True; otherwise the raw samples as read from disk.
    """
    sr, s = wavfile.read(filename)
    if b_normalize:
        s = s.astype(np.float32)
        peak = np.max(np.abs(s))
        # Guard against all-zero (silent) input: dividing by a zero peak
        # previously produced an array of NaNs.
        if peak > 0:
            s = s / peak
        s -= np.mean(s)
    return s
Example 2
Project: Spoken-language-identification   Author: YerevaNN   File: augment_data.py    MIT License 6 votes vote down vote up
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    """Render one channel of a WAV file as a log-scaled spectrogram image.

    Saves a grayscale image of size up to 256 (freq bins, ~0-11 kHz) by
    768 (time frames, ~9 s starting at `offset`) to `name`.
    `plotpath` and `colormap` are accepted for interface compatibility
    but are not used here.
    """
    rate, wav_data = wav.read(audiopath)
    mono = wav_data[:, channel]

    spec = stft(mono, binsize)
    scaled, _freqs = logscale_spec(spec, factor=1, sr=rate, alpha=alpha)
    scaled = scaled[2:, :]

    # Amplitude to decibels, with the same 10e-6 reference as before.
    db_img = 20. * np.log10(np.abs(scaled) / 10e-6)

    # Transpose to (freq, time), then crop the band/window of interest.
    db_img = np.transpose(db_img)[0:256, offset:offset + 768]

    Image.fromarray(db_img).convert('L').save(name)
Example 3
Project: Spoken-language-identification   Author: YerevaNN   File: create_spectrograms.py    MIT License 6 votes vote down vote up
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    """Render one channel of a WAV file as a log-scaled spectrogram image.

    Unlike the windowed variant, this keeps the full time axis (~0-11 kHz
    band only) and writes the grayscale result to `name`.  `plotpath`,
    `colormap` and `offset` are accepted for interface compatibility but
    are not used here.
    """
    rate, wav_data = wav.read(audiopath)
    mono = wav_data[:, channel]

    spec = stft(mono, binsize)
    scaled, _freqs = logscale_spec(spec, factor=1, sr=rate, alpha=alpha)
    scaled = scaled[2:, :]

    # Amplitude to decibels, with the same 10e-6 reference as before.
    db_img = 20. * np.log10(np.abs(scaled) / 10e-6)

    # Transpose to (freq, time); keep the first 256 bins, all frames.
    db_img = np.transpose(db_img)[0:256, :]

    Image.fromarray(db_img).convert('L').save(name)
Example 4
Project: stalta   Author: PALab   File: datainput.py    MIT License 6 votes vote down vote up
def __readwavefile(filein, firstsample = 0, maxsamples = -1):
    '''Read the wav file/stream `filein` and return a list with one entry
    per channel: [samples, sample_period_seconds, start_time_seconds, label].

    NOTE(review): the original docstring claimed a dictionary (or None) was
    returned, but the code always returns a list.  Also note that
    `maxsamples` is used below as an absolute end *index* into the data,
    not as a count of samples -- confirm which semantics callers expect.
    '''

    samplerate, data = wav.read(filein)
    nsamples = len(data)

    # Clamp the start index into range (an empty file would yield -1 here).
    firstsample = min(nsamples - 1, firstsample)
    if maxsamples < 0:
        maxsamples = nsamples
    #  samplesin = min(nsamples - firstsample, maxsamples)

    # Only a single channel is emitted.
    nch = 1

    r = []
    for chno in range(nch):
        # [slice of samples, dt per sample, start time, channel label]
        r.append([data[firstsample:maxsamples],
                  1.0 / samplerate,
                  firstsample / samplerate,
                  "wav %d" % (chno,)]
                 )
    return r
Example 5
Project: Pitch   Author: ParthMehta15   File: __init__.py    MIT License 6 votes vote down vote up
def find_pitch(audiofile):
    """Estimate the fundamental pitch (Hz) of a WAV file from the peak of
    its normalized autocorrelation, searched between 50 Hz and 500 Hz.

    NOTE(review): the /32767 scaling assumes 16-bit samples, and the code
    assumes a mono file -- confirm for other inputs.
    """
    fs,x = wav.read(audiofile)
    # Lag bounds: ms20 = lag of one 50 Hz period, ms2 = lag of one 500 Hz period.
    ms20=int((fs/50))
    ms2=int(fs/500)

    # Scale PCM samples into roughly [-1, 1].
    x=[i/32767 for i in x]

    # plt.acorr returns a tuple whose second element is the normalized
    # autocorrelation over lags -maxlags..+maxlags.
    y=plt.acorr(x,maxlags=ms20,normed=True)

    y=y[1]
    # Keep non-negative lags, then restrict to the 500 Hz..50 Hz band.
    z=y[round(len(y)/2):]
    z=z[ms2:ms20]
    zmax=max(z)

    index=np.where(z==zmax)
    index=index[0][0]

    # NOTE(review): the +2 compensates for the slice offsets above --
    # presumably empirical; verify against a known-pitch recording.
    pitch=fs/(ms2+index+2)

    return pitch
Example 6
Project: ultrastar_pitch   Author: paradigmn   File: project_parser.py    GNU General Public License v2.0 6 votes vote down vote up
def process_audio(self, sample_rate=16000):
    """ convert and resample audio file before dividing it into audio segments\n
    @param sample_rate  sampling frequency for the conversion
    @return  list of audio segments
    """
    err_str1 = "{0} has to be an positive integer value"
    assert (isinstance(sample_rate, int) and sample_rate > 0), err_str1.format("sample_rate")
    # ffmpeg: project mp3 -> temporary mono wav at the requested rate
    src = os.path.join(self.__proj_dir, self.__meta["#MP3"])
    tmp_wav = os.path.join(self.__proj_dir, "tmp.wav")
    cmd = [self.__FFMPEG, '-i', src, '-y',
           '-ac', '1', '-ar', str(sample_rate), tmp_wav]
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    # pull the samples into numpy, then discard the temporary file
    _, mono = wavfile.read(tmp_wav)
    os.remove(tmp_wav)
    # slice one segment per singable note (t_start/t_end are milliseconds)
    segments = []
    for note in self.__singable:
        lo = int(round((note["t_start"] * sample_rate) / 1000))
        hi = int(round((note["t_end"] * sample_rate) / 1000))
        segments.append(mono[lo:hi])
    return segments
Example 7
Project: CRAFT   Author: dafyddg   File: modgpyrapt.py    GNU General Public License v3.0 6 votes vote down vote up
def _first_pass_nccf(audio, raptparam):
    """First-pass normalized cross-correlation (NCCF) over downsampled audio.

    Produces, for each frame, a set of potential F0 candidates that later
    passes use to determine the pitch of that frame.

    audio : downsampled audio input consumed by the NCCF helpers.
    raptparam : RAPT parameter object.

    Returns a list of per-frame results from _get_firstpass_frame_results.
    """
    nccfparam = _get_nccf_params(audio, raptparam, True)
    params = (raptparam, nccfparam)

    # Difference between "K-1" and starting value of "k"
    lag_range = ((params[1].longest_lag_per_frame - 1) -
                 params[1].shortest_lag_per_frame)

    # TODO: Re-read discussion of using double-precision arithmetic in rapt 3.3

    candidates = [None] * params[1].max_frame_count

    # BUGFIX: `xrange` is Python-2-only (NameError on Python 3); `range`
    # iterates identically on both versions.
    for i in range(0, params[1].max_frame_count):
        candidates[i] = _get_firstpass_frame_results(
            audio, i, lag_range, params)

    return candidates
Example 8
Project: AudioEmotionDetection   Author: DefinitelyNotTim   File: emotionProcessor-threaded.py    MIT License 6 votes vote down vote up
def volumeProc(self):
    """Read the WAV file at ``self.fname`` and return its FFT frequency axis.

    Returns a numpy array of the frequencies (Hz) corresponding to the
    one-sided spectrum of the signal; the spectrum itself is computed but
    not returned.
    """
    rate, raw = wavfile.read(self.fname)
    normalized = raw / (2. ** 15)          # scale 16-bit PCM into [-1, 1)
    sig = normalized[:]
    count = len(sig)
    spectrum = fft(sig)                    # full complex FFT
    half = int(math.ceil((count + 1) / 2.0))   # one-sided length
    spectrum = abs(spectrum[0:half]) / float(count)
    spectrum = spectrum ** 2
    # Double every bin except DC (and Nyquist when count is even), as in
    # a standard one-sided power spectrum.
    if count % 2 > 0:
        spectrum[1:len(spectrum)] = spectrum[1:len(spectrum)] * 2
    else:
        spectrum[1:len(spectrum) - 1] = spectrum[1:len(spectrum) - 1] * 2
    return numpy.arange(0, half, 1.0) * (rate / count)
Example 9
Project: osbpy   Author: KawaiiWafu   File: osbpy.py    MIT License 6 votes vote down vote up
def end_minimize(cls, osb_file):
        """Finalize `osb_file`, shrinking it by replacing duplicated strings
        with short $variables declared in a [Variables] section.

        The full storyboard is first written to `osb_file + ".bkp"`; the
        minimized version is then rebuilt into `osb_file`.
        """
        # Start from a clean slate; the .bkp file keeps the full version.
        if os.path.isfile(osb_file):
            os.remove(osb_file)
        cls.end(osb_file + ".bkp")
        short_names = []
        c = 1
        # Longest duplicates first, so a string that is a substring of a
        # longer duplicate is never substituted inside it.
        cls.minim_duplicate.sort(key=len, reverse=True)
        for i in itertools.islice(variable_name(), len(cls.minim_duplicate)):
            short_names.append(i)
        # NOTE(review): pairing of generated names to duplicates depends on
        # the order variable_name() yields names -- confirm intent.
        short_names.reverse()
        minim_dict = dict(zip(cls.minim_duplicate, short_names))
        with open(osb_file + ".bkp") as backup_text:
            temporary_text = backup_text.read()
            with open(osb_file, "a") as text:
                text.write("[Variables]\n")
                # Insertion order of minim_dict preserves the longest-first
                # ordering, so replacements happen longest-first too.
                for key, val in minim_dict.items():
                    text.write("$%s=%s\n" % (val, key))
                    temporary_text = temporary_text.replace(key, "".join(["$", val]))
                    print("%s/%s" % (c, len(minim_dict)))  # progress indicator
                    c += 1
                text.write(temporary_text)
Example 10
Project: signaltrain   Author: drscotthawley   File: audio.py    GNU General Public License v3.0 6 votes vote down vote up
def triangle(t, randfunc=np.random.rand, t0_fac=None): # ramp up then down
    """Return a randomized triangular pulse over the time axis `t`, with
    pink noise added on top.  `t0_fac`, when given, fixes the pulse center
    as a fraction of t[-1]; otherwise the center is randomized."""
    amp = (0.4 * randfunc() + 0.4) * np.random.choice([-1, 1])
    half_width = randfunc() / 4 * t[-1]
    # Keep the same randfunc() call sequence as before: the extra draw for
    # the center happens only when t0_fac is not supplied.
    if t0_fac is None:
        center = 2 * half_width + 0.4 * randfunc() * t[-1]
    else:
        center = t0_fac * t[-1]
    pulse = amp * (1 - np.abs(t - center) / half_width)
    pulse[np.where(t < (center - half_width))] = 0
    pulse[np.where(t > (center + half_width))] = 0
    noise_amp = 0.1 * randfunc() + 0.02   # add noise
    return pulse + noise_amp * pinknoise(t.shape[0])


# Prelude to read_audio_file
# Tried lots of ways of doing this; most are slow.
#signal, rate = librosa.load(filename, sr=sr, mono=True, res_type='kaiser_fast') # Librosa's reader is incredibly slow. do not use
#signal, rate = torchaudio.load(filename)#, normalization=True)   # Torchaudio's reader is pretty fast but normalization is a problem
#signal = signal.numpy().flatten()
#reader = io_methods.AudioIO   # Stylios' file reader. Haven't gotten it working yet
#signal, rate = reader.audioRead(filename, mono=True)
#signal, rate = sf.read('existing_file.wav') 
Example 11
Project: signaltrain   Author: drscotthawley   File: audio.py    GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, path, sr=44100, ):
    """Effect whose input/target pairs are read from files under `path`.

    path : directory expected to contain Train/target*, Val/target* and
           effect_info.ini (exits the process if any are missing).
    sr   : sample rate in Hz.
    """
    super(FileEffect, self).__init__()
    print("  FileEffect: path = ",path)
    # Fail fast when the dataset layout / config file is missing.
    if (path is None) or (not glob.glob(path+"/Train/target*")) \
        or (not glob.glob(path+"/Val/target*")) or ((not glob.glob(path+"/effect_info.ini"))):
        print(f"Error: can't find target output files or effect_info.ini in path = {path}")
        sys.exit(1)   # Yea, this is fatal

    self.sr = sr
    # read the effect info config file  "effect_info.ini"
    config = configparser.ConfigParser()
    config.read(path+'/effect_info.ini')
    self.name = config['effect']['name']+"(files)"   # tack on "(files)" to the effect name
    #TODO: note that use of 'eval' below could be a potential security issue
    self.knob_names = eval(config.get("effect","knob_names"))
    self.knob_ranges = np.array(eval(config.get("effect","knob_ranges")))
    try:
        # BUGFIX: bool("False") is True, so the old `True == bool(...)` test
        # treated *any* 'inverse' value as true.  getboolean() parses
        # "yes"/"no"/"true"/"false"/"1"/"0" properly.
        self.is_inverse = config['effect'].getboolean('inverse')
        if self.is_inverse:
            self.name = "De-"+self.name
    except (KeyError, ValueError):
        pass   # 'inverse' is optional; ignore if absent or malformed
Example 12
Project: CR0FT   Author: dafyddg   File: modgpyrapt.py    GNU General Public License v3.0 6 votes vote down vote up
def _first_pass_nccf(audio, raptparam):
    """First-pass normalized cross-correlation (NCCF) over downsampled audio.

    Produces, for each frame, a set of potential F0 candidates that later
    passes use to determine the pitch of that frame.

    audio : downsampled audio input consumed by the NCCF helpers.
    raptparam : RAPT parameter object.

    Returns a list of per-frame results from _get_firstpass_frame_results.
    """
    nccfparam = _get_nccf_params(audio, raptparam, True)
    params = (raptparam, nccfparam)

    # Difference between "K-1" and starting value of "k"
    lag_range = ((params[1].longest_lag_per_frame - 1) -
                 params[1].shortest_lag_per_frame)

    # TODO: Re-read discussion of using double-precision arithmetic in rapt 3.3

    candidates = [None] * params[1].max_frame_count

    # BUGFIX: `xrange` is Python-2-only (NameError on Python 3); `range`
    # iterates identically on both versions.
    for i in range(0, params[1].max_frame_count):
        candidates[i] = _get_firstpass_frame_results(
            audio, i, lag_range, params)

    return candidates
Example 13
Project: gm-cml   Author: wangyida   File: utils.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_audio(filename, b_normalize=True):
    """Read `filename` with scipy.io.wavfile, optionally peak-normalizing
    and mean-centering the samples.

    Parameters
    ----------
    filename : str
        Path of the WAV file to read.
    b_normalize : bool, optional
        When True, scale to unit peak and remove the mean offset.
    """
    rate, samples = wavfile.read(filename)
    if not b_normalize:
        return samples
    samples = samples.astype(np.float32)
    samples = samples / np.max(np.abs(samples))
    samples = samples - np.mean(samples)
    return samples
Example 14
Project: Black-Box-Audio   Author: rtaori   File: run_audio_attack.py    MIT License 5 votes vote down vote up
def load_wav(input_wav_file):
    """Load a waveform, asserting the expected 16 kHz sample rate, and
    print its level in dB before returning the samples."""
    sample_rate, samples = wav.read(input_wav_file)
    # The attack pipeline is tuned for 16 kHz input only.
    assert sample_rate == 16000
    print('source dB', db(samples))
    return samples
Example 15
Project: voice-recognition   Author: golabies   File: read_data.py    MIT License 5 votes vote down vote up
def recording(self, duration=5):
        """Record `duration` seconds from the default microphone into
        ``self.voice`` as a (channels, frames) float64 array.

        Uses ``self.fs`` (sample rate) and ``self.ch`` (channel count).
        """
        # read data from microphone
        # duration is the length of time you want to record
        self.duration = duration
        # NOTE(review): sd.rec takes a frame count; assumes
        # `duration * self.fs` is an integer -- confirm self.fs is int.
        self.voice = sd.rec(self.duration * self.fs, samplerate=self.fs, channels=self.ch, dtype='float64')
        sd.wait()  # block until the recording has finished
        self.voice = self.voice.T.copy()  # (frames, channels) -> (channels, frames)
Example 16
Project: voice-recognition   Author: golabies   File: read_data.py    MIT License 5 votes vote down vote up
def read_wave(self, name='out_put_0.wav'):
    """Read the WAV file `name` into ``self.voice`` with an added leading
    axis, i.e. shape (1, n_samples); also remembers the path in
    ``self.name``.  The sample rate is discarded."""
    self.name = name
    _rate, data = wavfile.read(self.name)
    self.voice = np.array([data])
Example 17
Project: Deep_VoiceChanger   Author: pstuvwx   File: gla_gpu.py    MIT License 5 votes vote down vote up
def load(path):
    """Read a WAV file and return (sample_rate, mono_samples).

    Multichannel input is mixed down by summing the first two channels.
    The sum is done in int64: the previous int16 addition silently
    wrapped around (e.g. 20000 + 20000 became -25536).
    """
    bps, data = wav.read(path)
    if len(data.shape) != 1:
        # Widen before adding -- summing two int16 channels can overflow.
        data = data[:, 0].astype('int64') + data[:, 1]
    return bps, data
Example 18
Project: Deep_VoiceChanger   Author: pstuvwx   File: gla_util.py    MIT License 5 votes vote down vote up
def load(path):
    """Read a WAV file and return (sample_rate, mono_samples).

    Multichannel input is mixed down by summing the first two channels.
    The sum is done in int64: the previous int16 addition silently
    wrapped around (e.g. 20000 + 20000 became -25536).
    """
    bps, data = wav.read(path)
    if len(data.shape) != 1:
        # Widen before adding -- summing two int16 channels can overflow.
        data = data[:, 0].astype('int64') + data[:, 1]
    return bps, data
Example 19
Project: Deep_VoiceChanger   Author: pstuvwx   File: dataset.py    MIT License 5 votes vote down vote up
def load(path):
    """Read a WAV file and return (sample_rate, mono_samples).

    Multichannel input is mixed down by summing the first two channels.
    The sum is done in int64: the previous int16 addition silently
    wrapped around (e.g. 20000 + 20000 became -25536).
    """
    bps, data = wav.read(path)
    if len(data.shape) != 1:
        # Widen before adding -- summing two int16 channels can overflow.
        data = data[:, 0].astype('int64') + data[:, 1]
    return bps, data
Example 20
Project: pnp   Author: HazardDede   File: sound.py    MIT License 5 votes vote down vote up
def _load_wav_fft(self):
    """Read the WAV file at ``self.abs_path`` and return (N, signal_fft)
    as produced by perform_fft; the duration is only logged."""
    import scipy.io.wavfile as wavfile
    _LOGGER.debug("Loading wav file from '%s'", self.abs_path)
    sample_rate, signal = wavfile.read(self.abs_path)
    fft_size, duration, spectrum = perform_fft(signal, sample_rate)
    _LOGGER.debug("Loaded %s seconds wav file @ %s hz", duration, sample_rate)
    return fft_size, spectrum
Example 21
Project: fine-lm   Author: akzaidi   File: speech_recognition.py    MIT License 5 votes vote down vote up
def encode(self, s):
    """Transform a string with a filename into a list of floats.

    Args:
      s: path to the file with a waveform.

    Returns:
      samples: list of floats in [-1, 1] (converted from integer PCM
      when necessary; float32/float64 input is passed through).
    """
    # Make sure that the data is a single channel, 16bit, 16kHz wave.
    # TODO(chorowski): the directory may not be writable, this should fallback
    # to a temp path, and provide instructions for installing sox.
    if s.endswith(".mp3"):
      # TODO(dliebling) On Linux, check if libsox-fmt-mp3 is installed.
      out_filepath = s[:-4] + ".wav"
      call([
          "sox", "--guard", s, "-r", "16k", "-b", "16", "-c", "1", out_filepath
      ])
      s = out_filepath
    elif not s.endswith(".wav"):
      # Non-mp3, non-wav input: convert next to the original, reusing an
      # existing .wav from a previous run if present.
      out_filepath = s + ".wav"
      if not os.path.exists(out_filepath):
        call(["sox", "-r", "16k", "-b", "16", "-c", "1", s, out_filepath])
      s = out_filepath
    rate, data = wavfile.read(s)
    # NOTE: asserts vanish under `python -O`; these are sanity checks, not
    # input validation.
    assert rate == self._sample_rate
    assert len(data.shape) == 1
    if data.dtype not in [np.float32, np.float64]:
      # NOTE(review): for unsigned formats (e.g. uint8) this scales but
      # does not recenter around zero -- confirm only signed PCM arrives.
      data = data.astype(np.float32) / np.iinfo(data.dtype).max
    return data.tolist()
Example 22
Project: Tensorflow-Audio-Classification   Author: luuil   File: vggish_input.py    Apache License 2.0 5 votes vote down vote up
def wavfile_to_examples(wav_file):
  """Convenience wrapper around waveform_to_examples() for a common WAV format.

  Args:
    wav_file: String path to a file, or a file-like object, holding WAV
      audio data with signed 16-bit PCM samples.

  Returns:
    See waveform_to_examples.
  """
  sample_rate, pcm = wavfile.read(wav_file)
  assert pcm.dtype == np.int16, 'Bad sample type: %r' % pcm.dtype
  # Scale int16 samples into [-1.0, +1.0).
  return waveform_to_examples(pcm / 32768.0, sample_rate)
Example 23
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_1():
    """A 44.1 kHz mono int32 file reads correctly, with and without mmap."""
    for use_mmap in (False, True):
        with warnings.catch_warnings():
            # The file carries an extra chunk; silence the resulting warning.
            warnings.simplefilter('ignore', wavfile.WavFileWarning)
            rate, data = wavfile.read(datafile('test-44100Hz-le-1ch-4bytes.wav'),
                                      mmap=use_mmap)

        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.int32))
        assert_equal(data.shape, (4410,))

        del data  # release any mmap before the next iteration
Example 24
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_2():
    """An 8 kHz stereo uint8 file reads correctly, with and without mmap."""
    for use_mmap in (False, True):
        rate, data = wavfile.read(datafile('test-8000Hz-le-2ch-1byteu.wav'),
                                  mmap=use_mmap)
        assert_equal(rate, 8000)
        assert_(np.issubdtype(data.dtype, np.uint8))
        assert_equal(data.shape, (800, 2))

        del data  # release any mmap before the next iteration
Example 25
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_3():
    """A 44.1 kHz stereo float32 (little-endian) file reads correctly."""
    for use_mmap in (False, True):
        rate, data = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-le.wav'),
                                  mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.float32))
        assert_equal(data.shape, (441, 2))

        del data  # release any mmap before the next iteration
Example 26
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_4():
    """A 48 kHz stereo float64 WAVEX file reads correctly."""
    for use_mmap in (False, True):
        with warnings.catch_warnings():
            # WAVEX headers trigger a WavFileWarning; it is expected noise.
            warnings.simplefilter('ignore', wavfile.WavFileWarning)
            rate, data = wavfile.read(datafile('test-48000Hz-2ch-64bit-float-le-wavex.wav'),
                                      mmap=use_mmap)

        assert_equal(rate, 48000)
        assert_(np.issubdtype(data.dtype, np.float64))
        assert_equal(data.shape, (480, 2))

        del data  # release any mmap before the next iteration
Example 27
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_5():
    """A big-endian float32 file reads correctly and keeps its byte order."""
    for use_mmap in (False, True):
        rate, data = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-be.wav'),
                                  mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.float32))
        # '=' (native) is acceptable only on a big-endian host.
        assert_(data.dtype.byteorder == '>' or (sys.byteorder == 'big' and
                                                data.dtype.byteorder == '='))
        assert_equal(data.shape, (441, 2))

        del data  # release any mmap before the next iteration
Example 28
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_early_eof():
    """A WAV file truncated before the end of its data must raise ValueError."""
    for use_mmap in (False, True):
        # `with` guarantees the handle is closed even when the assertion
        # fails (the old open()/close() pair leaked the file on failure).
        with open(datafile('test-44100Hz-le-1ch-4bytes-early-eof.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=use_mmap)
Example 29
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_incomplete_chunk():
    """A WAV file with an incomplete chunk must raise ValueError."""
    for use_mmap in (False, True):
        # `with` guarantees the handle is closed even when the assertion
        # fails (the old open()/close() pair leaked the file on failure).
        with open(datafile('test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=use_mmap)
Example 30
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License 5 votes vote down vote up
def _check_roundtrip(realfile, rate, dtype, channels):
    """Write random data with wavfile.write and check wavfile.read returns
    it unchanged -- against a real temp file (realfile=True) or a BytesIO
    buffer, with and without mmap."""
    if realfile:
        fd, target = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        target = BytesIO()
    try:
        payload = np.random.rand(100, channels)
        if channels == 1:
            payload = payload[:, 0]
        if dtype.kind == 'f':
            # Float formats expect values already in [-1, 1].
            payload = payload.astype(dtype)
        else:
            # Integer formats get a small positive range instead.
            payload = (payload * 128).astype(dtype)

        wavfile.write(target, rate, payload)

        for use_mmap in (False, True):
            rate_rt, payload_rt = wavfile.read(target, mmap=use_mmap)

            assert_equal(rate, rate_rt)
            assert_(payload_rt.dtype.byteorder in ('<', '=', '|'), msg=payload_rt.dtype)
            assert_array_equal(payload, payload_rt)

            del payload_rt
    finally:
        if realfile:
            os.unlink(target)
Example 31
Project: devicehive-audio-analysis   Author: devicehive   File: parse_file.py    Apache License 2.0 5 votes vote down vote up
def process_file(wav_file):
    """Run the audio classifier over a 16-bit PCM WAV file and print the
    formatted predictions.  Raises TypeError for any other sample type."""
    sample_rate, samples = wavfile.read(wav_file)
    if samples.dtype != np.int16:
        raise TypeError('Bad sample type: %r' % samples.dtype)

    # Imported lazily so that merely importing this module stays cheap.
    from audio.processor import WavProcessor, format_predictions

    with WavProcessor() as processor:
        results = processor.get_predictions(sample_rate, samples)

    print(format_predictions(results))
Example 32
Project: ultrastar_pitch   Author: paradigmn   File: project_parser.py    GNU General Public License v2.0 5 votes vote down vote up
def load_note_file(self, note_file):
    """ load metadata and notelist into iterable objects for manipulation\n
    @param  note_file USDX project file
    """
    self.__meta.clear()
    self.__singable.clear()
    self.__file_buffer.clear()
    self.__proj_dir = os.path.dirname(note_file)
    # BUGFIX: use a context manager so the handle is always closed (the
    # old code opened the file and never closed it).
    with open(note_file, 'r', encoding="utf-8") as handle:
        # buffer file for later reuse
        self.__file_buffer = handle.read().splitlines(True)
    for line in self.__file_buffer:
        # parse header (only until the first singable line appears)
        if line.startswith('#') and not self.__singable:
            # BUGFIX: strip '\n' before '\r' -- a "...\r\n" line previously
            # kept its trailing '\r' because '\r' was stripped first.
            line = line.rstrip('\n').rstrip('\r')
            key, value = line.split(':', 1)
            if key in ("#BPM", "#GAP"):
                self.__meta[key] = float(value.replace(',', '.'))
            else:
                self.__meta[key] = value
        # parse singable notes
        elif line.startswith((':', '*')):
            line = line.split(' ')
            pitch = int(line[3]) % 12
            # start = gap + start_beat * (15000 / bpm)
            # I have no idea where the 15000 comes from, I found it manually by trial and error
            t_start = self.__meta["#GAP"] + float(line[1]) * (15000 / self.__meta["#BPM"])
            # end = gap + (start_beat + duration) * (15000 / bpm)
            t_end = self.__meta["#GAP"] + (float(line[1]) + \
                                           float(line[2])) * (15000 / self.__meta["#BPM"])
            # append line data to singable list
            self.__singable.append({"t_start" : t_start, "t_end" : t_end, "pitch" : pitch})
Example 33
Project: pyramic-dataset   Author: fakufaku   File: segment.py    MIT License 5 votes vote down vote up
def open_segment(filename, noise_thresh=3, off_lo=50, off_hi=50, plot=False):
    """Split a multi-sweep recording into labeled segments.

    Returns a dict mapping each label in the module-level `labels` list
    to its audio slice, plus a 'rate' entry with the sample rate.
    Segment boundaries are found by find_segments() using a noise floor
    estimated from the first 500 samples.
    """
    global lengths, labels

    import re, os
    import numpy
    from scipy.io import wavfile

    from matplotlib import pyplot

    # the file to segment
    rate, audio = wavfile.read(filename)

    # Estimate the noise floor from the first 500 samples of each channel.
    noise_mean = numpy.mean(audio[:500, :], axis=0)
    noise_est = numpy.mean(numpy.std(audio[:500, :] - noise_mean[None, :], axis=0))

    # Boundaries of the sweeps; prepend 0 so the first sweep starts at 0.
    boundaries = [0] + find_segments(audio, lengths, noise_mean, noise_est, noise_thresh)

    # First sweep, then every (start, end) pair with a safety margin.
    signals = [audio[:boundaries[1] - off_lo, :]]
    for pair_start in range(1, len(boundaries) - 1, 2):
        b_lo, b_hi = boundaries[pair_start], boundaries[pair_start + 1]
        signals.append(audio[b_lo - off_lo:b_hi + off_hi])

    segmented = dict(zip(labels, signals))
    if plot:
        for seg_label, seg_audio in segmented.items():
            pyplot.figure()
            pyplot.plot(seg_audio[:, 0])
            pyplot.title(seg_label)
        pyplot.show()

    # add the rate
    segmented['rate'] = rate

    return segmented
Example 34
Project: CRAFT   Author: dafyddg   File: modgpyrapt.py    GNU General Public License v3.0 5 votes vote down vote up
def _get_audio_data(wavfile_path):
    """Read a WAV file and return (sample_rate, samples), downmixing
    multichannel input to mono by averaging the first two channels and
    truncating back to integers."""
    sample_rate, audio_sample = wavfile.read(wavfile_path)

    # TODO: investigate whether this type of conversion to mono is suitable:
    if len(audio_sample.shape) > 1:
        left = audio_sample[:, 0] / 2.0
        right = audio_sample[:, 1] / 2.0
        audio_sample = (left + right).astype(int)

    return (sample_rate, audio_sample)
Example 35
Project: AudioEmotionDetection   Author: DefinitelyNotTim   File: emotionProcessor.py    MIT License 5 votes vote down vote up
def volumeProc(self):
    """Read the WAV file at ``self.fname`` and return its FFT frequency axis
    (Hz values from 0 up to just below the Nyquist frequency).

    The one-sided power spectrum is computed along the way but only the
    frequency axis is returned.
    """
    rate, raw = wavfile.read(self.fname)
    normalized = raw / (2. ** 15)          # scale 16-bit PCM into [-1, 1)
    sig = normalized[:]
    count = len(sig)
    spectrum = fft(sig)                    # full complex FFT
    half = int(math.ceil((count + 1) / 2.0))   # one-sided length
    spectrum = abs(spectrum[0:half]) / float(count)
    spectrum = spectrum ** 2
    # Double every bin except DC (and Nyquist when count is even), as in
    # a standard one-sided power spectrum.
    if count % 2 > 0:
        spectrum[1:len(spectrum)] = spectrum[1:len(spectrum)] * 2
    else:
        spectrum[1:len(spectrum) - 1] = spectrum[1:len(spectrum) - 1] * 2
    return numpy.arange(0, half, 1.0) * (rate / count)


    
##  gapProc: function that allows the extraction of the gaps between
##  consecutive words.
##  Inputs: self
##  Output: an array containing the lengths of every gap between words
##  Related Software Requirements: FR.5
##  Author: Michael Knapp and Timmothy Lane 
Example 36
Project: AudioEmotionDetection   Author: DefinitelyNotTim   File: emotionProcessor-threaded.py    MIT License 5 votes vote down vote up
def volumeProc2(self, results_dict):
    """Compute the FFT frequency axis for ``self.fname``, store it in
    ``results_dict["volume"]`` for the calling thread, and return it."""
    rate, raw = wavfile.read(self.fname)
    normalized = raw / (2. ** 15)          # scale 16-bit PCM into [-1, 1)
    sig = normalized[:]
    count = len(sig)
    spectrum = fft(sig)                    # full complex FFT
    half = int(math.ceil((count + 1) / 2.0))   # one-sided length
    spectrum = abs(spectrum[0:half]) / float(count)
    spectrum = spectrum ** 2
    # Double every bin except DC (and Nyquist when count is even), as in
    # a standard one-sided power spectrum.
    if count % 2 > 0:
        spectrum[1:len(spectrum)] = spectrum[1:len(spectrum)] * 2
    else:
        spectrum[1:len(spectrum) - 1] = spectrum[1:len(spectrum) - 1] * 2
    freq_axis = numpy.arange(0, half, 1.0) * (rate / count)
    results_dict["volume"] = freq_axis
    return freq_axis




    
## gapProc: function that allows the extraction of the gaps between
## consecutive words.
## Inputs: self
## Output: an array containing the lengths of every gap between words
## Written By: Michael Knapp and Timmothy Lane 
Example 37
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_wavfile.py    GNU General Public License v3.0 5 votes vote down vote up
def test_read_1():
    """A 44.1 kHz mono int32 file reads correctly, with and without mmap."""
    for use_mmap in (False, True):
        with warnings.catch_warnings():
            # The file carries an extra chunk; silence the resulting warning.
            warnings.simplefilter('ignore', wavfile.WavFileWarning)
            rate, data = wavfile.read(datafile('test-44100-le-1ch-4bytes.wav'),
                                      mmap=use_mmap)

        assert_equal(rate, 44100)
        assert_(np.issubdtype(data.dtype, np.int32))
        assert_equal(data.shape, (4410,))

        del data  # release any mmap before the next iteration
Example 38
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_wavfile.py    GNU General Public License v3.0 5 votes vote down vote up
def test_read_2():
    """An 8 kHz stereo uint8 file reads correctly, with and without mmap."""
    for use_mmap in (False, True):
        rate, data = wavfile.read(datafile('test-8000-le-2ch-1byteu.wav'),
                                  mmap=use_mmap)
        assert_equal(rate, 8000)
        assert_(np.issubdtype(data.dtype, np.uint8))
        assert_equal(data.shape, (800, 2))

        del data  # release any mmap before the next iteration
Example 39
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_wavfile.py    GNU General Public License v3.0 5 votes vote down vote up
def test_read_fail():
    """A non-WAV file must make wavfile.read raise ValueError."""
    for use_mmap in (False, True):
        # BUGFIX: open in binary mode ('rb'), matching the other tests --
        # text mode can fail with a decoding error before wavfile.read ever
        # sees the data.  `with` also guarantees the handle is closed even
        # when the assertion fails.
        with open(datafile('example_1.nc'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=use_mmap)
Example 40
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_wavfile.py    GNU General Public License v3.0 5 votes vote down vote up
def _check_roundtrip(realfile, rate, dtype, channels):
    """Write random data with wavfile.write and check wavfile.read returns
    it unchanged -- against a real temp file (realfile=True) or a BytesIO
    buffer, with and without mmap."""
    if realfile:
        fd, target = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        target = BytesIO()
    try:
        payload = np.random.rand(100, channels)
        if channels == 1:
            payload = payload[:, 0]
        if dtype.kind == 'f':
            # Float formats expect values already in [-1, 1].
            payload = payload.astype(dtype)
        else:
            # Integer formats get a small positive range instead.
            payload = (payload * 128).astype(dtype)

        wavfile.write(target, rate, payload)

        for use_mmap in (False, True):
            rate_rt, payload_rt = wavfile.read(target, mmap=use_mmap)

            assert_equal(rate, rate_rt)
            assert_(payload_rt.dtype.byteorder in ('<', '=', '|'), msg=payload_rt.dtype)
            assert_array_equal(payload, payload_rt)

            del payload_rt
    finally:
        if realfile:
            os.unlink(target)
Example 41
Project: BlockCIrculantRNN   Author: lz1313   File: spectrogram.py    Apache License 2.0 5 votes vote down vote up
def spectrogramPower(audio, window_size=0.02, window_stride=0.01):
  """Compute a normalized log-magnitude spectrogram of a WAV file.

  Args:
    audio: path to the WAV file to analyze.
    window_size: STFT window length, in seconds.
    window_stride: hop between successive windows, in seconds.

  Returns:
    2-D array of shape (frequency_bins, time_steps) holding
    log(1 + |STFT|), standardized by `preprocessing.scale`
    (presumably sklearn's -- confirm against this module's imports).
  """
  samplingRate, samples = wav.read(audio)
  # Window/hop converted from seconds to samples; n_fft = window length.
  win_length = int(window_size * samplingRate)
  hop_length = int(window_stride * samplingRate)
  n_fft = win_length
  D = librosa.core.stft(
      samples, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
  mag = np.abs(D)
  log_mag = np.log1p(mag)
  # normalization
  log_mag = preprocessing.scale(log_mag)
  # size: frequency_bins*time_len
  return log_mag
Example 42
Project: signaltrain   Author: drscotthawley   File: io_methods.py    GNU General Public License v3.0 5 votes vote down vote up
def _loadWAVWithScipy(fileName):
    """Return (samples, sample_rate) for the WAV file at `fileName`,
    using scipy's wavfile reader (note the swapped order relative to
    what `read` itself returns)."""
    sampleRate, samples = read(fileName)
    return samples, sampleRate
Example 43
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 5 votes vote down vote up
def sex_detector(filename, window_size=2**14, n=5, window_span=10):
    '''
    Given a single-channeled .wav file filename,
    return a single letter 'K' or 'M' if the voice
    is detected as a female or male
    '''
    # `frequency` is the sample rate in Hz, `signal` the raw PCM samples.
    frequency, signal = wavfile.read(filename)
    length = len(signal)
    '''overlap is how much we divide the window the the smaller overlap the
    faster but less acurate the method will be'''
    overlap = 8

    results = []
    # Window start positions centered on the middle of the recording,
    # spanning window_span windows to each side, stepped by 1/overlap window.
    windows = range(max(0, length//2 - window_span*window_size),
                    min(length - 1, length//2 + window_span*window_size),
                    window_size//overlap)
    '''
    We take out samples from the middle of the signal (length//2).
    We then run hps over window_span number of samples left and right,
    overlapping by window_size/overlap elements.
    '''
    # `hps` is presumably a harmonic product spectrum helper defined
    # elsewhere in this module -- TODO confirm.
    results = [hps(signal[i:i+window_size], frequency, n=n) for i in windows]

    # Frequency axis matching the spectrum bins of one window.
    x = linspace(0, frequency, window_size, endpoint=False)[:len(results[0])]
    # Drop windows whose spectrum came back short (e.g. truncated at the end).
    results = [element for element in results if len(element) == len(x)]
    # NOTE: `sum` here must be numpy's sum (axis kwarg) -- this file appears
    # to rely on a star-import shadowing the builtin.
    results = sum(results, axis=0)

    '''Best way to determine way is sum parts of the signals between
    male/female frequencies '''
    # M and F are presumably module-level (low, high) Hz ranges for male and
    # female fundamental frequencies -- verify at the top of the file.
    male = sum([res for f, res in zip(x, results) if f >= M[0] and f <= M[1]])
    fem = sum([res for f, res in zip(x, results) if f >= F[0] and f <= F[1]])

    return 'K' if fem > male else 'M' 
Example 44
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 5 votes vote down vote up
def note_specgram(path, ax, peak=70.0, use_cqt=True):
  """Draw phase (rainbow) and magnitude spectrograms of the audio onto `ax`.

  Args:
    path: a single file path, or a list of paths whose samples are summed.
    ax: matplotlib axes to draw on.
    peak: dB range used to normalize the log magnitude into [0, 1].
    use_cqt: if True use a constant-Q transform, otherwise a plain STFT.

  Relies on module-level names: readwav, hop_length, notes_per_octave,
  over_sample, octaves, res_factor, n_fft, my_mask.
  """
  # Add several samples together
  if isinstance(path, list):
    for i, p in enumerate(path):
      # FIX: the original called readwav(f), but `f` is undefined here --
      # the loop variable `p` is the path to read.
      sr, a = readwav(p)
      audio = a if i == 0 else a + audio
  # Load one sample
  else:
    # FIX: likewise read from `path`, not the undefined `f`.
    sr, audio = readwav(path)
  audio = audio.astype(np.float32)
  if use_cqt:
    C = librosa.cqt(audio, sr=sr, hop_length=hop_length,
                    bins_per_octave=int(notes_per_octave*over_sample),
                    n_bins=int(octaves * notes_per_octave * over_sample),
                    real=False,
                    filter_scale=res_factor,
                    fmin=librosa.note_to_hz('C2'))
  else:
    C = librosa.stft(audio, n_fft=n_fft, win_length=n_fft, hop_length=hop_length, center=True)
  mag, phase = librosa.core.magphase(C)
  phase_angle = np.angle(phase)
  # Instantaneous frequency: first difference of the unwrapped phase,
  # normalized by pi; column 0 keeps the absolute phase.
  phase_unwrapped = np.unwrap(phase_angle)
  dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
  dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
  # Map log power into [0, 1] relative to `peak` dB below the maximum.
  mag = (librosa.logamplitude(mag**2, amin=1e-13, top_db=peak, ref_power=np.max) / peak) + 1
  ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
  ax.matshow(mag[::-1, :], cmap=my_mask)
Example 45
Project: fsat-decoder   Author: floripasat   File: fsat_decoder.py    GNU General Public License v3.0 5 votes vote down vote up
def on_button_decode_clicked(self, button):
        """Handle the Decode button: open the chosen WAV file and log its rate,
        or pop up an error dialog when no file has been selected."""
        if self.filechooser_audio_file.get_filename() is None:
            # Nothing selected: report the problem in a modal error dialog.
            dialog = Gtk.MessageDialog(None, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.OK, "Error loading the audio file!")
            dialog.format_secondary_text("No file selected!")
            dialog.run()
            dialog.destroy()
            return
        sample_rate, data = wavfile.read(self.filechooser_audio_file.get_filename())
        self.listmodel_events.append([str(datetime.now()), "Audio file opened with a sample rate of " + str(sample_rate) + " Hz"])
Example 46
Project: PythonAudioEffects   Author: nextbytes   File: AudioProcessing.py    MIT License 5 votes vote down vote up
def __init__(self, input_audio_path):
		"""Read the given WAV file and keep a mono version of its samples."""
		rate, samples = read(input_audio_path)
		self.sample_freq = rate
		# Collapse any multi-channel input down to one channel.
		self.audio_data = AudioProcessing.convert_to_mono_audio(samples)
Example 47
Project: Speech-Recognition   Author: ncble   File: draw_audio.py    Apache License 2.0 5 votes vote down vote up
def load_audio_file(filename):
	"""Read a WAV file with scipy; return (samples, sample_rate)."""
	rate, samples = wavfile.read(filename)
	return samples, rate
Example 48
Project: Bounded-Kalman-filter-method-for-motion-robust-non-contact-heart-rate-estimation   Author: DataLabPSU   File: common.py    GNU General Public License v3.0 5 votes vote down vote up
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.
    Can be any format supported by the underlying library (libsndfile or SciPy)
    """
    # `wav_loader` is presumably a module-level string chosen at import time
    # based on which backend is installed -- TODO confirm at the file top.
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        signal = sf.read()
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = len(sf)
        file_format = sf.format_info + ' ' + sf.subtype_info
        sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        signal = sf.read_frames(sf.nframes)
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = sf.nframes
        file_format = sf.format
        sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        # Mono scipy reads come back 1-D, so shape[1] raises IndexError.
        try:
            channels = signal.shape[1]
        except IndexError:
            channels = 1
        samples = signal.shape[0]
        file_format = str(signal.dtype)

    # NOTE(review): if wav_loader matches none of the branches, this raises
    # NameError on `signal`; `samples` and `file_format` are computed but
    # never returned.
    return signal, sample_rate, channels 
Example 49
Project: speaker-recognition-py3   Author: crouchred   File: utils.py    Apache License 2.0 5 votes vote down vote up
def read_wav(fname):
    """Load a WAV file; keep only the first channel of multi-channel audio."""
    fs, signal = wavfile.read(fname)
    if signal.ndim != 1:
        # Multi-channel input: warn and fall back to channel 0 only.
        print("convert stereo to mono")
        signal = signal[:, 0]
    return fs, signal
Example 50
Project: tensor2tensor   Author: tensorflow   File: audio_encoder.py    Apache License 2.0 5 votes vote down vote up
def encode(self, s):
    """Transform a string with a filename into a list of float32.

    Args:
      s: path to the file with a waveform.

    Returns:
      samples: list of float32 values; integer PCM is normalized by the
      dtype's max into [-1, 1].
    """
    def convert_to_wav(in_path, out_path, extra_args=None):
      # Re-encode to mono / 16-bit / 16 kHz WAV via the external `sox` tool;
      # skipped when the target file already exists.
      if not os.path.exists(out_path):
        # TODO(dliebling) On Linux, check if libsox-fmt-mp3 is installed.
        args = ["sox", "--rate", "16k", "--bits", "16", "--channel", "1"]
        if extra_args:
          args += extra_args
        call(args + [in_path, out_path])

    # Make sure that the data is a single channel, 16bit, 16kHz wave.
    # TODO(chorowski): the directory may not be writable, this should fallback
    # to a temp path, and provide instructions for installing sox.
    if s.endswith(".mp3"):
      out_filepath = s[:-4] + ".wav"
      # --guard protects against clipping during the MP3 transcode.
      convert_to_wav(s, out_filepath, ["--guard"])
      s = out_filepath
    elif not s.endswith(".wav"):
      out_filepath = s + ".wav"
      convert_to_wav(s, out_filepath)
      s = out_filepath
    rate, data = wavfile.read(s)
    # NOTE(review): asserts are stripped under `python -O`; the rate and
    # mono-channel checks would silently vanish there.
    assert rate == self._sample_rate
    assert len(data.shape) == 1
    if data.dtype not in [np.float32, np.float64]:
      # Normalize integer PCM by the dtype's max value into [-1, 1).
      data = data.astype(np.float32) / np.iinfo(data.dtype).max
    return data.tolist() 
Example 51
Project: CR0FT   Author: dafyddg   File: modgpyrapt.py    GNU General Public License v3.0 5 votes vote down vote up
def _get_audio_data(wavfile_path):
    # Read wavfile and convert to mono
    sample_rate, audio_sample = wavfile.read(wavfile_path)

    # TODO: investigate whether this type of conversion to mono is suitable:
    if len(audio_sample.shape) > 1:
        audio_sample = audio_sample[:, 0]/2.0 + audio_sample[:, 1]/2.0
        audio_sample = audio_sample.astype(int)

    return (sample_rate, audio_sample) 
Example 52
Project: tensorflow-speech-recognition-pai   Author: super13   File: load_audio_to_mem.py    MIT License 5 votes vote down vote up
def load_wavfile(wavfile):
    """
    Read a wav file using scipy.io.wavfile.

    Returns (sample_rate, samples, name) where name is the file's base name
    without its extension.
    """
    rate, sig = wav.read(wavfile)
    # e.g. "/a/b/clip01.wav" -> "clip01"
    stem, _ = os.path.splitext(os.path.basename(wavfile))
    return rate, sig, stem
Example 53
Project: tacotron2_cpu   Author: shoegazerstella   File: mel2samp.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_wav_to_torch(full_path):
    """
    Loads wavdata into torch array
    """
    rate, samples = read(full_path)
    # Share the numpy buffer, then cast to float32 for downstream models.
    tensor = torch.from_numpy(samples).float()
    return tensor, rate
Example 54
Project: tacotron2_cpu   Author: shoegazerstella   File: utils_tts.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_wav_to_torch(full_path):
    """Read a WAV file; return (float32 torch tensor of samples, rate)."""
    rate, samples = read(full_path)
    as_float = samples.astype(np.float32)
    return torch.FloatTensor(as_float), rate
Example 55
Project: BERT   Author: yyht   File: audio_encoder.py    Apache License 2.0 5 votes vote down vote up
def encode(self, s):
    """Transform a string with a filename into a list of float32.

    Args:
      s: path to the file with a waveform.

    Returns:
      samples: list of float32 values; integer PCM is normalized by the
      dtype's max into [-1, 1].
    """
    # Make sure that the data is a single channel, 16bit, 16kHz wave.
    # TODO(chorowski): the directory may not be writable, this should fallback
    # to a temp path, and provide instructions for installing sox.
    if s.endswith(".mp3"):
      # TODO(dliebling) On Linux, check if libsox-fmt-mp3 is installed.
      # --guard protects against clipping during the MP3 transcode.
      out_filepath = s[:-4] + ".wav"
      call([
          "sox", "--guard", s, "-r", "16k", "-b", "16", "-c", "1", out_filepath
      ])
      s = out_filepath
    elif not s.endswith(".wav"):
      # Non-WAV, non-MP3 input: convert only if not already converted.
      out_filepath = s + ".wav"
      if not os.path.exists(out_filepath):
        call(["sox", "-r", "16k", "-b", "16", "-c", "1", s, out_filepath])
      s = out_filepath
    rate, data = wavfile.read(s)
    # NOTE(review): asserts are stripped under `python -O`; the rate and
    # mono-channel checks would silently vanish there.
    assert rate == self._sample_rate
    assert len(data.shape) == 1
    if data.dtype not in [np.float32, np.float64]:
      # Normalize integer PCM by the dtype's max value into [-1, 1).
      data = data.astype(np.float32) / np.iinfo(data.dtype).max
    return data.tolist() 
Example 56
Project: tn2-wg   Author: alphacep   File: utils.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_wav_to_torch(full_path):
    """Load *full_path* (WAV); return (float32 tensor of samples, rate)."""
    sr, wav_data = read(full_path)
    return torch.FloatTensor(wav_data.astype(np.float32)), sr
Example 57
Project: Speech-commands-recognition   Author: lucko515   File: generator.py    MIT License 5 votes vote down vote up
def add_noise(self, audio_clip):
        '''
        Adds noise to an audio clip.

        :params:
            audio_clip - String, path to the audio clip

        :returns:
            noisy_tracks - Python list, generated nosiy tracks
        '''
        # Sample rate of the clip itself is discarded; self.sample_rate is
        # assumed throughout -- TODO confirm all clips share that rate.
        _, audio = wavfile.read(audio_clip)
        
        #padd the track with zeros if the padding mode is True
        if self.padd_to_sr:
            if audio.shape[0] < self.sample_rate:
                audio = np.append(audio, np.zeros(self.sample_rate - audio.shape[0])) 
                
        #randomly choose noisy backgrounds
        noisy_backgrounds = np.random.choice(self.background_noise_files, 
                                            size=self.number_of_noisy_samples)
        
        noisy_tracks = []
        for background in noisy_backgrounds:
            sr, noise = wavfile.read(background)
            #randomly choose starting point of the noisy background
            # NOTE(review): np.random.choice([0, N]) picks only one of the two
            # endpoint values (0 or N), never a position in between --
            # np.random.randint may have been intended; confirm.
            noisy_sample_start_id = np.random.choice([0, len(noise)-self.sample_rate])
            noisy_sample_end_id = noisy_sample_start_id +self.sample_rate
                
            audio_noise = noise[noisy_sample_start_id:noisy_sample_end_id]

            assert len(audio) == len(audio_noise)
            #Keep 90% of the original audio and add 10% of the noise to it
            #NOTE: These numbers could be randomized as well to generate even more, realistic noise
            new_audio = 0.10 * audio_noise + 0.9 * audio
            noisy_tracks.append(self.featurize(new_audio))
            
        return noisy_tracks 
Example 58
Project: Speech-commands-recognition   Author: lucko515   File: generator.py    MIT License 5 votes vote down vote up
def featurize(self, audio_clip):
        """ 
        For a given audio clip, calculate the corresponding feature
        :params:
            audio_clip - String, path to the audio clip, or an already-loaded
            sample array (anything that is not a str is used as-is)
        """
        if not isinstance(audio_clip, str):
            # Caller passed raw samples (e.g. from add_noise) -- skip the read.
            audio = audio_clip
        else:
            _, audio = wavfile.read(audio_clip)
        
        #Pad all audios to be the same lengths
        if self.padd_to_sr:
            if audio.shape[0] < self.sample_rate:
                audio = np.append(audio, np.zeros(self.sample_rate - audio.shape[0]))
            
        if self.spectrogram:
            # generate_spectrogram presumably returns several values; only the
            # last one (the spectrogram itself) is used -- TODO confirm.
            return generate_spectrogram(audio, 
                                       sample_rate=self.sample_rate, 
                                       step_size=self.step, 
                                       window_size=self.window_size)[-1]
        else:
            features = compute_mfcc_features(audio, self.sample_rate, numcep=self.mfcc_dim)
            if len(self.mfcc_features) > 1:
                #This will return delta or delta delta on top of normal mfcc features
                return mfcc_pack(np.array(features)[self.mfcc_features])
            else:
                return features[self.mfcc_features[0]] 
Example 59
Project: hunspeech   Author: hlt-bme-hu   File: shifted_delta_cepstra.py    MIT License 5 votes vote down vote up
def shifted_delta_cepstra(self, wav_fn, delta=1, shift=3, k_conc=3):
        """
        :param
            delta: represents the time advance and delay for the sdc
            k_conc: is the number of blocks whose delta coefficients are concd
            shift: is the time shift between consecutive blocks

        Shifted delta cepstra are feature vectors created by concatenating
        delta cepstra computed across multiple speech frames.
        See the paper
            PA Torres-Carrasquillo et al (2002)
            Approaches to language identification using
                Gaussian mixture models and Shifted delta cepstral features.

        :return: ndarray of shape (n_frames - delta - shift*k_conc,
            (k_conc + 1) * n_mfcc): the original cepstra followed by k_conc
            shifted delta blocks per frame.
        """
        (rate, sig) = wav.read(wav_fn)
        mfcc_feats = mfcc(sig, rate)
        # len(mfcc) == 39 == 3 * (12 cepstral + 1 energy)
        # TODO include original cepstra as well?
        # Delta cepstra: difference between frames `delta` apart.
        delta_feats = mfcc_feats[delta:] - mfcc_feats[:-delta]
        output_duration = delta_feats.shape[0] - shift * k_conc
        shifted = np.zeros((output_duration,
                            (k_conc + 1) * delta_feats.shape[1]))
        mfcc_dim = mfcc_feats.shape[1]
        shifted[:, 0:mfcc_dim] = mfcc_feats[:output_duration]
        # FIX: `range` instead of the Python-2-only `xrange` (NameError on
        # Python 3; `range` behaves identically here on both versions).
        for i in range(output_duration):
            shifted[i, mfcc_dim:] = delta_feats[i:i + k_conc * shift:shift,
                                                :].reshape((1, -1))
        logger.debug('{} --> {}'.format(mfcc_feats.shape, shifted.shape))
        return shifted
Example 60
Project: pcml   Author: projectclarify   File: audio_utils.py    Apache License 2.0 5 votes vote down vote up
def mp4_to_1d_array(mp4_path, audio_bitrate=44100):
  """Extract the audio track of an MP4 as a normalized float32 1-D array.

  The video stream is dropped (-vn) and the audio resampled to
  `audio_bitrate` Hz via an external ffmpeg invocation; int16 full scale is
  mapped into [-1, 1].
  """
  with tempfile.TemporaryDirectory() as tmpd:
    wav_path = os.path.join(tmpd, "mywav.wav")
    cmd = ["ffmpeg", "-loglevel", "quiet", "-i", mp4_path,
           "-f", "wav", "-ar", str(audio_bitrate), "-vn", wav_path]
    subprocess.check_output(cmd)
    samples = wavfile.read(wav_path)[1]
  normalized = samples / np.iinfo(np.int16).max
  return normalized.astype(np.float32)
Example 61
Project: F2CNN   Author: tictacmenthe   File: GammatoneFiltering.py    Apache License 2.0 5 votes vote down vote up
def GetArrayFromWAV(filename):
    """Return (framerate, samples) for either a RIFF WAV or a NIST SPHERE file.

    The first four bytes of the file distinguish the two container formats.
    """
    with open(filename, 'rb') as wavFile:
        header = wavFile.read(4)
    if header == b'RIFF':  # RIFF header, for WAVE files
        framerate, wavArray = WavFileTool.read(filename)
    else:  # NIST header, which uses SPHERE
        file = SPHFile(filename)
        framerate = file.format['sample_rate']
        # Copy the SPHERE samples one by one into an int16 numpy array.
        # NOTE(review): time_range() presumably yields the decoded samples;
        # confirm against the sphfile package docs.
        wavArray = numpy.zeros(len(file.time_range()), dtype=numpy.int16)
        for i, value in enumerate(file.time_range()):
            wavArray[i] = value
    return framerate, wavArray 
Example 62
Project: F2CNN   Author: tictacmenthe   File: GammatoneFiltering.py    Apache License 2.0 5 votes vote down vote up
def FilterAllOrganisedFiles():
    """Apply the gammatone filterbank to every organised WAV file under
    resources/f2cnn, fanning the work out over all CPU cores.

    Filterbank settings are read from configF2CNN.conf.  Exits the process
    when no WAV files are found.
    """
    TotalTime = time.time()

    # Get all the WAV files under resources
    wavFiles = glob.glob(os.path.join("resources", "f2cnn", "**", "*.WAV"))

    # FIX: check for an empty result BEFORE dereferencing wavFiles[0]; the
    # original printed os.path.split(wavFiles[0]) first and raised IndexError
    # when the glob matched nothing.
    if not wavFiles:
        print("NO WAV FILES FOUND, PLEASE ORGANIZE FILES")
        exit(-1)

    print("\n###############################\nApplying FilterBank to files in '{}'.".format(
        os.path.split(wavFiles[0])[0]))

    print(len(wavFiles), "files found")

    # #### READING CONFIG FILE
    config = ConfigParser()
    config.read('configF2CNN.conf')
    framerate = config.getint('FILTERBANK', 'FRAMERATE')
    nchannels = config.getint('FILTERBANK', 'NCHANNELS')
    lowcutoff = config.getint('FILTERBANK', 'LOW_FREQ')
    # ##### PREPARATION OF FILTERBANK
    # CENTER FREQUENCIES ON ERB SCALE
    CENTER_FREQUENCIES = filters.centre_freqs(framerate, nchannels, lowcutoff)
    # Filter coefficient for a Gammatone filterbank
    FILTERBANK_COEFFICIENTS = filters.make_erb_filters(framerate, CENTER_FREQUENCIES)

    # Usage of multiprocessing, to reduce computing time
    proc = cpu_count()
    counter = Value('i', 0)
    multiproc_pool = Pool(processes=proc, initializer=InitProcesses, initargs=(FILTERBANK_COEFFICIENTS, counter,))
    multiproc_pool.starmap(GammatoneFiltering, zip(wavFiles, repeat(len(wavFiles))))

    print("Filtered and Saved all files.")
    print('                Total time:', time.time() - TotalTime)
    print('')
Example 63
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_1():
    # 32-bit mono fixture; exercise both the regular and mmap read paths.
    for use_mmap in (False, True):
        rate, samples = wavfile.read(datafile('test-44100Hz-le-1ch-4bytes.wav'),
                                     mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(samples.dtype, np.int32))
        assert_equal(samples.shape, (4410,))
        del samples
Example 64
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_2():
    # 8-bit WAV is unsigned by spec; expect uint8 stereo data.
    for use_mmap in (False, True):
        rate, samples = wavfile.read(datafile('test-8000Hz-le-2ch-1byteu.wav'),
                                     mmap=use_mmap)
        assert_equal(rate, 8000)
        assert_(np.issubdtype(samples.dtype, np.uint8))
        assert_equal(samples.shape, (800, 2))
        del samples
Example 65
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_3():
    # Little-endian IEEE-float fixture; expect float32 stereo data.
    for use_mmap in (False, True):
        rate, samples = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-le.wav'),
                                     mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(samples.dtype, np.float32))
        assert_equal(samples.shape, (441, 2))
        del samples
Example 66
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_4():
    # The wavex fixture contains a non-data chunk; silence the warning it emits.
    for use_mmap in (False, True):
        with suppress_warnings() as sup:
            sup.filter(wavfile.WavFileWarning,
                       "Chunk .non-data. not understood, skipping it")
            rate, samples = wavfile.read(datafile('test-48000Hz-2ch-64bit-float-le-wavex.wav'),
                                         mmap=use_mmap)
        assert_equal(rate, 48000)
        assert_(np.issubdtype(samples.dtype, np.float64))
        assert_equal(samples.shape, (480, 2))
        del samples
Example 67
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_5():
    # Big-endian float fixture: dtype byteorder must be '>' (or '=' on BE hosts).
    for use_mmap in (False, True):
        rate, samples = wavfile.read(datafile('test-44100Hz-2ch-32bit-float-be.wav'),
                                     mmap=use_mmap)
        assert_equal(rate, 44100)
        assert_(np.issubdtype(samples.dtype, np.float32))
        assert_(samples.dtype.byteorder == '>' or (sys.byteorder == 'big' and
                                                   samples.dtype.byteorder == '='))
        assert_equal(samples.shape, (441, 2))
        del samples
Example 68
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_early_eof():
    # A data chunk truncated before its declared size must raise ValueError.
    for use_mmap in (False, True):
        with open(datafile('test-44100Hz-le-1ch-4bytes-early-eof.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=use_mmap)
Example 69
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def test_read_incomplete_chunk():
    # A chunk header claiming more bytes than exist must raise ValueError.
    for use_mmap in (False, True):
        with open(datafile('test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'), 'rb') as fp:
            assert_raises(ValueError, wavfile.read, fp, mmap=use_mmap)
Example 70
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License 5 votes vote down vote up
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 71
Project: FRIDA   Author: LCAV   File: bands_selection.py    MIT License 4 votes vote down vote up
def select_bands(samples, freq_range, fs, nfft, win, n_bands, div=1):
    '''
    Selects the bins with most energy in a frequency range.

    It is possible to specify a div factor. Then the range is subdivided
    into div equal subbands and n_bands / div per subband are selected.

    Returns (freq_hz, freq_bins): the selected frequencies in Hz and the
    corresponding STFT bin indices, each de-duplicated and sorted.
    '''

    # A boolean `win` is shorthand: True -> Hann window, False -> no window.
    if win is not None and isinstance(win, bool):
        if win:
            win = np.hanning(nfft)
        else:
            win = None

    # Read the signals in a single array
    sig = [wavfile.read(s)[1] for s in samples]
    L = max([s.shape[0] for s in sig])
    signals = np.zeros((L,len(samples)), dtype=np.float32)
    for i in range(signals.shape[1]):
        # Normalize each signal by the std of its loud part.
        # NOTE(review): the 1e-2 threshold assumes float-valued WAV data in
        # roughly [-1, 1] -- confirm the input files' sample format.
        signals[:sig[i].shape[0],i] = sig[i] / np.std(sig[i][sig[i] > 1e-2])

    # Average power spectrum of the summed signals over time.
    sum_sig = np.sum(signals, axis=1)

    sum_STFT = pra.stft(sum_sig, nfft, nfft, win=win, transform=rfft).T
    sum_STFT_avg = np.mean(np.abs(sum_STFT)**2, axis=1)

    # Do some band selection
    bnds = np.linspace(freq_range[0], freq_range[1], div+1)

    freq_hz = np.zeros(n_bands)
    freq_bins = np.zeros(n_bands, dtype=int)

    # Number of bands to pick per subband.
    nsb = n_bands // div

    for i in range(div):

        # Subband boundaries expressed as STFT bin indices.
        bl = int(bnds[i] / fs * nfft)
        bh = int(bnds[i+1] / fs * nfft)

        # Indices (within the subband) of the nsb strongest bins.
        k = np.argsort(sum_STFT_avg[bl:bh])[-nsb:]

        freq_hz[nsb*i:nsb*(i+1)] = (bl + k) / nfft * fs
        freq_bins[nsb*i:nsb*(i+1)] = k + bl

    freq_hz = freq_hz[:n_bands]

    # NOTE(review): np.unique sorts and removes duplicates, so fewer than
    # n_bands entries may be returned when subband picks collide.
    return np.unique(freq_hz), np.unique(freq_bins) 
Example 72
Project: stalta   Author: PALab   File: datainput.py    MIT License 4 votes vote down vote up
def FileScanner(filename, fsize = False):

    global filecount

    '''Tries to extract the time series in filename.  Tries sac,
    possibly-zipped wav, possibly-zipped columnar data, and
    possibly-zipped amaseis file.

    On success returns a dictionary containing the keys filename,
    format, and data, a list of the items found as (ary, dt, t0,
    info).  The data list may be empty.

    On failure raises BadFileFormat.
    
    fsize = False disables comparison of physical size and expected
    size for sac files.  Why does this option even exist?
    '''

    log().debug("scanning '%s'", filename)

    pulses = None
    detailary = []
    summaryary = []

    try:
        log().debug("try obspy (sac) trace")
        sacdb = obspy.core.read(filename, fsize = fsize)
        dataformat = "obspy"
        pulses = []
        i = 0
        for tr in sacdb:
            # One tuple per trace: (samples, sample period, start time, summary).
            pulses.append((tr.data,
                          1.0 / tr.stats.sampling_rate,
                          tr.stats.starttime.timestamp,
                          str(tr))
                          )
            sacd = None
            details = []
            summaryary.append(str(tr))
            # NOTE(review): iterkeys() is Python-2-only on plain dicts; this
            # relies on obspy's stats object providing it -- confirm.
            for k in tr.stats.iterkeys():
                if k == 'sac':
                    sacd = tr.stats[k]
                else:
                    details.append("%20s: %s" % (k, tr.stats[k]))
            if sacd is not None:
                kl = sacd.keys()
                kl.sort()
                for k in kl:
                    try:
                        # SAC uses -12345 as its "undefined" sentinel; skip
                        # those fields unless verbose output was requested.
                        v = int(round(float(sacd[k])))
                        if v == -12345 and verbose == 0:
                            continue
                    except:
                        pass
                    details.append("sacd:: %22s: %s" % (k, sacd[k]))
            detailary.append(details)
            i += 1
    # FIX: `except Exception, e:` and `print e` are Python-2-only syntax and
    # fail to parse on Python 3; the `as` form and print() work on both.
    except Exception as e:
        print(e)
        pulses = None
Example 73
Project: pyramic-dataset   Author: fakufaku   File: doa_eval.py    MIT License 4 votes vote down vote up
def run_doa(angle, h, algo, doa_kwargs, freq_bins, speakers_numbering):
    ''' Run the doa localization for one source location and one algorithm

    Relies on module-level configuration: mic_array, fs, nfft, c, stft_hop,
    fn (recording filename template), sample_name and locations.

    :param angle: azimuth label of the recording (degrees, as str or int)
    :param h: loudspeaker (height) name, key into speakers_numbering/locations
    :param algo: display name of the algorithm (used in output only)
    :param doa_kwargs: kwargs for the DOA object; 'algo_obj' selects the class
    :param freq_bins: STFT bins passed to locate_sources
    :param speakers_numbering: maps loudspeaker name -> index in the filename
    :returns: dict with the estimated location and errors against both the
        manual and the optimized calibration groundtruths
    '''

    # Prepare the DOA localizer object
    algo_key = doa_kwargs['algo_obj']
    doa = pra.doa.algorithms[algo_key](mic_array, fs, nfft, c=c, num_src=1, dim=3, **doa_kwargs)

    # get the loudspeaker index from its name
    spkr = speakers_numbering[h]

    # open the recording file
    filename = fn.format(name=sample_name, spkr=spkr, angle=angle)
    fs_data, data = wavfile.read(filename)

    if fs_data != fs:
        raise ValueError('Sampling frequency mismatch')

    # do time-freq decomposition
    # One STFT per channel; data is assumed (samples, channels) -- confirm.
    X = np.array([ 
        pra.stft(signal, nfft, stft_hop, transform=np.fft.rfft).T 
        for signal in data.T ])

    # run doa
    doa.locate_sources(X, freq_bins=freq_bins)
    col = float(doa.colatitude_recon[0])
    az = float(doa.azimuth_recon[0])

    # manual calibration groundtruth
    col_gt_man = locations['speakers_manual_colatitude'][h]
    az_gt_man = np.radians(int(angle))
    # Angular error on the unit sphere between estimate and groundtruth.
    error_man = pra.doa.great_circ_dist(1., col, az, col_gt_man, az_gt_man)

    # optimized calibration groundtruth
    col_gt_opt = locations['sources'][h]['colatitude'][angle]
    az_gt_opt = locations['sources'][h]['azimuth'][angle]
    error_opt = pra.doa.great_circ_dist(1., col, az, col_gt_opt, az_gt_opt)

    print(algo, h, angle, ': Err Man=', error_man, 'Opt=', error_opt)

    return {
            'algo' : algo,
            'angle' : angle,
            'spkr_height' : h,
            'loc_man' : (col_gt_man, az_gt_man),
            'loc_opt' : (col_gt_opt, az_gt_opt),
            'loc_doa' : (col, az),
            'error_man' : float(error_man),
            'error_opt' : float(error_opt),
            } 
Example 74
Project: pyramic-dataset   Author: fakufaku   File: segment.py    MIT License 4 votes vote down vote up
def open_segment_rigid(filename, noise_thresh=3, off_lo=150, off_hi=150, plot=False):
    """Split one long recording into labeled segments.

    Relies on module-level `lengths` (expected segment lengths, presumably in
    samples -- confirm), `labels` (segment names) and `basedir` (location of
    the ffmpeg_audio helper).

    :param filename: audio file; .wav is read with scipy, others via ffmpeg
    :param noise_thresh: threshold (in noise-std units) passed to find_segments
    :param off_lo: extra samples kept before each segment boundary
    :param off_hi: extra samples kept after each segment boundary
    :param plot: if True, plot every extracted segment
    :returns: dict mapping label -> signal array, plus key 'rate'
    """
    global lengths, labels, basedir

    import re, os, sys
    import numpy
    from scipy.io import wavfile

    from matplotlib import pyplot

    sys.path.append(basedir)
    import ffmpeg_audio

    # the file to segment
    if os.path.splitext(filename)[1] == '.wav':
        rate, audio = wavfile.read(filename)
    else:
        rate, audio = ffmpeg_audio.read(filename)

    # find offset here
    # Noise statistics estimated from the first 500 samples of each channel.
    noise_mean = numpy.mean(audio[:500, :], axis=0)
    noise_est = numpy.mean(numpy.std(audio[:500,:] - noise_mean[None,:], axis=0))

    # recursive code (method 2)
    # First few boundaries are detected; the rest are extrapolated from the
    # known segment lengths.
    boundaries = [0] + find_segments(audio, lengths[:3], noise_mean, noise_est, noise_thresh)
    boundaries[2] = boundaries[1] + lengths[1]

    for i in range(2, len(lengths)-1):
        boundaries.append(boundaries[-1] + lengths[i])

    # now extract
    signals = [audio[:boundaries[1]-off_lo,:]]

    # list of views
    for i in range(1, len(boundaries)-1, 2):
        b_lo, b_hi = boundaries[i], boundaries[i+1]
        signals.append(audio[b_lo-off_lo:b_hi+off_hi])

    # make a dictionary
    d = dict(zip(labels, signals))
    if plot:
        for label, signal in d.items():
            pyplot.figure()
            pyplot.plot(signal[:,0])
            pyplot.title(label)
        pyplot.show()

    # add the rate
    d['rate'] = rate

    return d 
Example 75
Project: ibllib   Author: int-brain-lab   File: training_audio.py    MIT License 4 votes vote down vote up
def extract_sound(ses_path, save=True, force=False, delete=False):
    """
    Simple audio features extraction for ambient sound characterization.
    From a wav file, generates several ALF files to be registered on Alyx

    :param ses_path: ALF full session path: (/mysubject001/YYYY-MM-DD/001)
    :param save: if True, write the computed arrays as .npy files
    :param force: if True, recompute even when all output files already exist
    :param delete: if True, removes the wav file after processing
    :return: None
    """
    ses_path = Path(ses_path)
    wav_file = ses_path / 'raw_behavior_data' / '_iblrig_micData.raw.wav'
    out_folder = ses_path / 'raw_behavior_data'
    if not wav_file.exists():
        return None
    files_out = {'power': out_folder / '_iblmic_audioSpectrogram.power.npy',
                 'frequencies': out_folder / '_iblmic_audioSpectrogram.frequencies.npy',
                 'onset_times': out_folder / '_iblmic_audioOnsetGoCue.times_mic.npy',
                 'times_microphone': out_folder / '_iblmic_audioSpectrogram.times_mic.npy',
                 'times': out_folder / '_iblmic_audioSpectrogram.times.npy'
                 }
    # if they exist and the option Force is set to false, do not recompute and exit
    if all([files_out[f].exists() for f in files_out]) and not force:
        logger_.warning('Output exists. Skipping ' + str(wav_file) + ' Use force flag to override')
        return
    # crunch the wav file
    fs, wav = wavfile.read(wav_file, mmap=False)
    if len(wav) == 0:
        # Empty payload: try the project's header-repair helper once, then
        # re-read; give up if the fix failed.
        status = _fix_wav_file(wav_file)
        if status != 0:
            logger_.error(f"WAV Header Indicates empty file. Couldn't fix. Abort. {wav_file}")
            return
        else:
            fs, wav = wavfile.read(wav_file, mmap=False)
    # welchogram presumably returns (time scale, freq scale, spectrogram,
    # detected onset times) -- confirm in its definition.
    tscale, fscale, W, detect = welchogram(fs, wav)
    # save files
    if save:
        out_folder.mkdir(exist_ok=True)
        np.save(file=files_out['power'], arr=W.astype(np.single))
        np.save(file=files_out['frequencies'], arr=fscale[None, :].astype(np.single))
        np.save(file=files_out['onset_times'], arr=detect)
        np.save(file=files_out['times_microphone'], arr=tscale[:, None].astype(np.single))
    # for the time scale, attempt to synchronize using onset sound detection and task data
    data = ioraw.load_data(ses_path)
    if data is None:  # if no session data, we're done
        return
    tgocue = np.array(ibllib.io.extractors.training_trials.get_goCueOnset_times(
        None, save=False, data=data))
    # Pair detected audio onsets with go-cue times from the task log.
    ilast = min(len(tgocue), len(detect))
    dt = tgocue[:ilast] - detect[: ilast]
    # only save if dt is consistent for the whole session
    if np.std(dt) < 0.2 and save:
        tscale += np.median(dt)
        np.save(file=files_out['times'], arr=tscale[:, None].astype(np.single))
    if delete:
        wav_file.unlink() 
Example 76
Project: osbpy   Author: KawaiiWafu   File: osbpy.py    MIT License 4 votes vote down vote up
def spectrum(
        wav_file, bar_file, mi, mx, har, start, end, posx, posy,
        layer, origin, gap=0, arrange="", radius=30, sine_height=6.1
):
    """Generate storyboard spectrum bars from a wav file.

    Reads channel 0 of ``wav_file``, computes a magnitude spectrogram, and
    emits one ``Osbject`` bar per harmonic whose vertical scale follows the
    normalized power over time.

    Parameters
    ----------
    wav_file : str
        Path to the wav file to analyze.
    bar_file : str
        Image file used for each bar sprite.
    mi, mx : float
        Minimum / maximum scale mapped from the normalized power.
    har : int
        Number of harmonics (bars) to render.
    start, end : int
        Active time window in milliseconds; samples outside are skipped.
    posx, posy : int
        Base position of the bar row.
    layer, origin :
        Storyboard layer and sprite origin, forwarded to ``Osbject``.
    gap : int, optional
        Horizontal spacing between bars (forced to 0 for circle layout).
    arrange : str, optional
        "" (row), "sine" or "circle" bar arrangement.
    radius, sine_height : float, optional
        Geometry parameters for the sine/circle arrangements.

    Returns
    -------
    list of Osbject
        One object per bar, with scale keyframes applied.
    """
    result = []
    frame_rate, snd = wavfile.read(wav_file)
    sound_info = snd[:, 0]  # assumes a multi-channel file; takes channel 0 -- TODO confirm
    # NOTE(review): plt.amax/plt.amin below rely on the project's pylab-style
    # import; verify the surrounding module exposes them.
    specgram, frequencies, t, im = plt.specgram(sound_info, NFFT=1024, Fs=frame_rate, noverlap=5, mode='magnitude')
    n = 0
    rotation = 6.2831  # approximately 2*pi, divided per-bar for circle layout
    sine_pos = {}
    circle_pos = {}
    # BUG FIX: the original compared strings with `is`, which tests object
    # identity and only works by CPython string-interning accident; use `==`.
    if arrange == "sine":
        sine_pos = sine(har, radius, sine_height)
        for i in range(har):
            circle_pos[i] = 0
    elif arrange == "circle":
        gap = 0
        sine_pos, circle_pos = circle(har, radius)
        rotation /= har
    else:
        for i in range(har):
            sine_pos[i] = 0
        for i in range(har):
            circle_pos[i] = 0
    maximum = plt.amax(specgram)
    minimum = plt.amin(specgram)
    position = 0
    while n < har:
        # Normalize the first power sample into [mi, mx], rounded up to 1/1000.
        last_value = ((specgram[n][0] - minimum) / (maximum - minimum)) * (mx - mi) + mi
        last_value = math.ceil(last_value * 1000) / 1000
        last_time = int(round(t[0] * 1000))
        result.append(Osbject(
            bar_file, layer, origin, posx + position * gap + int(round(float(circle_pos[n]))),
            posy + int(round(float(sine_pos[n]))))
        )
        position += 1
        if arrange == "circle":
            # Orient each bar outward along the circle.
            result[n].rotate(
                0, start, start, math.ceil((1.5707 + n * rotation) * 1000) / 1000,
                math.ceil((1.5707 + n * rotation) * 1000) / 1000
            )
        for index, power in enumerate(specgram[n]):
            power = ((power - minimum) / (maximum - minimum)) * (mx - mi) + mi
            power = math.ceil(power * 1000) / 1000
            # Skip unchanged values, out-of-window samples, and odd indices
            # (every other sample) to keep the storyboard small.
            # BUG FIX: `index % 2 is not 0` compared ints by identity; use `!=`.
            if power == last_value or int(round(t[index] * 1000)) < start or int(
                    round(t[index] * 1000)) > end or index % 2 != 0:
                last_time = int(round(t[index] * 1000))
                continue
            else:
                result[n].vecscale(0, last_time, int(round(t[index] * 1000)), 1, last_value, 1, power)
                last_value = power
                last_time = int(round(t[index] * 1000))
        n += 1
    return result
Example 77
Project: voicebot   Author: JustinaPetr   File: socketio_mod.py    GNU General Public License v3.0 4 votes vote down vote up
def blueprint(self, on_new_message):
        """Build the socketIO blueprint for the voice channel.

        Wires up connect/disconnect/session handlers and a message handler
        that transcribes incoming audio with DeepSpeech before forwarding
        the text to Rasa Core via ``on_new_message``.
        """
        sio = socketio.Server()
        socketio_webhook = SocketBlueprint(sio, self.socketio_path,
                                           'socketio_webhook', __name__)

        @socketio_webhook.route("/", methods=['GET'])
        def health():
            return jsonify({"status": "ok"})

        @sio.on('connect', namespace=self.namespace)
        def connect(sid, environ):
            logger.debug("User {} connected to socketIO endpoint.".format(sid))

        @sio.on('disconnect', namespace=self.namespace)
        def disconnect(sid):
            logger.debug("User {} disconnected from socketIO endpoint."
                         "".format(sid))

        @sio.on('session_request', namespace=self.namespace)
        def session_request(sid, data):
            # Assign a fresh session id when the client did not provide one.
            if data is None:
                data = {}
            if 'session_id' not in data or data['session_id'] is None:
                data['session_id'] = uuid.uuid4().hex
            sio.emit("session_confirm", data['session_id'], room=sid)
            logger.debug("User {} connected to socketIO endpoint."
                         "".format(sid))

        @sio.on(self.user_message_evt, namespace=self.namespace)
        def handle_message(sid, data):
            output_channel = SocketIOOutput(sio, sid, self.bot_message_evt)

            ##convert audio message to text and pass it to the Rasa Core
            # NOTE(review): the wav path is hard-coded; presumably the audio
            # should come from `data` -- confirm against the client protocol.
            ds = Model('models_stt/output_graph.pbmm', 26, 9, 'models_stt/alphabet.txt', 500)
            fs, audio = wav.read('LDC93S1.wav')
            message = ds.stt(audio, fs)

            if self.session_persistence:
                if not data.get("session_id"):
                    logger.warning("A message without a valid sender_id "
                                   "was received. This message will be "
                                   "ignored. Make sure to set a proper "
                                   "session id using the "
                                   "`session_request` socketIO event.")
                    return
                sender_id = data['session_id']
            else:
                sender_id = sid

            # BUG FIX: the transcribed text was previously discarded and
            # data['message'] was forwarded instead; pass the STT result.
            message = UserMessage(message, output_channel, sender_id,
                                  input_channel=self.name())
            on_new_message(message)

        return socketio_webhook
Example 78
Project: BlockCIrculantRNN   Author: lz1313   File: libri_preprocess.py    Apache License 2.0 4 votes vote down vote up
def wav2feature(root_directory, save_directory, name, win_len, win_step, mode, feature_len, seq2seq, save):
    """Walk a LibriSpeech-style directory and extract per-utterance features.

    For every ``.wav`` under ``root_directory/name``, computes delta-delta
    features and converts the matching ``.label`` transcript to integer
    targets (space=0, a..z=1..26, '=27, plus 28/29 start/end markers for
    seq2seq). When ``save`` is set, features and targets are written as
    ``.npy`` files, sharded into numbered subdirectories of 4000 files.
    """
    count = 0
    dirid = 0
    level = 'cha' if seq2seq is False else 'seq2seq'
    data_dir = os.path.join(root_directory, name)
    preprocess(data_dir)
    for subdir, dirs, files in os.walk(data_dir):
        for f in files:
            fullFilename = os.path.join(subdir, f)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]
            if f.endswith('.wav'):
                try:
                    (rate, sig) = wav.read(fullFilename)
                except ValueError as e:
                    # BUG FIX: Python 3 exceptions have no `.message`, and the
                    # fallback lines previously ran (and raised NameError on
                    # `sf`) even for unrelated ValueErrors. Only NIST-format
                    # files get the Sndfile fallback; anything else re-raises.
                    if "File format 'NIST'" not in str(e):
                        raise
                    sf = Sndfile(fullFilename, 'r')
                    sig = sf.read_frames(sf.nframes)
                    rate = sf.samplerate
                feat = calcfeat_delta_delta(sig,rate,win_length=win_len,win_step=win_step,mode=mode,feature_len=feature_len)
                feat = preprocessing.scale(feat)
                feat = np.transpose(feat)
                print(feat.shape)
                labelFilename = filenameNoSuffix + '.label'
                # BUG FIX: use a distinct handle name; `f` shadowed the loop
                # variable that is still needed on later iterations' logic.
                with open(labelFilename, 'r') as label_file:
                    characters = label_file.readline().strip().lower()
                targets = []
                if seq2seq is True:
                    targets.append(28)  # start-of-sequence marker
                for c in characters:
                    if c == ' ':
                        targets.append(0)
                    elif c == "'":
                        targets.append(27)
                    else:
                        targets.append(ord(c)-96)  # a..z -> 1..26
                if seq2seq is True:
                    targets.append(29)  # end-of-sequence marker
                print(targets)
                if save:
                    count += 1
                    if count % 4000 == 0:
                        dirid += 1  # shard output, 4000 files per directory
                    print('file index:', count)
                    print('dir index:', dirid)
                    label_dir = os.path.join(save_directory, level, name, str(dirid), 'label')
                    feat_dir = os.path.join(save_directory, level, name, str(dirid), 'feature')
                    if not os.path.isdir(label_dir):
                        os.makedirs(label_dir)
                    if not os.path.isdir(feat_dir):
                        os.makedirs(feat_dir)
                    # Portable basename instead of split('/') (fails on Windows).
                    base = os.path.basename(filenameNoSuffix)
                    featureFilename = os.path.join(feat_dir, base + '.npy')
                    np.save(featureFilename, feat)
                    t_f = os.path.join(label_dir, base + '.npy')
                    print(t_f)
                    np.save(t_f, targets)
Example 79
Project: signaltrain   Author: drscotthawley   File: audio.py    GNU General Public License v3.0 4 votes vote down vote up
def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32, warn=True, fix_and_overwrite=False):
    """
    Generic wrapper for reading an audio file.

    Different libraries offer different speeds for this, so this routine is the
    'catch-all' for whatever read routine happens to work best.

    Tries a fast method via scipy first, reverts to slower librosa when necessary.

    Parameters
    ----------
    filename : str
        Audio file to read.
    sr : int, optional
        Desired sample rate; the signal is resampled when the file differs.
    mono : bool, optional
        Keep only the first channel of multi-channel audio.
    norm : bool, optional
        Scale so the peak absolute value is 1 (no-op on silence).
    device : str, optional
        Unused here; kept for interface compatibility.
    dtype : np.dtype, optional
        Output dtype of the returned signal.
    warn : bool, optional
        Print diagnostics when scipy fails and librosa is used.
    fix_and_overwrite : bool, optional
        Rewrite the file after resampling/fallback so future reads are fast.

    Returns
    -------
    signal : np.ndarray
    sr : int
    """
    # first try to read via scipy, because it's fast
    scipy_ok = False
    with warnings.catch_warnings():
        warnings.simplefilter("error")    # scipy throws warnings which should be errors
        try:
            read_sr, signal = wavfile.read(filename)
            scipy_ok = True
        # BUG FIX: scipy also raises ValueError on files it cannot parse;
        # previously that escaped uncaught instead of triggering the
        # librosa fallback below.
        except (wavfile.WavFileWarning, ValueError):
            if warn:
                print("read_audio_file: Warning raised by scipy. ", end="")

    might_want_overwrite = False
    if scipy_ok:
        if mono and (len(signal.shape) > 1):     # convert to mono
            signal = signal[:, 0]

        # BUG FIX: check the array dtype rather than indexing signal[0],
        # which raised IndexError on an empty signal.
        if signal.dtype == np.int16:             # convert from ints to floats if necessary
            signal = np.array(signal/32767.0, dtype=dtype)   # change from [-32767..32767] to [-1..1]

        if read_sr != int(sr):
            print(f"read_audio_file: Got sample rate of {read_sr} Hz instead of {sr} Hz requested. Resampling.")
            signal = librosa.resample(signal, read_sr*1.0, sr*1.0, res_type='kaiser_fast')
            might_want_overwrite = True
    else:                                         # try librosa; it's slower but general
        if warn:
            print("Trying librosa.")
        signal, read_sr = librosa.core.load(filename, mono=mono, sr=sr, res_type='kaiser_fast')
        might_want_overwrite = True

    if fix_and_overwrite and might_want_overwrite:
        print(f"    Overwriting (unknown) (so we don't have to use process as much again)")
        write_audio_file(filename, signal, sr)

    if signal.dtype != dtype:
        signal = signal.astype(dtype, copy=False)

    if norm:
        absmax = np.max(np.abs(signal))
        signal = signal/absmax if absmax > 0 else signal

    return signal, sr
Example 80
Project: gm-cml   Author: wangyida   File: dataset_utils.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def gtzan_music_speech_load(dst='gtzan_music_speech'):
    """Load the GTZAN Music and Speech dataset.

    Downloads the dataset if it does not exist into the dst directory.

    Parameters
    ----------
    dst : str, optional
        Location of GTZAN Music and Speech dataset.

    Returns
    -------
    Xs, ys : np.ndarray, np.ndarray
        Array of data (transposed to [N, H, W, 2]), Array of labels
        (0 = music, 1 = speech)
    """
    from scipy.io import wavfile

    def _wav_paths(subdir):
        # All .wav files under music_speech/<subdir>.
        d = os.path.join(os.path.join(dst, 'music_speech'), subdir)
        return [os.path.join(d, file_i)
                for file_i in os.listdir(d)
                if file_i.endswith('.wav')]

    def _features(path):
        # Magnitude/phase DFT of the roughly [-1, 1]-scaled signal.
        # NOTE(review): dividing int16 samples by 16384 then subtracting 1
        # matches the original code; confirm intended scaling upstream.
        sr, s = wavfile.read(path)
        s = s / 16384.0 - 1.0
        re, im = dft.dft_np(s)
        mag, phs = dft.ztoc(re, im)
        return (mag, phs)

    if not os.path.exists(dst):
        gtzan_music_speech_download(dst)
    Xs = []
    ys = []
    # Labels: music files -> 0, speech files -> 1 (order preserved).
    for label, subdir in enumerate(('music_wav', 'speech_wav')):
        for path in _wav_paths(subdir):
            Xs.append(_features(path))
            ys.append(label)
    Xs = np.array(Xs)
    Xs = np.transpose(Xs, [0, 2, 3, 1])
    ys = np.array(ys)
    return Xs, ys