Python scipy.io.wavfile.write() Examples

The following code examples show how to use scipy.io.wavfile.write(). They are collected from open source Python projects.
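
A quick orientation before the examples: wavfile.write(filename, rate, data) infers the WAV sample format from the NumPy dtype of data (for example, int16 produces 16-bit PCM and float32 produces 32-bit IEEE float, where values are expected to lie in [-1, 1]), and a 2-D array of shape (nsamples, nchannels) is written as multichannel audio. A minimal sketch (the one-second 440 Hz tone is purely illustrative):

import numpy as np
from scipy.io import wavfile

rate = 44100  # samples per second
t = np.linspace(0., 1., rate, endpoint=False)
tone = 0.5 * np.sin(2 * np.pi * 440 * t)  # float64 samples in [-1, 1]

# 16-bit PCM: scale into the int16 range before converting
wavfile.write('tone_int16.wav', rate, (tone * 32767).astype(np.int16))

# 32-bit IEEE float: keep values in [-1, 1]
wavfile.write('tone_float32.wav', rate, tone.astype(np.float32))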

Example 1
Project: Voiceprint-Recognition   Author: SunYanCN   File: mfcc.py    Apache License 2.0
def generate_mfcc(sig, rate, sig_len, noise=None, noise_weight=0.1, winlen=0.03125, winstep=0.03125/2, numcep=13, nfilt=26, nfft=512, lowfreq=20, highfreq=4000, winfunc=np.hanning, ceplifter=0, preemph=0.97):
    # pad or truncate the signal to exactly sig_len samples
    if len(sig) < sig_len:
        sig = np.pad(sig, (0, sig_len - len(sig)), 'constant')
    elif len(sig) > sig_len:
        sig = sig[0:sig_len]
    # scale int16 samples to floats in [-1, 1)
    sig = sig.astype('float') / 32768

    if(noise is not None):
        noise = noise[random.randint(0, len(noise)-1)] # pick a noise
        start = random.randint(0, len(noise)-sig_len) # pick a sequence
        noise = noise[start:start+sig_len]
        noise = noise.astype('float')/32768
        sig = sig * (1-noise_weight) + noise * noise_weight
        #wav.write('noise_test.wav', rate, sig)
    mfcc_feat = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=numcep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
                     highfreq=highfreq, winfunc=winfunc, ceplifter=ceplifter, preemph=preemph)
    mfcc_feat = mfcc_feat.astype('float32')
    return mfcc_feat 
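
A possible way to call generate_mfcc above, assuming the module-level imports the project relies on (numpy as np, random, and mfcc, whose keyword arguments match the python_speech_features signature); the file name is hypothetical:

from scipy.io import wavfile

rate, sig = wavfile.read('speech.wav')         # hypothetical 16 kHz, int16 mono recording
feat = generate_mfcc(sig, rate, sig_len=rate)  # pad/truncate to exactly one second
print(feat.shape)                              # (n_frames, numcep)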
Example 2
Project: blow   Author: joansj   File: audio.py    Apache License 2.0
def synthesize(frames,filename,stride,sr=16000,deemph=0,ymax=0.98,normalize=False):
    # Generate stream
    y=torch.zeros((len(frames)-1)*stride+len(frames[0]))
    for i,x in enumerate(frames):
        y[i*stride:i*stride+len(x)]+=x
    # To numpy & deemph
    y=y.numpy().astype(np.float32)
    if deemph>0:
        y=deemphasis(y,alpha=deemph)
    # Normalize
    if normalize:
        y-=np.mean(y)
        mx=np.max(np.abs(y))
        if mx>0:
            y*=ymax/mx
    else:
        y=np.clip(y,-ymax,ymax)
    # To 16 bit & save
    wavfile.write(filename,sr,np.array(y*32767,dtype=np.int16))
    return y

######################################################################################################################## 
Example 3
Project: speech_separation   Author: bill9800   File: build_audio_database_v2.py    MIT License
def split_to_mix(audio_path_list,database_repo=DATABASE_REPO_PATH,partition=2):
    # return split_list : (part1,part2,...)
    # each part : (idx,path)
    length = len(audio_path_list)
    part_len = length // partition
    head = 0
    part_idx = 0
    split_list = []
    while((head+part_len)<length):
        part = audio_path_list[head:(head+part_len)]
        split_list.append(part)
        with open('%s/single_TF_part%d.txt'%(database_repo,part_idx),'a') as f:
            for idx, _ in part:
                name = 'single-%05d' % idx
                f.write('%s.npy' % name)
                f.write('\n')
        head += part_len
        part_idx += 1
    return split_list

# mix single TF data 
Example 4
Project: signaltrain   Author: drscotthawley   File: io_methods.py    GNU General Public License v3.0
def wavWrite(y, fs, nbits, audioFile):
        """ Write samples to WAV file
        Args:
            y:         (ndarray / 2D ndarray) (floating point) sample vector
                       mono: DIM: nSamples
                       stereo: DIM: nSamples x nChannels
            fs:        (int) Sample rate in Hz
            nbits:     (int) Number of bits
            audioFile: (string) WAV file name to write
        """
        if nbits == 8:
            intsamples = (y + 1.0) * AudioIO.normFact['int' + str(nbits)]
            fX = np.int8(intsamples)
        elif nbits == 16:
            intsamples = y * AudioIO.normFact['int' + str(nbits)]
            fX = np.int16(intsamples)
        elif nbits > 16:
            fX = y

        write(audioFile, fs, fX) 
Example 5
Project: signaltrain   Author: drscotthawley   File: io_methods.py    GNU General Public License v3.0
def sound(x,fs):
        """ Plays a wave file using the pyglet library. But first, it has to be written.
            Playback is terminated by any keyboard input followed by Enter.
            Args:
            x:    (array) Floating point samples
            fs:   (int) The sampling rate
        """
        import pyglet as pg
        global player
        # Write the samples to a temporary file first
        AudioIO.wavWrite(x, fs, 16, 'testPlayback.wav')
        # Initialize playback engine
        player = pg.media.Player()
        # Load the temporary audio file
        playback = pg.media.load('testPlayback.wav')
        # Queue it on the player
        player.queue(playback)
        # Start playback
        player.play()
        # Block until the user presses Enter, then stop
        kill = raw_input()  # use input() on Python 3
        if kill or kill == '':  # always true: stop regardless of what was typed
            AudioIO.stop()
        # Remove the temporary wave file
        os.remove('testPlayback.wav') 
Example 6
Project: tacotron2decoder   Author: cnlinxi   File: train.py    MIT License
def eval_step(sess, global_step, model, plot_dir, wav_dir, summary_writer, hparams):
	'''Evaluate model during training.
	Supposes that model variables are averaged.
	'''
	start_time = time.time()
	y_hat, y_target, loss = sess.run([model.y_hat, model.y_target, model.eval_loss])
	duration = time.time() - start_time
	log('Time Evaluation: Generation of {} audio frames took {:.3f} sec ({:.3f} frames/sec)'.format(
		len(y_target), duration, len(y_target)/duration))

	pred_wav_path = os.path.join(wav_dir, 'step-{}-pred.wav'.format(global_step))
	target_wav_path = os.path.join(wav_dir, 'step-{}-real.wav'.format(global_step))
	plot_path = os.path.join(plot_dir, 'step-{}-waveplot.png'.format(global_step))

	#Save Audio
	wavfile.write(pred_wav_path, hparams.sample_rate, y_hat)
	wavfile.write(target_wav_path, hparams.sample_rate, y_target)

	#Save figure
	util.waveplot(plot_path, y_hat, y_target, model._hparams)
	log('Eval loss for global step {}: {:.3f}'.format(global_step, loss))

	log('Writing eval summary!')
	add_test_stats(summary_writer, global_step, loss) 
Example 7
Project: tpse_tacotron2   Author: cnlinxi   File: train.py    MIT License
def eval_step(sess, global_step, model, plot_dir, wav_dir, summary_writer, hparams):
	'''Evaluate model during training.
	Supposes that model variables are averaged.
	'''
	start_time = time.time()
	y_hat, y_target, loss = sess.run([model.y_hat, model.y_target, model.eval_loss])
	duration = time.time() - start_time
	log('Time Evaluation: Generation of {} audio frames took {:.3f} sec ({:.3f} frames/sec)'.format(
		len(y_target), duration, len(y_target)/duration))

	pred_wav_path = os.path.join(wav_dir, 'step-{}-pred.wav'.format(global_step))
	target_wav_path = os.path.join(wav_dir, 'step-{}-real.wav'.format(global_step))
	plot_path = os.path.join(plot_dir, 'step-{}-waveplot.png'.format(global_step))

	#Save Audio
	wavfile.write(pred_wav_path, hparams.sample_rate, y_hat)
	wavfile.write(target_wav_path, hparams.sample_rate, y_target)

	#Save figure
	util.waveplot(plot_path, y_hat, y_target, model._hparams)
	log('Eval loss for global step {}: {:.3f}'.format(global_step, loss))

	log('Writing eval summary!')
	add_test_stats(summary_writer, global_step, loss) 
Example 8
Project: silence-removal   Author: mauriciovander   File: segment.py    MIT License
def process(self, data):
        if self.add_samples(data):
            while len(self.__buffer) >= self.__buffer_size:
                # Framing
                window = self.get_frame()
                # print('window size %i'%window.size)
                if self.vad(window):  # speech frame
                    print('voiced')
                    self.__out_buffer = numpy.append(self.__out_buffer, window)
                    self.__voice_detected = True
                elif self.__voice_detected:
                    print('unvoiced')
                    self.__voice_detected = False
                    self.__segment_count = self.__segment_count + 1
                    wf.write('%s.%i.%i.wav'%(sys.argv[2],self.__channel,self.__segment_count),sr,self.__out_buffer)
                    self.__out_buffer = numpy.array([],dtype=numpy.int16)
                    print(self.__segment_count)

                # print('__out_buffer size %i'%self.__out_buffer.size) 
Example 9
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def run_phase_reconstruction_example():
    fs, d = fetch_sample_speech_tapestry()
    # actually gives however many components you say! So double what .m file
    # says
    fftsize = 512
    step = 64
    X_s = np.abs(stft(d, fftsize=fftsize, step=step, real=False,
                      compute_onesided=False))
    X_t = iterate_invert_spectrogram(X_s, fftsize, step, verbose=True)

    """
    import matplotlib.pyplot as plt
    plt.specgram(d, cmap="gray")
    plt.savefig("1.png")
    plt.close()
    plt.imshow(X_s, cmap="gray")
    plt.savefig("2.png")
    plt.close()
    """

    wavfile.write("phase_original.wav", fs, soundsc(d))
    wavfile.write("phase_reconstruction.wav", fs, soundsc(X_t)) 
Example 10
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def run_fft_dct_example():
    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]
    X_stft = stft(X, n_fft)
    X_rr = complex_to_real_view(X_stft)
    X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
    X_dct_sub = X_dct[1:] - X_dct[:-1]
    std = X_dct_sub.std(axis=0, keepdims=True)
    X_dct_sub += .01 * std * random_state.randn(
        X_dct_sub.shape[0], X_dct_sub.shape[1])
    X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
    X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
    X_irr = real_to_complex_view(X_idct)
    X_r = istft(X_irr, n_fft)[:len(X)]

    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r)) 
Example 11
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def run_ltsd_example():
    fs, d = fetch_sample_speech_tapestry()
    winsize = 1024
    d = d.astype("float32") / 2 ** 15
    d -= d.mean()

    pad = 3 * fs
    noise_pwr = np.percentile(d, 1) ** 2
    noise_pwr = max(1E-9, noise_pwr)
    d = np.concatenate((np.zeros((pad,)) + noise_pwr * np.random.randn(pad), d))
    _, vad_segments = ltsd_vad(d, fs, winsize=winsize)
    v_up = np.where(vad_segments == True)[0]
    s = v_up[0]
    st = v_up[-1] + int(.5 * fs)
    d = d[s:st]

    bname = "tapestry.wav".split(".")[0]
    wavfile.write("%s_out.wav" % bname, fs, soundsc(d)) 
Example 12
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License
def run_phase_reconstruction_example():
    fs, d = fetch_sample_speech_tapestry()
    # actually gives however many components you say! So double what .m file
    # says
    fftsize = 512
    step = 64
    X_s = np.abs(stft(d, fftsize=fftsize, step=step, real=False,
                      compute_onesided=False))
    X_t = iterate_invert_spectrogram(X_s, fftsize, step, verbose=True)

    """
    import matplotlib.pyplot as plt
    plt.specgram(d, cmap="gray")
    plt.savefig("1.png")
    plt.close()
    plt.imshow(X_s, cmap="gray")
    plt.savefig("2.png")
    plt.close()
    """

    wavfile.write("phase_original.wav", fs, soundsc(d))
    wavfile.write("phase_reconstruction.wav", fs, soundsc(X_t)) 
Example 13
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License
def run_fft_dct_example():
    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    n_fft = 64
    X = d[0]
    X_stft = stft(X, n_fft)
    X_rr = complex_to_real_view(X_stft)
    X_dct = fftpack.dct(X_rr, axis=-1, norm='ortho')
    X_dct_sub = X_dct[1:] - X_dct[:-1]
    std = X_dct_sub.std(axis=0, keepdims=True)
    X_dct_sub += .01 * std * random_state.randn(
        X_dct_sub.shape[0], X_dct_sub.shape[1])
    X_dct_unsub = np.cumsum(X_dct_sub, axis=0)
    X_idct = fftpack.idct(X_dct_unsub, axis=-1, norm='ortho')
    X_irr = real_to_complex_view(X_idct)
    X_r = istft(X_irr, n_fft)[:len(X)]

    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    print(SNR)

    wavfile.write("fftdct_orig.wav", fs, soundsc(X))
    wavfile.write("fftdct_rec.wav", fs, soundsc(X_r)) 
Example 14
Project: Black-Box-Audio   Author: rtaori   File: run_audio_attack.py    MIT License
def save_wav(audio, output_wav_file):
    wav.write(output_wav_file, 16000, np.array(np.clip(np.round(audio), -2**15, 2**15-1), dtype=np.int16))
    print('output dB', db(audio)) 
Example 15
Project: Griffin_lim   Author: candlewill   File: audio.py    MIT License
def save_wav(wav, path):
    wav *= 32767 / max(0.01, np.max(np.abs(wav)))
    wavfile.write(path, hparams.sample_rate, wav.astype(np.int16)) 
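
The 32767 / max(0.01, np.max(np.abs(wav))) pattern here (and in several Tacotron-style examples below) peak-normalizes the float waveform and rescales it into the int16 range in one step; the 0.01 floor keeps near-silent output from being amplified by an enormous factor. A standalone sketch of the same idea, with hparams removed and without mutating the caller's array (the in-place *= above modifies wav for the caller too); the default sample rate is an arbitrary assumption:

import numpy as np
from scipy.io import wavfile

def save_wav_normalized(wav, path, sr=22050):
    # peak-normalize, but never boost quiet audio by more than 32767 / 0.01
    scaled = wav * (32767 / max(0.01, np.max(np.abs(wav))))
    wavfile.write(path, sr, scaled.astype(np.int16))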
Example 16
Project: voice-recognition   Author: golabies   File: read_data.py    MIT License
def save_data(self, name='out_put'):
        # careful: existing files with the same name will be overwritten
        self.name = name
        for i in range(len(self.voice)):
            wavfile.write(self.name+'_'+str(i)+'.wav', rate=len(self.voice[0]), data=self.voice[i]) 
Example 17
Project: Deep_VoiceChanger   Author: pstuvwx   File: gla_gpu.py    MIT License
def save(path, bps, data):
        if data.dtype != np.int16:
            data = data.astype(np.int16)
        data = np.reshape(data, -1)
        wav.write(path, bps, data) 
Example 18
Project: Deep_VoiceChanger   Author: pstuvwx   File: gla_util.py    MIT License
def save(path, bps, data):
        if data.dtype != np.int16:
            data = data.astype(np.int16)
        data = np.reshape(data, -1)
        wav.write(path, bps, data) 
Example 19
Project: Deep_VoiceChanger   Author: pstuvwx   File: dataset.py    MIT License
def save(path, bps, data):
    if data.dtype != np.int16:
        data = data.astype(np.int16)
    data = np.reshape(data, -1)
    wav.write(path, bps, data) 
Example 20
Project: fine-lm   Author: akzaidi   File: speech_recognition.py    MIT License
def decode(self, ids):
    """Transform a sequence of float32 into a waveform.

    Args:
      ids: list of integers to be converted.

    Returns:
      Path to the temporary file where the waveform was saved.

    Raises:
      ValueError: if the ids are not of the appropriate size.
    """
    _, tmp_file_path = tempfile.mkstemp()
    wavfile.write(tmp_file_path, self._sample_rate, np.asarray(ids))
    return tmp_file_path 
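
Two details of this decoder are worth noting. tempfile.mkstemp() returns an open OS-level file descriptor as its first element, so discarding it (the _ above) leaks the descriptor, and because np.asarray(ids) on plain Python floats yields float64, the temporary file ends up written as a 64-bit float WAV. A variant that closes the descriptor and writes 16-bit PCM instead might look like this sketch (assuming samples in [-1, 1]):

import os
import tempfile
import numpy as np
from scipy.io import wavfile

def decode_to_wav(samples, sample_rate):
    fd, tmp_file_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # mkstemp returns an open descriptor; close it before reuse
    pcm = (np.clip(np.asarray(samples), -1.0, 1.0) * 32767).astype(np.int16)
    wavfile.write(tmp_file_path, sample_rate, pcm)
    return tmp_file_path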
Example 21
Project: speech_separation   Author: bill9800   File: AVHandler.py    MIT License
def mix(loc,name,file1,file2,start,end,trim_clean=False):
    # mix the audio/video via sox
    # loc         | location of the mix files
    # name        | output name of wav
    # file1       | first file to mix
    # file2       | second file to mix
    # start       | mixture starting time
    # end         | mixture end time
    # trim_clean  | delete the trim file or not
    command = 'cd %s;' % loc
    cut(loc,file1,start,end)
    cut(loc,file2,start,end)
    trim1 = '%s/trim_%s.wav' % (loc,file1)
    trim2 = '%s/trim_%s.wav' % (loc,file2)
    wav1, wav1_sr = librosa.load(trim1, sr=None)  # time series data, sample rate
    wav2, wav2_sr = librosa.load(trim2, sr=None)

    # normalize both signals to the same peak level
    wav1 = wav1 / np.max(np.abs(wav1))
    wav2 = wav2 / np.max(np.abs(wav2))
    assert wav1_sr == wav2_sr
    mix_wav = wav1*0.5+wav2*0.5

    path = '%s/%s.wav' % (loc,name)
    wavfile.write(path,wav1_sr,mix_wav)
    if trim_clean:
        command += 'rm trim_%s.wav;rm trim_%s.wav;' % (file1,file2)
    os.system(command) 
Example 22
Project: speech_separation   Author: bill9800   File: build_audio_database_v2.py    MIT License
def single_audio_to_npy(audio_path_list,database_repo=DATABASE_REPO_PATH,fix_sr=16000):
    for idx,path in audio_path_list:
        print('\rsingle npy generating... %d'%((idx/len(audio_path_list))*100),end='')
        data, _ = librosa.load(path, sr=fix_sr)
        data = utils.fast_stft(data)
        name = 'single-%05d'%idx
        with open('%s/single_TF.txt'%database_repo,'a') as f:
            f.write('%s.npy'%name)
            f.write('\n')
        np.save(('%s/single/%s.npy'%(database_repo,name)),data)
    print()


# split single TF data to different part in order to mix 
Example 23
Project: speech_separation   Author: bill9800   File: build_audio_database_v2.py    MIT License
def single_mix(combo_idx,split_list,database_repo):
    assert len(combo_idx) == len(split_list)
    mix_rate = 1.0 / float(len(split_list))
    wav_list = []
    prefix = "mix"
    mid_name = ""

    for part_idx in range(len(split_list)):
        idx,path = split_list[part_idx][combo_idx[part_idx]]
        wav, _ = librosa.load(path, sr=16000)
        wav_list.append(wav)
        mid_name += '-%05d' % idx

    # mix wav file
    mix_wav = np.zeros_like(wav_list[0])
    for wav in wav_list:
        mix_wav += wav * mix_rate

    # save mix wav file
    wav_name = prefix+mid_name+'.wav'
    wavfile.write('%s/mix_wav/%s'%(database_repo,wav_name),16000,mix_wav)

    # transfer mix wav to TF domain
    F_mix = utils.fast_stft(mix_wav)
    name = prefix+mid_name+".npy"
    store_path = '%s/mix/%s'%(database_repo,name)

    # save mix as npy file
    np.save(store_path,F_mix)

    # save mix log
    with open('%s/mix_log.txt'%database_repo,'a') as f:
        f.write(name)
        f.write("\n") 
Example 24
Project: speech_separation   Author: bill9800   File: build_audio_database_v2.py    MIT License
def single_crm(idx_str_list,mix_path,database_repo):
    F_mix = np.load(mix_path)
    mid_name = ""
    mix_name = "mix"
    dataset_line = ""

    for idx in idx_str_list:
        mid_name += "-%s"%idx
        mix_name += "-%s"%idx
    mix_name += '.npy'
    dataset_line += mix_name

    for idx in idx_str_list:
        single_name = 'single-%s.npy'%idx
        path = '%s/single/%s'%(database_repo,single_name)
        F_single = np.load(path)
        cRM = utils.fast_cRM(F_single,F_mix)

        last_name = '-%s'%idx
        cRM_name = 'crm' + mid_name + last_name + '.npy'

        # save crm to npy
        store_path = '%s/crm/%s'%(database_repo,cRM_name)
        np.save(store_path,cRM)

        # save crm information to log
        with open('%s/crm_log.txt'%database_repo, 'a') as f:
            f.write(cRM_name)
            f.write('\n')
        dataset_line += (" "+cRM_name)

    # write in database log
    with open('%s/dataset.txt'%database_repo,'a') as f:
        f.write(dataset_line)
        f.write('\n') 
Example 25
Project: LaserTOF   Author: kyleuckert   File: test_wavfile.py    MIT License
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
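
As the test above relies on, wavfile.write and wavfile.read accept a file-like object as well as a file name, which makes fully in-memory round trips possible. A minimal version of the same check:

from io import BytesIO
import numpy as np
from scipy.io import wavfile

buf = BytesIO()
data = (np.random.rand(100) * 128).astype(np.uint8)  # 8-bit WAV data is unsigned
wavfile.write(buf, 8000, data)
buf.seek(0)  # rewind before reading back
rate, out = wavfile.read(buf)
assert rate == 8000 and np.array_equal(data, out)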
Example 26
Project: devicehive-audio-analysis   Author: devicehive   File: capture.py    Apache License 2.0
def _process_loop(self):
        with WavProcessor() as proc:
            self._ask_data.set()
            while True:
                if self._process_buf is None:
                    # Waiting for data to process
                    time.sleep(self._processor_sleep_time)
                    continue

                self._ask_data.clear()
                if self._save_path:
                    f_path = os.path.join(
                        self._save_path, 'record_{:.0f}.wav'.format(time.time())
                    )
                    wavfile.write(f_path, self._sample_rate, self._process_buf)
                    logger.info('"{}" saved.'.format(f_path))

                logger.info('Start processing.')
                predictions = proc.get_predictions(
                    self._sample_rate, self._process_buf)
                logger.info(
                    'Predictions: {}'.format(format_predictions(predictions))
                )

                logger.info('Stop processing.')
                self._process_buf = None
                self._ask_data.set() 
Example 27
Project: devicehive-audio-analysis   Author: devicehive   File: daemon.py    Apache License 2.0
def _process_loop(self):
        with WavProcessor() as proc:
            self._ask_data_event.set()
            while self.is_running:
                if self._process_buf is None:
                    # Waiting for data to process
                    time.sleep(self._processor_sleep_time)
                    continue

                self._ask_data_event.clear()
                if self._save_path:
                    f_path = os.path.join(
                        self._save_path, 'record_{:.0f}.wav'.format(time.time())
                    )
                    wavfile.write(f_path, self._sample_rate, self._process_buf)
                    logger.info('"{}" saved'.format(f_path))

                logger.info('Start processing')
                predictions = proc.get_predictions(
                    self._sample_rate, self._process_buf)
                formatted = format_predictions(predictions)
                logger.info('Predictions: {}'.format(formatted))

                self.events_queue.append((datetime.datetime.now(), formatted))
                self._send_dh(predictions)

                logger.info('Stop processing')
                self._process_buf = None
                self._ask_data_event.set() 
Example 28
Project: Tacotron-2   Author: cpuimage   File: audio.py    MIT License
def save_wav(wav, path, sr):
    wav *= 32767 / max(0.01, np.max(np.abs(wav)))
    wavfile.write(path, sr, wav.astype(np.int16)) 
Example 29
Project: pyramic-dataset   Author: fakufaku   File: run_experiment.py    MIT License
def save_audio(buf):
    global current_record, SAMPLING_FREQUENCY, RECORD_FOLDER, RECORD_FILENAME
    filename = '/'.join([RECORD_FOLDER, RECORD_FILENAME.format(**current_record)])
    wavfile.write(filename, SAMPLING_FREQUENCY, buf)
    print(filename, 'done with', len(buf), 'samples')
    current_record['done'] = True 
Example 30
Project: waveglow   Author: npuichigo   File: generate.py    Apache License 2.0
def write_wav(wav, sample_rate, filename):
    max_value_16bit = (1 << 15) - 1
    wav *= max_value_16bit
    wavfile.write(filename, sample_rate, wav.astype(np.int16))
    logger.info('Updated wav file at {}'.format(filename)) 
Example 31
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_wavfile.py    GNU General Public License v3.0
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 32
Project: signaltrain   Author: drscotthawley   File: audio.py    GNU General Public License v3.0
def write_audio_file(filename, data, sr=44100):
    wavfile.write(filename, sr, data)
    #librosa.output.write_wav(filename, data, sr)
    #torchaudio.save(filename, torch.Tensor(data).unsqueeze(1), sr)
    return 
Example 33
Project: tensor2tensor   Author: tensorflow   File: audio_encoder.py    Apache License 2.0
def decode(self, ids):
    """Transform a sequence of float32 into a waveform.

    Args:
      ids: list of integers to be converted.

    Returns:
      Path to the temporary file where the waveform was saved.

    Raises:
      ValueError: if the ids are not of the appropriate size.
    """
    _, tmp_file_path = tempfile.mkstemp()
    wavfile.write(tmp_file_path, self._sample_rate, np.asarray(ids))
    return tmp_file_path 
Example 34
Project: tacotron2decoder   Author: cnlinxi   File: audio.py    MIT License
def save_wav(wav, path, sr):
	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	#proposed by @dsmiller
	wavfile.write(path, sr, wav.astype(np.int16)) 
Example 35
Project: BERT   Author: yyht   File: audio_encoder.py    Apache License 2.0
def decode(self, ids):
    """Transform a sequence of float32 into a waveform.

    Args:
      ids: list of integers to be converted.

    Returns:
      Path to the temporary file where the waveform was saved.

    Raises:
      ValueError: if the ids are not of the appropriate size.
    """
    _, tmp_file_path = tempfile.mkstemp()
    wavfile.write(tmp_file_path, self._sample_rate, np.asarray(ids))
    return tmp_file_path 
Example 36
Project: tpse_tacotron2   Author: cnlinxi   File: audio.py    MIT License
def save_wav(wav, path, sr):
	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	#proposed by @dsmiller
	wavfile.write(path, sr, wav.astype(np.int16)) 
Example 37
Project: dsp   Author: nick-thompson   File: utils.py    MIT License
def write_wav(name, sr, arr):
    """
    Write a numpy array to disk as a little-endian 32-bit signed WAV file.

    Parameters
    name : Output file name
    sr : Output sample rate
    arr : The numpy array to write
    """
    factor = 2**31 - 1
    output = (arr * factor).astype('<i4')
    wavfile.write(name, sr, output) 
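
A possible call to write_wav above; since the array is multiplied by 2**31 - 1 and cast to '<i4' (little-endian 32-bit signed integers), arr is expected to hold floats in [-1, 1]:

import numpy as np

sr = 48000
t = np.arange(sr) / sr
write_wav('sine32.wav', sr, 0.25 * np.sin(2 * np.pi * 220 * t))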
Example 38
Project: dsp   Author: nick-thompson   File: utils.py    MIT License
def write_pcm(name, arr):
    """
    Write a numpy array to disk as a PCM file. Virtually the same as `write_wav`
    above, but the resulting file doesn't have the WAV header.

    Parameters
    name : Output file name
    arr : The numpy array to write
    """
    factor = 2**31 - 1
    output = (arr * factor).astype('<i4')
    output.tofile(name) 
Example 39
Project: F2CNN   Author: tictacmenthe   File: Evaluating.py    Apache License 2.0
def EvaluateWithNoise(file, LPF=False, CUTOFF=100, model='last_trained_model', CENTER_FREQUENCIES=None,
                      FILTERBANK_COEFFICIENTS=None, SNRdB=-3):
    print("File:\t\t{}".format(file))
    print("Appyling gaussian noise, new SNR is {SNR}dB".format(SNR=SNRdB))
    framerate, wavList = GetArrayFromWAV(file)
    # Generating noise
    noise = numpy.random.normal(scale=RMS(wavList) / SNRdbToSNRlinear(SNRdB), size=wavList.shape[0])
    output = noise + wavList

    # Creating and saving the new wav file
    os.makedirs(os.path.join('OutputWavFiles', 'addedNoise'), exist_ok=True)
    baseName = os.path.join('OutputWavFiles', 'addedNoise',
                            os.path.split(os.path.splitext(file)[0])[1]) + '{SNR}dB'.format(SNR=SNRdB)
    newPath = baseName + '.WAV'
    srcBasename = os.path.splitext(file)[0]

    wavfile.write(newPath, framerate, output)
    try:
        copyfile(srcBasename + '.FB', baseName + '.FB')
        copyfile(srcBasename + '.PHN', baseName + '.PHN')
        copyfile(srcBasename + '.WRD', baseName + '.WRD')
    except FileNotFoundError as e:
        print(e.strerror)
        print("No .FB or .PHN or .WRD files.")

    print('New noisy WAVE file saved as', newPath)
    EvaluateOneWavArray(output, framerate, newPath, model=model, LPF=LPF, CUTOFF=CUTOFF, CENTER_FREQUENCIES=CENTER_FREQUENCIES, FILTERBANK_COEFFICIENTS=FILTERBANK_COEFFICIENTS)

    print("\t\t{}\tdone !".format(file)) 
Example 40
Project: ble5-nrf52-mac   Author: tomasero   File: test_wavfile.py    MIT License
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 41
Project: DCASE2017-task1   Author: ronggong   File: audioChannelSep.py    GNU Affero General Public License v3.0
def channelSep(filename_wav, path_dcase):
    """
    Separate stereo audio into left, right, average and difference
    :param filename_wav:
    :return:
    """
    LOADER = es.AudioLoader(filename=filename_wav)
    audio, sr, num_chan, md5, bitrate, codec = LOADER()
    filename_wav = os.path.basename(filename_wav)
    print(filename_wav)
    wavfile.write(os.path.join(path_dcase, 'audio_left', filename_wav), sr, audio[:, 0])
    wavfile.write(os.path.join(path_dcase, 'audio_right', filename_wav), sr, audio[:, 1])
    wavfile.write(os.path.join(path_dcase, 'audio_average', filename_wav), sr, audio[:, 0]/2.0+audio[:, 1]/2.0)
    wavfile.write(os.path.join(path_dcase, 'audio_difference', filename_wav), sr, audio[:, 0]-audio[:, 1]) 
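
Going the other way, wavfile.write treats a 2-D array of shape (nsamples, 2) as stereo, so recombining two channels into one file only takes np.column_stack. A sketch with synthetic left/right channels:

import numpy as np
from scipy.io import wavfile

sr = 44100
t = np.arange(sr) / sr
left = (0.4 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
right = (0.4 * np.sin(2 * np.pi * 554 * t) * 32767).astype(np.int16)
stereo = np.column_stack((left, right))  # shape (nsamples, 2)
wavfile.write('stereo.wav', sr, stereo)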
Example 42
Project: vae_tacotron2   Author: rishikksh20   File: audio.py    MIT License
def save_wav(wav, path):
	wav *= 32767 / max(0.01, np.max(np.abs(wav))) 
	#proposed by @dsmiller
	wavfile.write(path, hparams.sample_rate, wav.astype(np.int16)) 
Example 43
Project: SparseRecurrentNetwork   Author: eidonfiloi   File: audio_data_utils.py    MIT License
def write_np_as_wav(X, sample_rate, filename):
    Xnew = X * 32767.0
    Xnew = Xnew.astype('int16')
    #Xnew = np.subtract(Xnew.astype('int16'), np.ones(Xnew.shape))
    print(np.asarray(Xnew[0:100]).tolist())
    wav.write(filename, sample_rate, Xnew)
    return 
Example 44
Project: Computable   Author: ktraunmueller   File: test_wavfile.py    MIT License
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 45
Project: poker   Author: surgebiswas   File: test_wavfile.py    MIT License
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 46
Project: WaveNet   Author: MU94W   File: generate.py    MIT License
def main():
    args = get_arguments()
    if hasattr(args, 'hyper_param_path'):
        hp = HyperParams(param_json_path=args.hyper_param_path)
    else:
        hp = HyperParams()

    with tf.variable_scope('data'):
        waveform_seed_placeholder = tf.placeholder(name='waveform_seed', shape=(None, 1, 1), dtype=tf.int32)
        max_infer_samples_placeholder = tf.placeholder(name='max_infer_samples', shape=(), dtype=tf.int32)

    with tf.variable_scope('model'):
        model = WaveNet(waveform_seed_placeholder, max_infer_samples_placeholder, hyper_params=hp)

    saver = tf.train.Saver()

    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.device_count = {'CPU': 24}
    #config.intra_op_parallelism_threads = 0
    #config.inter_op_parallelism_threads = 0
    with tf.Session(config=config) as sess:
        model.sess = sess
        save_path = '/home/tpog/Lab_2017_end/WaveNet/exp/Y2017_M10_D30_h1_m48_s27/save'
        ckpt = tf.train.get_checkpoint_state(save_path)
        assert ckpt, '[E] No trained model found!'
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(sess, os.path.join(save_path, ckpt_name))

        rnd_seed = np.random.randint(low=0, high=hp.waveform_categories, size=(args.gen_samples, 1, 1), dtype=np.int32)
        begin_time = time.time()
        global_step_eval = sess.run(model.global_step)
        pred_wav_eval = sess.run(model.pred_wav, feed_dict={waveform_seed_placeholder: rnd_seed,
                                                            max_infer_samples_placeholder: args.max_infer_samples})
        used_time = time.time() - begin_time
        print(f'Generate {args.gen_samples} waves, per {args.max_infer_samples} samples, use {used_time} seconds.')
        for idx, single_wav in enumerate(pred_wav_eval):
            siowav.write('step_{}_pred_{}.wav'.format(global_step_eval, idx), data=single_wav, rate=hp.sample_rate) 
Example 47
Project: ADAGIO   Author: nilakshdas   File: attack.py    MIT License
def convert_mp3(new, lengths):
    import pydub
    wav.write("/tmp/load.wav", 16000,
              np.array(np.clip(np.round(new[0][:lengths[0]]),
                               -2**15, 2**15-1),dtype=np.int16))
    pydub.AudioSegment.from_wav("/tmp/load.wav").export("/tmp/saved.mp3")
    raw = pydub.AudioSegment.from_mp3("/tmp/saved.mp3")
    mp3ed = np.array([struct.unpack("<h", raw.raw_data[i:i+2])[0] for i in range(0,len(raw.raw_data),2)])[np.newaxis,:lengths[0]]
    return mp3ed 
Example 48
Project: chainerui   Author: chainer   File: audio_report.py    MIT License
def _set_wav_writer():
    try:
        from scipy.io.wavfile import write

        def scipy_write_wav(out, data, rate):
            return write(out, rate, data)

        return scipy_write_wav
    except (ImportError, TypeError):
        return None 
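
The helper returns None when scipy is unavailable, so callers can feature-detect at runtime. Note that the wrapper's argument order is (out, data, rate), not scipy's (out, rate, data). A possible usage sketch:

import numpy as np

write_wav = _set_wav_writer()
if write_wav is not None:
    silence = np.zeros(16000, dtype=np.int16)  # one second at 16 kHz
    write_wav('silence.wav', silence, 16000)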
Example 49
Project: tools   Author: kastnerkyle   File: test_audio_extract.py    BSD 3-Clause "New" or "Revised" License
def run_world_mgc_example():
    # run on chromebook
    # enc 839.71
    # synth 48.79
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    # hardcoded for 16k from
    # https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
    mgc_alpha = 0.58
    #mgc_order = 59
    mgc_order = 59
    # this is actually just mcep
    mgc_gamma = 0.0

    def enc():
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        mgc_arr = sp2mgc(spectrogram_ct, mgc_order, mgc_alpha, mgc_gamma,
                verbose=True)
        return mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c


    start = time.time()
    mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    enc_done = time.time()

    sp_r = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fs=fs, verbose=True)
    synth_done = time.time()

    print("enc time: {}".format(enc_done - start))
    print("synth time: {}".format(synth_done - enc_done))
    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, sp_r, fs)
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_mgc.wav", fs, soundsc(y)) 
Example 50
Project: tools   Author: kastnerkyle   File: test_audio_extract.py    BSD 3-Clause "New" or "Revised" License
def run_world_base_example():
    # on chromebook
    # enc 114.229
    # synth 5.165
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    def enc():
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c


    start = time.time()
    spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    enc_done = time.time()

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
    synth_done = time.time()

    print("enc time: {}".format(enc_done - start))
    print("synth time: {}".format(synth_done - enc_done))
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_base.wav", fs, soundsc(y)) 
Example 51
Project: tools   Author: kastnerkyle   File: test_audio_extract.py    BSD 3-Clause "New" or "Revised" License
def run_world_dct_example():
    # on chromebook
    # enc 114.229
    # synth 5.165
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    def enc():
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        return spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c

    start = time.time()
    spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    dct_buf = fftpack.dct(spectrogram_ct)
    n_fft = 512
    n_dct = 20
    dct_buf = dct_buf[:, :n_dct]
    idct_buf = np.zeros((dct_buf.shape[0], n_fft + 1))
    idct_buf[:, :n_dct] = dct_buf
    ispectrogram_ct = fftpack.idct(idct_buf)
    enc_done = time.time()

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs)
    synth_done = time.time()

    print("enc time: {}".format(enc_done - start))
    print("synth time: {}".format(synth_done - enc_done))
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_dct.wav", fs, soundsc(y))


#run_world_mgc_example()
#run_world_base_example() 
Example 52
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def download(url, server_fname, local_fname=None, progress_update_percentage=5,
             bypass_certificate_check=False):
    """
    An internet download utility modified from
    http://stackoverflow.com/questions/22676/
    how-do-i-download-a-file-over-http-using-python/22776#22776
    """
    if bypass_certificate_check:
        import ssl
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        u = urllib.urlopen(url, context=ctx)
    else:
        u = urllib.urlopen(url)
    if local_fname is None:
        local_fname = server_fname
    full_path = local_fname
    meta = u.info()
    with open(full_path, 'wb') as f:
        try:
            file_size = int(meta.get("Content-Length"))
        except TypeError:
            print("WARNING: Cannot get file size, displaying bytes instead!")
            file_size = 100
        print("Downloading: %s Bytes: %s" % (server_fname, file_size))
        file_size_dl = 0
        block_sz = int(1E7)
        p = 0
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            f.write(buffer)
            if (file_size_dl * 100. / file_size) > p:
                status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl *
                                               100. / file_size)
                print(status)
                p += progress_update_percentage 
Example 53
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def run_cqt_example():
    try:
        fs, d = fetch_sample_file("/Users/User/cqt_resources/kempff1.wav")
    except ValueError:
        print("WARNING: Using sample music instead but kempff1.wav is the example")
        fs, d = fetch_sample_music()
    X = d[:44100]
    X_cq, c_dc, c_nyq, multiscale, shift, window_lens = cqt(X, fs)
    X_r = icqt(X_cq, c_dc, c_nyq, multiscale, shift, window_lens)
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    wavfile.write("cqt_original.wav", fs, soundsc(X))
    wavfile.write("cqt_reconstruction.wav", fs, soundsc(X_r)) 
Example 54
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License
def run_world_example():
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15
    temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
    temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
            temporal_positions_h, f0_h, vuv_h)
    temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
            temporal_positions_h, f0_h, vuv_h)
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, spectrogram_ct, fs_ct)
    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, spectrogram_ct, fs_ct)
    wavfile.write("out.wav", fs, soundsc(y)) 
Example 55
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_wavfile.py    MIT License
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile) 
Example 56
Project: apicultor   Author: sonidosmutantes   File: FixedSegmentation.py    GNU General Public License v3.0
def fixed_segmentation(filename, options):
    """
        Segmenta con  duración fija
        Solo soporta wav files como input
        Si el archivo tiene una duración menor a la segmentación requerida, se graba
        en su duración original y nombre
    """

    if not '.wav' in filename:
        raise Exception("fixed_segmentation only process wav files")

    outputPath = options['outputPath']
    fixed_dur = options['duration']

    try:
        sr, wavsignal = wavfile.read(filename)

        durSamples = int( fixed_dur*sr )
        n_samples = len(wavsignal)

        baseName = os.path.splitext(filename)[0].split('/')[-1]

        if durSamples > n_samples:
            print("El archivo tiene una duración inferior a la segmentación requerida")
            print("Se graba el archivo en su duración original")
            outputFilename = outputPath+'/'+baseName+'.wav'
            wavfile.write(outputFilename,sr,np.array(wavsignal, dtype='int16'))
            print("File generated: %s"%outputFilename)
            return

        segments = int( np.ceil( n_samples/durSamples ) )
        pos = 0
        for i in range(segments):
            signalOut = wavsignal[pos:pos+durSamples]
            pos += durSamples
            outputFilename = outputPath+'/'+baseName+'_sample'+str(i)+'.wav'
            wavfile.write(outputFilename,sr,np.array(signalOut, dtype='int16'))
            print("File generated: %s"%outputFilename)
    except Exception as e:
        #TODO: add standard logging output
        print("Error: %s"%e) 
Example 57
Project: summus   Author: urinieto   File: main.py    MIT License
def generate_summary(audio_file, P=3, N=16, L=None, feature_type=PCP_TYPE,
                     opt=False, out_file=None):
    """Generates the summary of a given audio file.

    Parameters
    ----------
    audio_file : str
        Path to the input audio file.
    P : int > 0
        Number of subsequences in the summary.
    N : int > 0
        Number of beats per subsequence.
    L : int > 0 < N
        Length of the shingles (If None, L = N / 2)
    opt : bool
        Whether to use the optimal or the heuristic method.
    out_file : str
        Path to the output summary audio file (None to not save output).

    Returns
    -------
    summary : np.array
        Samples of the final summary.
    """
    # Compute audio features
    features, audio = compute_features(audio_file, type=feature_type)

    # Find summary
    if opt:
        summary_idxs = find_optimal_summary(features["bs_sequence"], P, N, L)
    else:
        summary_idxs = find_heur_summary(features["bs_sequence"], P, N, L)

    # Synthesize summary
    summary = synth_summary(audio, features["beats"], summary_idxs, N)

    # Save file if needed
    if out_file is not None:
        wavfile.write(out_file, SAMPLING_RATE, summary)

    return summary 
Example 58
Project: deep-karaoke-maker   Author: bachsh   File: deep_karaoke.py    MIT License
def break_song(self, song_path, model_path="karaoke_gpu.torch", **kwargs):
        selectedDelta = 10
        sampleLen = spectrum_helper.sampleLen
        song = wavfile.read(song_path)[1][:, 0]
        Sxx_abs_norm = np.abs(spectrum_helper.transform_signal(song/song.std()))
        # Sxx = spectrum_helper.transform_signal(song)
        # Sxx_abs, Sxx_phase = np.abs(Sxx), np.angle(Sxx)  # type: np.ndarray, np.ndarray
        spectrogram_parts = spectrum_helper.dissect_spectrogram(Sxx_abs_norm, sampleDelta=selectedDelta)
        spectrogram_parts_flat = np.stack([x.flatten() for x in spectrogram_parts])
        spectrogram_parts_tensor = torch.from_numpy(spectrogram_parts_flat).float()  # type: torch.Tensor
        print(spectrogram_parts_tensor)

        input_size = spectrogram_parts_flat.shape[1]
        print("input size: %d" % input_size)
        inner_size = input_size
        print("Loading trained net and params")
        t0 = time()
        net = Net1(input_size, inner_size).float()
        net.load_state_dict(torch.load(model_path))
        print("Done loading net. Took {} seconds".format(time()-t0))
        t0 = time()
        output = net(spectrogram_parts_tensor)  # type: torch.Tensor
        mask_parts_flat = output.sigmoid().detach().numpy()
        print("Finished calculating mask. Took {} seconds".format(time()-t0))
        mask_parts = [np.reshape(x, [input_size//sampleLen, sampleLen]) for x in mask_parts_flat]
        np.save("mask_parts.npy", mask_parts)
        mask = spectrum_helper.assemble_mask(mask_parts, selectedDelta)
        print("mask max: {} min: {}".format(mask.max(), mask.min()))
        np.save("mask.npy", mask)

        vocals, instrumental = spectrum_helper.separate_with_mask(song, mask, force_mask_structure=True)
        wavfile.write("vocals.wav", spectrum_helper.fs, vocals)
        wavfile.write("instrumental.wav", spectrum_helper.fs, instrumental) 
Example 59
Project: jingjuSingingPhraseMatching   Author: ronggong   File: feature_extraction.py    GNU Affero General Public License v3.0
def extract_for_one(wavDataDir, lineList, filename, FILE_EXT_WAV):
    filename_wav                 = os.path.join(wavDataDir,filename+FILE_EXT_WAV)
    filename_wav_silence_removed = os.path.join(wavDataDir+'_silence_removed','temp'+FILE_EXT_WAV)

    ##-- remove the silence from audio
    sr = 44100
    audio = ess.MonoLoader(filename=filename_wav,downmix='left',sampleRate=sr)()
    audio_remove_silence = removeSilence(audio,sr,lineList)
    wavfile.write(filename_wav_silence_removed,sr,audio_remove_silence)

    ##-- process the silence removed audio
    loader = essentia.streaming.EqloudLoader(filename=filename_wav_silence_removed)
    fEx = FeatureExtractor(frameSize=2048, hopSize=1024, sampleRate=loader.paramValue('sampleRate'))
    p = essentia.Pool()

    loader.audio >> fEx.signal

    for desc, output in fEx.outputs.items():
        output >> (p, desc)

    essentia.run(loader)

    # convert pitch from hz to cents
    for i in range(len(p['pitch_instantaneous_pitch'])):
        p['pitch_instantaneous_pitch'][i] = hz2cents(p['pitch_instantaneous_pitch'][i])

    stats = ['mean', 'var', 'dmean', 'dvar']
    statsPool = essentia.standard.PoolAggregator(defaultStats=stats)(p)

    return statsPool 
Example 60
Project: arabic-tacotron-tts   Author: youssefsharief   File: audio.py    MIT License
def save_wav(wav, path):
  wav *= 32767 / max(0.01, np.max(np.abs(wav)))
  # librosa.output.write_wav(path, wav.astype(np.int16), hparams.sample_rate)
  wavfile.write(path, hparams.sample_rate, wav.astype(np.int16)) 
Example 61
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License
def download(url, server_fname, local_fname=None, progress_update_percentage=5,
             bypass_certificate_check=False):
    """
    An internet download utility modified from
    http://stackoverflow.com/questions/22676/
    how-do-i-download-a-file-over-http-using-python/22776#22776
    """
    if bypass_certificate_check:
        import ssl
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        u = urllib.urlopen(url, context=ctx)
    else:
        u = urllib.urlopen(url)
    if local_fname is None:
        local_fname = server_fname
    full_path = local_fname
    meta = u.info()
    with open(full_path, 'wb') as f:
        try:
            file_size = int(meta.get("Content-Length"))
        except TypeError:
            print("WARNING: Cannot get file size, displaying bytes instead!")
            file_size = 100
        print("Downloading: %s Bytes: %s" % (server_fname, file_size))
        file_size_dl = 0
        block_sz = int(1E7)
        p = 0
        while True:
            buffer = u.read(block_sz)
            if not buffer:
                break
            file_size_dl += len(buffer)
            f.write(buffer)
            if (file_size_dl * 100. / file_size) > p:
                status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl *
                                               100. / file_size)
                print(status)
                p += progress_update_percentage 
Example 62
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License
def run_cqt_example():
    try:
        fs, d = fetch_sample_file("/Users/User/cqt_resources/kempff1.wav")
    except ValueError:
        print("WARNING: Using sample music instead but kempff1.wav is the example")
        fs, d = fetch_sample_music()
    X = d[:44100]
    X_cq, c_dc, c_nyq, multiscale, shift, window_lens = cqt(X, fs)
    X_r = icqt(X_cq, c_dc, c_nyq, multiscale, shift, window_lens)
    SNR = 20 * np.log10(np.linalg.norm(X - X_r) / np.linalg.norm(X))
    wavfile.write("cqt_original.wav", fs, soundsc(X))
    wavfile.write("cqt_reconstruction.wav", fs, soundsc(X_r)) 
Example 63
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License
def run_world_mgc_example():
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    # hardcoded for 16k from
    # https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
    mgc_alpha = 0.58
    #mgc_order = 59
    mgc_order = 59
    # this is actually just mcep
    mgc_gamma = 0.0

    temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
    temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
            temporal_positions_h, f0_h, vuv_h)
    temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
            temporal_positions_h, f0_h, vuv_h)

    mgc_arr = sp2mgc(spectrogram_ct, mgc_order, mgc_alpha, mgc_gamma,
            verbose=True)

    from sklearn.externals import joblib
    mem = joblib.Memory("/tmp")
    mem.clear()

    sp_r = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fs=fs, verbose=True)

    import matplotlib.pyplot as plt
    plt.imshow(20 * np.log10(sp_r))
    plt.figure()
    plt.imshow(20 * np.log10(spectrogram_ct))
    plt.show()

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, sp_r, fs)
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_mgc.wav", fs, soundsc(y)) 
Example 65
Project: audio_scripts   Author: audiofilter   File: lame.py    MIT License 5 votes vote down vote up
def save_samples(inputFile,sample_rate,channel_count,bit_rate,audio_data):
    (root, ext) = os.path.splitext(inputFile)  # splitext handles names with extra dots
    if (ext == '.wav'):
        if (audio_data.dtype == numpy.int16):
            # assume already scaled
            wavfile.write(inputFile,sample_rate,audio_data)
        else:
            print("data type is", audio_data.dtype, "- converting to int16 for .wav")
            sar = numpy.array(audio_data,dtype=numpy.int16)
            wavfile.write(inputFile,sample_rate,sar)
    else:
        encode_mp3(inputFile,sample_rate,channel_count,bit_rate,audio_data) 
Example 66
Project: audio_scripts   Author: audiofilter   File: lame.py    MIT License 5 votes vote down vote up
def encode(self, pcm_data, fn):
        sample_count    = len(pcm_data) // 2  # 16-bit mono: two bytes per sample
        output_buff_len = int(1.25 * sample_count + 7200)  # worst-case size from the LAME docs
        output_buff     = (ctypes.c_char*output_buff_len)()
        self.dll.lame_encode_buffer.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int, ctypes.POINTER(ctypes.c_char), ctypes.c_int]
        output_size     = self.dll.lame_encode_buffer(self.lame, pcm_data, 0, sample_count, output_buff, output_buff_len)
        if output_size:
            fn.write(output_buff[0:output_size]) 
Example 67
Project: Whisper   Author: Galaxy-Research-Group   File: Encoder.py    MIT License 5 votes vote down vote up
def encode2wav(self, somestring, filename):
        soundlist = self.string2sound(somestring)
        # print("data:" + str(soundlist.astype(np.dtype('int16'))))
        wavfile.write(filename, self.setting.rate, soundlist.astype(np.dtype('int16'))) 
Example 68
Project: Whisper   Author: Galaxy-Research-Group   File: Encoder.py    MIT License 5 votes vote down vote up
def encodeplay(self, somestring):
        soundlist = self.string2sound(somestring)
        data = soundlist.astype(np.dtype('int16'))
        data_to_send = data.tobytes()
        stream = self.stream
        print("channels:" + str(stream._channels))
        print("rate:" + str(stream._rate))
        print("format:" + str(stream._format))
        print("frames_per_buffer:" + str(stream._frames_per_buffer))
        self.stream.write(data_to_send) 
Example 69
Project: Tacotron-2   Author: Rayhane-mamah   File: audio.py    MIT License 5 votes vote down vote up
def save_wav(wav, path, sr):
	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	#proposed by @dsmiller
	wavfile.write(path, sr, wav.astype(np.int16)) 
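The max(0.01, ...) floor keeps near-silent output from producing an unbounded gain; a quick illustration (the silent clip is invented for this note):

import numpy as np
wav = np.zeros(16000, dtype=np.float32)        # a silent clip
gain = 32767 / max(0.01, np.max(np.abs(wav)))  # floored at 0.01, so no division by zero
print(gain)                                    # 3276700.0: large but finite, and 0 * gain is still 0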
Example 70
Project: Tacotron-2   Author: Rayhane-mamah   File: audio.py    MIT License 5 votes vote down vote up
def save_wavenet_wav(wav, path, sr, inv_preemphasize, k):
	# wav = inv_preemphasis(wav, k, inv_preemphasize)
	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	wavfile.write(path, sr, wav.astype(np.int16)) 
Example 71
Project: AudioCrab   Author: Abdulr-intija   File: WavProcessor.py    MIT License 5 votes vote down vote up
def process(self, leftData, rightData, sampleFrequency):
        self.leftData = leftData
        self.rightData = rightData
        self.sampleFrequency = sampleFrequency

        t = time.time()
        waveLength = sampleFrequency * 16 #sum of wavelength per revolution of sound around baba's head
        self.waveLength = waveLength

        sinePad = self.generateSinePadding(waveLength, self.generatePattern())

        print("Still on the work...\n")
        self.reverb()

        print("Constraining samples and Finalizing...\n")
        finalSampleData = self.injectSinePad(sinePad)

        
        #--------- Portion below in this function will be moved to WavRebox.py soon -----------
        #save
        print("Saving...\n")
        wavfile.write(self.absoluteAudioPath + '.wav', sampleFrequency, np.array(finalSampleData))

        print("Converting to mp3...")
        subprocess.call(['ffmpeg', '-i', self.absoluteAudioPath + '.wav', self.absoluteAudioPath + '.wav.mp3'])


        print("Time spent: ", time.time() - t, " secs")

        #remove tmp files
        if os.path.exists(self.absoluteAudioPath + '.wav'):
            os.remove(self.absoluteAudioPath + '.wav')
        
        if os.path.exists(self.absoluteAudioPath):
            os.remove(self.absoluteAudioPath) 
Example 72
Project: Tacotron-2-keras   Author: Stevel705   File: 4_test.py    MIT License 5 votes vote down vote up
def save_wav(wav, path, sr):
	wav *= 32767 / max(0.01, np.max(np.abs(wav)))
	#proposed by @dsmiller
	wavfile.write(path, sr, wav.astype(np.int16)) 
Example 73
Project: Black-Box-Audio   Author: rtaori   File: run_audio_attack.py    MIT License 4 votes vote down vote up
def run(self, log=None):
        max_fitness_score = float('-inf') 
        dist = float('inf')
        best_text = ''
        itr = 1
        prev_loss = None
        if log is not None:
            log.write('target phrase: ' + self.target_phrase + '\n')
            log.write('itr, corr, lev dist \n')
        
        while itr <= self.max_iters and best_text != self.target_phrase:
            pop_scores, ctc = self.get_fitness_score(self.pop, self.target_phrase, self.input_audio)
            elite_ind = np.argsort(pop_scores)[-self.elite_size:]
            elite_pop, elite_pop_scores, elite_ctc = self.pop[elite_ind], pop_scores[elite_ind], ctc[elite_ind]
            
            if prev_loss is not None and prev_loss != elite_ctc[-1]: 
                self.mutation_p = self.mu * self.mutation_p + self.alpha / np.abs(prev_loss - elite_ctc[-1]) 

            if itr % 10 == 0:
                print('**************************** ITERATION {} ****************************'.format(itr))
                print('Current loss: {}'.format(-elite_ctc[-1]))
                save_wav(elite_pop[-1], self.output_wave_file)
                best_pop = np.tile(np.expand_dims(elite_pop[-1], axis=0), (100, 1))
                _, best_text = self.get_fitness_score(best_pop, self.target_phrase, self.input_audio, classify=True)
                
                dist = levenshteinDistance(best_text, self.target_phrase)
                corr = "{0:.4f}".format(np.corrcoef([self.input_audio, elite_pop[-1]])[0][1])
                print('Audio similarity to input: {}'.format(corr))
                print('Edit distance to target: {}'.format(dist))
                print('Currently decoded as: {}'.format(best_text))
                if log is not None:
                    log.write(str(itr) + ", " + corr + ", " + str(dist) + "\n")
                    
            if dist > 2:
                next_pop = get_new_pop(elite_pop, elite_pop_scores, self.pop_size)
                self.pop = mutate_pop(next_pop, self.mutation_p, self.noise_stdev, elite_pop)
                prev_loss = elite_ctc[-1]
                
            else:
                perturbed = np.tile(np.expand_dims(elite_pop[-1], axis=0), (self.num_points_estimate, 1))
                indices = np.random.choice(self.pop.shape[1], size=self.num_points_estimate, replace=False)

                perturbed[np.arange(self.num_points_estimate), indices] += self.delta_for_gradient
                perturbed_scores = self.get_fitness_score(perturbed, self.target_phrase, self.input_audio)[0]

                grad = (perturbed_scores - elite_ctc[-1]) / self.delta_for_gradient
                grad /= np.abs(grad).max()
                modified = elite_pop[-1].copy()
                modified[indices] += grad * self.delta_for_perturbation

                self.pop = np.tile(np.expand_dims(modified, axis=0), (self.pop_size, 1))
                self.delta_for_perturbation *= 0.995
                
            itr += 1

        return itr > self.max_iters  # True when the attack exhausted its iteration budget
Example 74
Project: pyramic-dataset   Author: fakufaku   File: segment.py    MIT License 4 votes vote down vote up
def save_samples(filename):
    global pattern, output_dir, qc_images

    import re, os
    import numpy
    from scipy.io import wavfile
    from samplerate import resample

    from matplotlib import pyplot



    _, fname = os.path.split(filename)

    if not pattern.match(fname):
        return None

    spkr, angle = [int(i) for i in re.findall(r'\d+', fname)]
    signals = open_segment_rigid(filename, noise_thresh=3, off_lo=150, off_hi=150, plot=False)

    out_name = '{}_spkr{}_angle{}.{}'

    for name, signal in signals.items():

        if name == 'rate':
            continue

        if 'fq_sample' in name:
            # resample the speech signals to 16 kHz, the original rate at which
            # the samples were played; this saves storage and speeds up loading
            signal = resample(signal, 16000. / signals['rate'], 'sinc_best')
            rate = 16000
        else:
            rate = signals['rate']

        # save in folder with name of the sample
        folder = os.path.join(output_dir, name)
        if not os.path.exists(folder):
            os.mkdir(folder)

        # format filename with angle and speaker location
        filename = out_name.format(name, spkr, (360 - angle) % 360, 'wav')

        signal_int16 = signal.astype(numpy.int16)
        wavfile.write(os.path.join(folder, filename), rate, signal_int16)

        if qc_images:
            # save a spectrogram for later inspection

            folder = os.path.join(output_dir, 'qc_images', name)
            if not os.path.exists(folder):
                os.mkdir(folder)

            filename = out_name.format(name, spkr, (360 - angle) % 360, 'png')

            pyplot.specgram(signal[:,0].astype(numpy.float32) / (2**15+1), Fs=rate, NFFT=1024, noverlap=512)
            pyplot.savefig(os.path.join(folder, filename))
            pyplot.clf() 
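The resample call above comes from the samplerate package; a small sketch of the same ratio-based conversion on synthetic data (the signal here is invented for illustration):

import numpy as np
from samplerate import resample

fs_in, fs_out = 48000, 16000
x = np.random.randn(fs_in).astype(np.float32)  # one second of noise
y = resample(x, fs_out / fs_in, 'sinc_best')   # roughly fs_out samples out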
Example 75
Project: signaltrain   Author: drscotthawley   File: audio.py    GNU General Public License v3.0 4 votes vote down vote up
def compressor_4controls(x, thresh=-24.0, ratio=2.0, attackTime=0.01, releaseTime=0.01, sr=44100.0):
    """
    Thanks to Eric Tarr for MATLAB code for this, p. 428 of his Hack Audio book.  Used with permission.
    Our mods for Python:
        Minimized the for loop, removed dummy variables, and invoked numba @jit to make this "fast"
    Inputs:
      x: input signal
      sr: sample rate in Hz
      thresh: threshold in dB
      ratio: compression ratio (should be >= 1, i.e. ratio:1)
      attackTime, releaseTime: in seconds
    """
    N = len(x)
    dtype = x.dtype
    y = np.zeros(N, dtype=dtype)
    lin_A = np.zeros(N, dtype=dtype)  # functions as gain

    # Initialize separate attack and release times
    alphaA = np.exp(-np.log(9)/(sr * attackTime))#.astype(dtype,copy=False)  numba doesn't support astype
    alphaR = np.exp(-np.log(9)/(sr * releaseTime))#.astype(dtype,copy=False)

    # Turn the input signal into a uni-polar signal on the dB scale
    x_uni = np.abs(x)
    x_dB = 20*np.log10(x_uni + 1e-8)  # x_uni casts type

    # Ensure there are no values of negative infinity
    #x_dB = np.clip(x_dB, -96, None)   # Numba doesn't yet support np.clip but we can write our own
    x_dB = my_clip_min(x_dB, -96)

    # Static Characteristics
    gainChange_dB = np.zeros(x_dB.shape[0], dtype=dtype)
    i = np.where(x_dB > thresh)
    gainChange_dB[i] =  thresh + (x_dB[i] - thresh)/ratio - x_dB[i] # Perform Downwards Compression

    for n in range(1, N):  # this loop is slow but not vectorizable due to its cumulative, sequential nature. @autojit makes it fast(er).
        # smooth over the gainChange
        if gainChange_dB[n] < lin_A[n-1]:
            lin_A[n] = ((1-alphaA)*gainChange_dB[n]) +(alphaA*lin_A[n-1]) # attack mode
        else:
            lin_A[n] = ((1-alphaR)*gainChange_dB[n]) +(alphaR*lin_A[n-1]) # release

    lin_A = np.power(10.0,(lin_A/20))  # Convert to linear amplitude scalar; i.e. map from dB to amplitude

    y = lin_A * x    # Apply linear amplitude to input sample

    return y


 # this is an echo or delay function 
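To exercise compressor_4controls above, a hedged usage sketch (the test tone and settings are invented for illustration):

import numpy as np

sr = 44100.0
t = np.arange(int(sr)) / sr
x = (0.9 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)  # 1 s, 440 Hz tone
y = compressor_4controls(x, thresh=-24.0, ratio=4.0,
                         attackTime=0.01, releaseTime=0.1, sr=sr)
# levels above -24 dBFS are pulled toward the threshold at a 4:1 ratio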
Example 76
Project: WaveNet   Author: MU94W   File: fast_gen.py    MIT License 4 votes vote down vote up
def main():
    args = get_args()
    net = FastGen()
    graph = tf.Graph()
    with graph.as_default():
        with tf.variable_scope("data"):
            wav_placeholder = tf.placeholder(shape=(args.batch_size, 1), dtype=tf.float32)
            inputs = {"wav": wav_placeholder}
        # build net.
        net_tensor_dic = net.build(inputs=inputs)
        global_step = tf.Variable(0, dtype=tf.int32, name="global_step")

        # get saver.
        saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=config) as sess:
        # warm-up queues
        sess.run(net_tensor_dic["init_op"])

        # get checkpoint
        ckpt = tf.train.get_checkpoint_state(args.save_path)
        assert ckpt
        saver.restore(sess=sess, save_path=ckpt.model_checkpoint_path)

        global_step_eval = sess.run(global_step)
        samples_batch = np.zeros(shape=(args.batch_size, 1), dtype=np.float32)
        audio_batch = np.empty(shape=(args.batch_size, args.gen_samples), dtype=np.float32)
        for idx in tqdm.trange(args.gen_samples):
            samples_batch = sess.run(net_tensor_dic["synthesized_samples"], feed_dict={wav_placeholder: samples_batch})
            audio_batch[:, idx] = samples_batch[:, 0]

    # save syn-ed audios
    if not os.path.exists(args.gen_path) or not os.path.isdir(args.gen_path):
        os.makedirs(args.gen_path)
    audio_batch = np.int16(audio_batch * (1 << 15))
    for idx, audio in enumerate(audio_batch):
        siowav.write(os.path.join(args.gen_path, "{}_{}.wav".format(global_step_eval, idx)),
                     data=audio, rate=args.sample_rate)

    print("Congratulations!") 
Example 77
Project: speaker_extraction   Author: xuchenglin28   File: decode.py    GNU General Public License v3.0 4 votes vote down vote up
def decode():
    tfrecords_list, num_batches = read_list(FLAGS.lists_dir, FLAGS.data_type, FLAGS.batch_size)

    with tf.Graph().as_default():
        with tf.device('/cpu:0'):
            with tf.name_scope('input'):
                cmvn = np.load(FLAGS.inputs_cmvn)
                cmvn_aux = np.load(FLAGS.inputs_cmvn.replace('cmvn', 'cmvn_aux'))
                if FLAGS.with_labels:
                    inputs, inputs_cmvn, inputs_cmvn_aux, labels, lengths, lengths_aux = paddedFIFO_batch(tfrecords_list, FLAGS.batch_size,
                        FLAGS.input_size, FLAGS.output_size, cmvn=cmvn, cmvn_aux=cmvn_aux, with_labels=FLAGS.with_labels, 
                        num_enqueuing_threads=1, num_epochs=1, shuffle=False)
                else:
                    inputs, inputs_cmvn, inputs_cmvn_aux, lengths, lengths_aux = paddedFIFO_batch(tfrecords_list, FLAGS.batch_size,
                        FLAGS.input_size, FLAGS.output_size, cmvn=cmvn, cmvn_aux=cmvn_aux, with_labels=FLAGS.with_labels,
                        num_enqueuing_threads=1, num_epochs=1, shuffle=False)
                    labels = None
               
        with tf.name_scope('model'):
            model = Model(FLAGS, inputs, inputs_cmvn, inputs_cmvn_aux, labels, lengths, lengths_aux, infer=True)

        init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess = tf.Session()
        sess.run(init)

        checkpoint = tf.train.get_checkpoint_state(FLAGS.save_model_dir)
        if checkpoint and checkpoint.model_checkpoint_path:
            tf.logging.info("Restore best model from " + checkpoint.model_checkpoint_path)
            model.saver.restore(sess, checkpoint.model_checkpoint_path)
        else:
            tf.logging.fatal("Checkpoint is not found, please check the best model save path is correct.")
            sys.exit(-1)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for batch in range(num_batches):
                if coord.should_stop():
                    break

                sep, mag_lengths = sess.run([model._sep, model._lengths])
                for i in range(FLAGS.batch_size):
                    filename = tfrecords_list[FLAGS.batch_size*batch+i]
                    (_, name) = os.path.split(filename)
                    (uttid, _) = os.path.splitext(name)
                    noisy_file = os.path.join(FLAGS.noisy_dir, uttid + '.wav')
                    enhan_sig, rate = reconstruct(np.squeeze(sep[i,:mag_lengths[i],:]), noisy_file)
                    savepath = os.path.join(FLAGS.rec_dir, uttid + '.wav')
                    wav.write(savepath, rate, enhan_sig)

                if (batch+1) % 100 == 0:
                    tf.logging.info("Number of batches processed: %d." % (batch+1))

        except Exception as e:
            coord.request_stop(e)
        finally:
            coord.join(threads) 
Example 78
Project: tools   Author: kastnerkyle   File: audio_tools.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def run_world_mgc_example():
    fs, d = fetch_sample_speech_tapestry()
    d = d.astype("float32") / 2 ** 15

    # hardcoded for 16k from
    # https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world/extract_features_for_merlin.sh
    mgc_alpha = 0.58
    mgc_order = 59
    # this is actually just mcep
    mgc_gamma = 0.0

    #from sklearn.externals import joblib
    #mem = joblib.Memory("/tmp")
    #mem.clear()

    def enc():
        temporal_positions_h, f0_h, vuv_h, f0_candidates_h = harvest(d, fs)
        temporal_positions_ct, spectrogram_ct, fs_ct = cheaptrick(d, fs,
                temporal_positions_h, f0_h, vuv_h)
        temporal_positions_d4c, f0_d4c, vuv_d4c, aper_d4c, coarse_aper_d4c = d4c(d, fs,
                temporal_positions_h, f0_h, vuv_h)

        mgc_arr = sp2mgc(spectrogram_ct, mgc_order, mgc_alpha, mgc_gamma,
                verbose=True)
        return mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c


    mgc_arr, spectrogram_ct, f0_d4c, vuv_d4c, coarse_aper_d4c = enc()
    sp_r = mgc2sp(mgc_arr, mgc_alpha, mgc_gamma, fs=fs, verbose=True)

    """
    import matplotlib.pyplot as plt
    plt.imshow(20 * np.log10(sp_r))
    plt.figure()
    plt.imshow(20 * np.log10(spectrogram_ct))
    plt.show()
    raise ValueError()
    """

    y = world_synthesis(f0_d4c, vuv_d4c, coarse_aper_d4c, sp_r, fs)
    #y = world_synthesis(f0_d4c, vuv_d4c, aper_d4c, sp_r, fs)
    wavfile.write("out_mgc.wav", fs, soundsc(y)) 
Example 79
Project: deep-karaoke-maker   Author: bachsh   File: __init__.py    MIT License 4 votes vote down vote up
def make_dataset(db_json_filename, datadir="data"):
    t0 = time()
    makedirs(datadir, exist_ok=True)
    metadata = []
    with open(db_json_filename) as json_file:
        data = json.load(json_file)
        base_path = data['base_path']
        for idx, song in enumerate(data['mixes']):
            print("Reading song %d, name: %s" % (idx, song['mix_path']))
            song_dir = path.join(datadir, "song{}".format(idx))
            makedirs(song_dir, exist_ok=True)
            other_stems_paths = [path.join(base_path, x) for x in song['other_stems']]
            target_stems_paths = [path.join(base_path, x) for x in song['target_stems']]
            try:
                other_stems = combine_stems(other_stems_paths)
                target_stems = combine_stems(target_stems_paths)
            except ValueError as ex:
                print(ex)
                print("Error. Skipping file")
                continue
            mix, mask = mix_channels(other_stems, target_stems)
            Sxx = transform_signal(mix)
            Sxx_real = np.real(Sxx)
            mix_parts = dissect_spectrogram(Sxx_real)
            mask_parts = dissect_spectrogram(mask)
            wavfile.write(path.join(song_dir, "mix.wav"), rate=fs, data=mix)
            for idx2, (mix_part, mask_part) in enumerate(zip(mix_parts, mask_parts)):
                input_path = path.join(song_dir, "{}.in".format(idx2))
                output_path = path.join(song_dir, "{}.out".format(idx2))
                np.save(input_path, mix_part)
                np.save(output_path, mask_part)
                metadata.append({
                    "input": input_path,
                    "output": output_path,
                    "song": song['mix_path'],
                })
    dataset_data = {
        "sample_shape": [nperseg//2+1, sampleLen],
        "song_samples": metadata,
    }
    with open("metadata.json", "w") as f:
        json.dump(dataset_data, f)
    print("Dataset ready. Took {} seconds".format(time()-t0)) 
Example 80
Project: irasl2018   Author: jfsantos   File: audio_tools.py    MIT License 4 votes vote down vote up
def run_phase_vq_example():
    def _pre(list_of_data):
        # Temporal window setting is crucial! - 512 seems OK for music,
        # 256 for the fruit speech set, perhaps due to sample rates
        n_fft = 256
        step = 32
        f_r = np.vstack([np.abs(stft(dd, n_fft, step=step, real=False,
                                compute_onesided=False))
                         for dd in list_of_data])
        return f_r, n_fft, step

    def preprocess_train(list_of_data, random_state):
        f_r, n_fft, step = _pre(list_of_data)
        clusters = copy.deepcopy(f_r)
        return clusters

    def apply_preprocess(list_of_data, clusters):
        f_r, n_fft, step = _pre(list_of_data)
        f_clust = f_r
        # Nondeterministic ?
        memberships, distances = vq(f_clust, clusters)
        vq_r = clusters[memberships]
        d_k = iterate_invert_spectrogram(vq_r, n_fft, step, verbose=True)
        return d_k

    random_state = np.random.RandomState(1999)

    fs, d = fetch_sample_speech_fruit()
    d1 = d[::9]
    d2 = d[7::8][:5]
    # make sure d1 and d2 aren't the same!
    assert [len(di) for di in d1] != [len(di) for di in d2]

    clusters = preprocess_train(d1, random_state)
    fix_d1 = np.concatenate(d1)
    fix_d2 = np.concatenate(d2)
    vq_d2 = apply_preprocess(d2, clusters)

    wavfile.write("phase_train_no_agc.wav", fs, soundsc(fix_d1))
    wavfile.write("phase_vq_test_no_agc.wav", fs, soundsc(vq_d2))

    agc_d1, freq_d1, energy_d1 = time_attack_agc(fix_d1, fs, .5, 5)
    agc_d2, freq_d2, energy_d2 = time_attack_agc(fix_d2, fs, .5, 5)
    agc_vq_d2, freq_vq_d2, energy_vq_d2 = time_attack_agc(vq_d2, fs, .5, 5)

    """
    import matplotlib.pyplot as plt
    plt.specgram(agc_vq_d2, cmap="gray")
    #plt.title("Fake")
    plt.figure()
    plt.specgram(agc_d2, cmap="gray")
    #plt.title("Real")
    plt.show()
    """

    wavfile.write("phase_train_agc.wav", fs, soundsc(agc_d1))
    wavfile.write("phase_test_agc.wav", fs, soundsc(agc_d2))
    wavfile.write("phase_vq_test_agc.wav", fs, soundsc(agc_vq_d2))