Python pydub.AudioSegment.from_wav() Examples

The following are 17 code examples of pydub.AudioSegment.from_wav(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pydub.AudioSegment, or try the search function.
Example #1
Source File:    From honk with MIT License
def segment_audio(self, file_name, segments):
    """Cut one fixed-length clip per segment out of a WAV file and save it.

    For every ``(start, end)`` pair a window of ``self.audio_length`` is
    centred on the midpoint of the pair, clamped so that it never starts
    before position 0, and exported to ``self.output_dir`` as
    ``<source stem>_<start>~<end>.wav``.

    Args:
        file_name: Path of the source WAV file.
        segments: Iterable of ``(start, end)`` pairs with ``start < end``.
            The values are used directly as pydub slice positions, which
            pydub interprets as milliseconds.

    Raises:
        AssertionError: If a segment does not satisfy ``start < end``.
    """
    audio_data = AudioSegment.from_wav(file_name)

    # Derive the prefix once from the *source* name. Rebinding ``file_name``
    # inside the loop made every later clip inherit the previous clip's
    # output name as its prefix.
    file_prefix = os.path.basename(file_name).split('.')[0]
    padding = round(self.audio_length / 2)

    for segment in segments:
        assert segment[0] < segment[1]
        center = round((segment[0] + segment[1]) / 2)

        # Clamp at 0 so a segment close to the beginning does not produce a
        # negative start (which would slice from the end of the audio).
        start_time = max(0, center - padding)
        end_time = start_time + self.audio_length

        output_name = os.path.join(
            self.output_dir,
            file_prefix + "_" + str(start_time) + "~" + str(end_time) + ".wav",
        )
        audio_data[start_time:end_time].export(output_name, format="wav")
Example #2
Source File:    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithPause(self, image_file, audio_file, pause):
    """Load a frame's narration and compute its length including a pause.

    Args:
        image_file: Image shown for this frame (not used by the visible code).
        audio_file: Path to the narration WAV; backslashes are normalised to
            forward slashes.
        pause: Silence appended after the narration, in milliseconds.

    NOTE(review): the visible source ends after computing ``duration`` and
    ``audio_clip_with_pause`` without storing them anywhere -- the lines that
    record them appear to be missing from this listing; confirm upstream.
    """
    audio_file = audio_file.replace("\\", "/")
    # Use the file as a context manager so the handle is released as soon as
    # the frame count and sample rate have been read.
    with sf.SoundFile(audio_file) as f:
        duration = (len(f) / f.samplerate) + pause / 1000
    audio_clip = AudioSegment.from_wav(audio_file)
    audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
Example #3
Source File:    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransition(self, image_file, audio_file, transition_file):
    """Register a transition clip that follows the current frame.

    The frame's narration is loaded from ``audio_file``; if that fails for
    any reason the bundled ``pause.wav`` asset is used instead.

    Args:
        image_file: Image shown for this frame (not used by the visible code).
        audio_file: Path to the narration WAV.
        transition_file: Media file played after the frame; its duration is
            read through ``MediaInfo``.

    NOTE(review): ``duration`` and ``audio_clip_with_pause`` are computed but
    not stored in the visible source -- the lines recording them appear to be
    missing from this listing; confirm upstream.
    """
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
        f = sf.SoundFile(audio_file)
    except Exception as e:
        # Best-effort fallback: keep rendering with a stock pause clip, but
        # say so instead of failing silently.
        print("error loading frame audio %s (%s); using pause.wav" % (audio_file, e))
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    # Close the sound file once its length has been read.
    with f:
        duration = (len(f) / f.samplerate)
    audio_clip_with_pause = audio_clip
    # (transition file, index of the last frame before it, length in seconds)
    self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000))
Example #4
Source File:    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
    """Register a frame duration and a transition, both extended by a pause.

    Args:
        image_file: Image shown for this frame (not used by the visible code).
        audio_file: Path to the narration WAV.
        transition_file: Media file played after the frame; its duration is
            read through ``MediaInfo``.
        pause: Extra time in milliseconds added to the frame duration and to
            the transition length.

    NOTE(review): ``audio_clip_with_pause`` is not stored in the visible
    source -- the line recording it appears to be missing; confirm upstream.
    """
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = "%s" % audio_file
    # Read the narration length and release the file handle right away.
    with sf.SoundFile(audio_file) as f:
        duration = (len(f) / f.samplerate)
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
    except Exception:
        # Fall back to plain silence of the pause length so rendering can
        # continue.
        print("error with frame audio transition pause for %s" % audio_file)
        audio_clip = AudioSegment.silent(duration=pause)
    audio_clip_with_pause = audio_clip
    self.durations.append(duration + (pause/1000))
    # (transition file, index of the last frame before it, length in seconds)
    self.transitions.append((transition_file, len(self.imageframes) - 1, (duration_in_ms / 1000) + (pause/1000)))
Example #5
Source File:    From HanTTS with MIT License
def synthesize(self, text, src, dst):
    """
    Synthesize .wav from text
    src is the folder that contains all syllables .wav files
    dst is the destination folder to save the synthesized file

    Both ``src`` and ``dst`` are joined to file names by plain string
    concatenation, so they are expected to end with a path separator.
    The result is written to ``dst + "generated.wav"``.
    """
    print("Synthesizing ...")
    delay = 0
    increment = 355 # milliseconds
    pause = 500 # pause for punctuation
    syllables = lazy_pinyin(text, style=pypinyin.TONE3)

    # initialize to be complete silence, each character takes up ~500ms
    result = AudioSegment.silent(duration=500*len(text))
    for syllable in syllables:
        path = src+syllable+".wav"
        # insert 500 ms silence for punctuation marks
        if syllable in TextToSpeech.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            # The punctuation mark has been accounted for; do not also try
            # to load a recording for it.
            continue
        # skip sound file that doesn't exist
        if not Path(path).is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment

    directory = dst
    # Make sure the destination exists before exporting into it.
    if not os.path.exists(directory):
        os.makedirs(directory)

    result.export(directory+"generated.wav", format="wav")
Example #6
Source File:    From nyumaya_audio_recognition with Apache License 2.0
def load_audio_file(filename,resize=False):
	"""Load an audio file and normalise it to mono 16-bit samples.

	The decoder is chosen from the file extension (mp3, wav, ogg, flac, 3gp,
	3g), compared case-insensitively. The audio is then converted to the
	module-level ``samplerate``, one channel and a sample width of 2 bytes.

	Args:
		filename: Path of the audio file.
		resize: Accepted for compatibility; not used by this function.

	Returns:
		``(sound, duration_seconds)`` on success, ``(None, None)`` when the
		extension is not recognised or loading/conversion fails.
	"""
	# Extension -> loader. from_file() needs the container name spelled out.
	try:
		lowered = filename.lower()
		sound = None
		if lowered.endswith('.mp3'):
			sound = AudioSegment.from_mp3(filename)
		elif lowered.endswith('.wav'):
			sound = AudioSegment.from_wav(filename)
		elif lowered.endswith('.ogg'):
			sound = AudioSegment.from_ogg(filename)
		elif lowered.endswith('.flac'):
			sound = AudioSegment.from_file(filename, "flac")
		elif lowered.endswith(('.3gp', '.3g')):
			sound = AudioSegment.from_file(filename, "3gp")

		if sound is None:
			# Unknown extension: report it the same way as a decode failure.
			print("Couldn't load file")
			return None,None

		sound = sound.set_frame_rate(samplerate)
		sound = sound.set_channels(1)
		sound = sound.set_sample_width(2)
		duration = sound.duration_seconds
	except Exception:
		# Best-effort loader: callers get (None, None) instead of an exception.
		print("Couldn't load file")
		return None,None
	return sound,duration
Example #7
Source File:    From Self-Supervised-Speech-Pretraining-and-Representation-Learning with MIT License
def segment_mosei(args):
    """Cut the MOSEI wav files into per-utterance FLAC files.

    Reads ``<args.output_path>/mosei_no_semi.csv`` and, for every row, slices
    ``<args.data_path>/<row.filename>.wav`` between ``row.start`` and
    ``row.end`` (seconds, converted to the milliseconds pydub slices by) and
    writes the piece to ``<args.output_path>/flac/<row.key>.flac`` as mono,
    16-bit, 16 kHz audio.

    Args:
        args: Object with ``output_path`` and ``data_path`` attributes.

    Raises:
        AssertionError: If the summary CSV or a referenced wav is missing, or
            a row has a negative start/end time.
    """
    import shutil

    output_dir = args.output_path
    mosei_summary = os.path.join(output_dir, 'mosei_no_semi.csv')
    flac_dir = os.path.join(output_dir, 'flac')
    assert os.path.exists(mosei_summary), 'Output path should already be created with a mosei_no_semi.csv inside it'
    for target_dir in [flac_dir]:
        if os.path.exists(target_dir):
            decision = input(f'{target_dir} already exists. Remove it? [Y/N]: ')
            if decision.upper() == 'Y':
                # Actually delete the directory before reporting it as removed.
                shutil.rmtree(target_dir)
                print(f'{target_dir} removed')
        # The export below needs the directory to exist, whether it was just
        # removed, was kept, or was never there.
        os.makedirs(target_dir, exist_ok=True)

    df = pd.read_csv(mosei_summary)

    for _, row in df.iterrows():
        underscore = row.key
        wavname = f'{row.filename}.wav'
        wavpath = os.path.join(args.data_path, wavname)
        assert os.path.exists(wavpath), f'wav not exists: {wavpath}'
        wav = AudioSegment.from_wav(wavpath)

        start = int(row.start * 1000)
        end = int(row.end * 1000)
        assert start >= 0, f'{underscore} has negative start time'
        assert end >= 0, f'{underscore} has negative end time'
        seg_wav = wav[start:end]
        seg_flacpath = os.path.join(flac_dir, f'{underscore}.flac')
        seg_wav.export(seg_flacpath, format='flac', parameters=['-ac', '1', '-sample_fmt', 's16', '-ar', '16000'])

# MAIN #
Example #8
Source File:    From audiogrep with MIT License
def extract_words(files):
    ''' Extracts individual words from files and exports them to individual files.

    Every word found by ``convert_timestamps`` is cut out of the source audio
    and written to ``extracted_words/<word>.<ext>``; when that name is taken,
    ``_1``, ``_2``, ... is appended until a free name is found. Only ``.mp3``
    and ``.wav`` sources are supported; other files are reported and skipped.
    '''
    output_directory = 'extracted_words'
    os.makedirs(output_directory, exist_ok=True)

    for f in files:
        lowered = f.lower()
        if lowered.endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif lowered.endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        else:
            # Without a decoded source there is nothing to slice; move on
            # instead of failing later on a None segment.
            print('Unsupported audio format for ' + f)
            continue

        # NOTE(review): this passes the whole `files` list, not just `f`, so
        # timestamps from every file are applied to the current source --
        # confirm against convert_timestamps' contract.
        sentences = convert_timestamps(files)
        for s in sentences:
            for word in s['words']:
                # Timestamps are given in seconds; pydub slices in milliseconds.
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                text = word[0]
                total_time = end - start
                audio = AudioSegment.silent(duration=total_time)
                audio = audio.overlay(source_segment[start:end])

                # Find the first file name that is not taken yet.
                number = 0
                while True:
                    output_filename = text
                    if number:
                        output_filename += "_" + str(number)
                    output_filename = output_filename + '.' + file_format
                    output_path = os.path.join(output_directory, output_filename)
                    if not os.path.exists(output_path):
                        # this file doesn't exist, so we can continue
                        break
                    # file already exists, increment name and try again
                    number += 1
                print('Exporting to: ' + output_path)
                audio.export(output_path, format=file_format)
Example #9
Source File:    From parrots with Apache License 2.0
def synthesize(self, input_text='', output_wav_path=''):
    """
    Synthesize .wav from text
    input_text: the text to synthesize; it is converted to pinyin syllables
        and each syllable's recording is looked up in ``self.syllables_dir``
    output_wav_path: path of the .wav file to write; defaults to 'out.wav'
        when empty

    Returns the synthesized audio segment.
    """
    delay = 0
    increment = 355  # milliseconds
    pause = 500  # pause for punctuation
    syllables = lazy_pinyin(input_text, style=pypinyin.TONE3)

    # initialize to be complete silence, each character takes up ~500ms
    result = AudioSegment.silent(duration=500 * len(input_text))
    for syllable in syllables:
        path = os.path.join(self.syllables_dir, syllable + ".wav")
        # insert 500 ms silence for punctuation marks
        if syllable in self.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            # The punctuation mark has been accounted for; do not also try
            # to load a recording for it.
            continue
        # skip sound file that doesn't exist
        if not Path(path).is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment
    if not output_wav_path:
        output_wav_path = 'out.wav'

    result.export(output_wav_path, format="wav")
    default_logger.debug("Exported:" + output_wav_path)
    return result
Example #10
Source File:    From DeepMusicClassification with MIT License
def slice_audio(audio_file, end=3):
    """ Slices a single audio file into 3 second chunks

    Args:
        audio_file: Path of the WAV file to slice.
        end: End of the first chunk in seconds; every chunk therefore spans
            ``end`` seconds. The window advances by 3 seconds per chunk.

    Returns:
        A list of 10 audio slices, in order.
    """
    step_ms = 3000
    start = 0
    stop = end * 1000
    audio = AudioSegment.from_wav(audio_file)
    slices = []

    for _ in range(10):
        # Collect the chunk -- without this the function returned an empty list.
        slices.append(audio[start:stop])
        start += step_ms
        stop += step_ms

    return slices
Example #11
Source File:    From fake-voice-detection with Apache License 2.0
def get_durations_from_dir(audio_dir, file_extension='.wav'):
    """Collect the duration in seconds of every matching file under a directory.

    Args:
        audio_dir: Directory that is walked recursively.
        file_extension: Suffix a file name must end with to be measured.
            Files are always decoded as WAV.

    Returns:
        A NumPy array with one duration (seconds) per matching file, in the
        order ``os.walk`` yields them.
    """
    durations = []
    for root, _dirs, filenames in os.walk(audio_dir):
        for file_name in filenames:
            # Match the suffix, not any substring, so e.g. "a.wav.bak" is
            # not handed to the WAV decoder.
            if file_name.endswith(file_extension):
                audio = AudioSegment.from_wav(os.path.join(root, file_name))
                durations.append(audio.duration_seconds)
    return np.array(durations)
Example #12
Source File:    From midi2voice with MIT License
def sinsyFix(wavPath,tempo):
	"""Load a WAV file and drop the first four beats' worth of audio.

	With ``tempo`` in beats per minute, four beats last ``4 * 60 / tempo``
	seconds; the offset is converted to the milliseconds pydub slices by.

	NOTE(review): the trimmed audio is not written back or returned in the
	visible source -- the listing may be cut short; confirm upstream.
	"""
	full_track = AudioSegment.from_wav(wavPath)
	# Delete extra 4 beats of silence at the beginning of the file
	lead_in_ms = int(1000*4*60/tempo)
	song = full_track[lead_in_ms:]
Example #13
Source File:    From Speech-Hacker with Apache License 2.0
def audio_generator(dict_dir, text, output_dest):
    """Build a sentence by concatenating pre-recorded word clips.

    A word-to-wav-file mapping is read from ``dict_dir``; every word of
    ``text`` that is present in the mapping contributes its clip, in order.
    The result is exported as ``<output_dest>/output<N>.wav`` where ``N`` is
    the first number whose file does not exist yet.

    Args:
        dict_dir: Directory holding the mapping file and the word clips.
        text: Space-separated words to speak.
        output_dest: Directory the joined audio is written to.

    Raises:
        Exception: If none of the words in ``text`` has a clip.
    """
    # NOTE(review): the mapping's file name after "/" is missing in this
    # listing, so this open() targets the directory itself -- restore the
    # real file name from the upstream project.
    with open(dict_dir + "/") as f:
        # The mapping is stored as a Python literal; literal_eval parses it
        # without executing code.
        myDict = ast.literal_eval(f.read())

    # Keep only the words we have a recording for, preserving their order.
    mainList = [
        AudioSegment.from_wav(dict_dir + "/" + myDict[word])
        for word in text.split(" ")
        if word in myDict
    ]

    # Check to see if at least one word was generated
    if not mainList:
        raise Exception('\033[91m' + "None of the words you entered was" +
                        " spoken by your figure." + '\033[0m')

    # If a file with the default name exits, create a new name with a
    # new suffix
    res = 0
    while os.path.exists(output_dest + "/output" + str(res) + ".wav"):
        res += 1
    output_path = output_dest + "/output" + str(res) + ".wav"

    # Concatenate selected audio words
    mainAudio = mainList[0]
    for clip in mainList[1:]:
        mainAudio += clip

    # Export the joined audio
    mainAudio.export(output_path, format="wav")

    if os.path.exists(output_path):
        print('\033[94m' + "Speech-Hacker: " +
              "Your audio was generated at: " + output_dest + "/output" +
              str(res) + ".wav" + '\033[0m')
    else:
        print("Speech-Hacker: " '\033[91m' +
              "Failed to generate your requested audio." + '\033[0m')
Example #14
Source File:    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrame(self, image_file, audio_file):
    """Load a frame's narration and compute its duration in seconds.

    If the narration cannot be loaded for any reason, the bundled
    ``pause.wav`` asset is used instead.

    Args:
        image_file: Image shown for this frame (not used by the visible code).
        audio_file: Path to the narration WAV; backslashes are normalised to
            forward slashes.

    NOTE(review): ``audio_clip`` and ``duration`` are not stored in the
    visible source -- the lines recording them appear to be missing from this
    listing; confirm upstream.
    """
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
        f = sf.SoundFile(audio_file)
    except Exception as e:
        # Best-effort fallback: keep rendering with a stock pause clip, but
        # say so instead of failing silently.
        print("error loading frame audio %s (%s); using pause.wav" % (audio_file, e))
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)

    # Close the sound file once its length has been read.
    with f:
        duration = len(f) / f.samplerate
Example #15
Source File:    From dnn-model-services with MIT License
def spleeter(audio_url=None, audio=None):
    """Split an audio clip into vocals and accompaniment with Spleeter.

    Args:
        audio_url: Either an http(s) link to download (rejected when the
            advertised size exceeds 10 Mb) or, when longer than 500
            characters, a Base64-encoded audio payload.
        audio: Raw audio bytes, used when ``audio_url`` does not supply data.

    Returns:
        ``{"vocals": bytes, "accomp": bytes}`` holding MP3 data on success,
        or ``{"error": message}`` when anything goes wrong. The function
        does not raise.
    """
    import logging
    import shutil

    # NOTE(review): the original logging calls were stripped from this
    # listing; the messages are routed through the stdlib logger here.
    log = logging.getLogger(__name__)
    tmp_dir = None
    try:
        audio_data = audio
        if audio_url:
            # Link
            if "http://" in audio_url or "https://" in audio_url:
                header = {'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:9.0) Gecko/20100101 Firefox/10.0'}
                # Check the advertised size first so files over 10 Mb are
                # rejected without being downloaded.
                r = requests.head(audio_url, headers=header, allow_redirects=True)
                size = r.headers.get('content-length', 0)
                size = int(size) / float(1 << 20)
                log.info("File size: {:.2f} Mb".format(size))
                if size > 10:
                    return {"error": "Input audio file is too large! (max 10Mb)"}
                r = requests.get(audio_url, headers=header, allow_redirects=True)
                audio_data = r.content
            # Base64
            elif len(audio_url) > 500:
                audio_data = base64.b64decode(audio_url)

        if not audio_data:
            # Nothing to separate; fail clearly before touching the disk.
            return {"error": "No audio data provided."}

        log.info("Preparing directories...")
        tmp_dir = "/tmp/" + generate_uid() + "/"
        os.makedirs(tmp_dir, exist_ok=True)

        audio_path = generate_uid() + ".audio"
        with open(tmp_dir + audio_path, "wb") as f:
            f.write(audio_data)

        log.info("Preparing Spleeter...")
        # Using embedded configuration.
        separator = Separator("spleeter:2stems")
        separator.separate_to_file(tmp_dir + audio_path, tmp_dir)

        if os.path.exists(tmp_dir + audio_path):
            os.remove(tmp_dir + audio_path)

        # Getting the output files content
        out_dir = tmp_dir + audio_path.replace(".audio", "") + "/"
        output_vocals = out_dir + "vocals.wav"
        AudioSegment.from_wav(output_vocals).export(out_dir + "vocals.mp3", format="mp3")
        with open(out_dir + "vocals.mp3", "rb") as fv:
            vocals = fv.read()
        output_accomp = out_dir + "accompaniment.wav"
        AudioSegment.from_wav(output_accomp).export(out_dir + "accompaniment.mp3", format="mp3")
        with open(out_dir + "accompaniment.mp3", "rb") as fa:
            accomp = fa.read()

        return {"vocals": vocals, "accomp": accomp}

    except Exception as e:
        return {"error": str(e)}
    finally:
        # Deleting the files output directory, on success and on failure.
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)
Example #16
Source File:    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def renderVideo(self):
        """Assemble the final video with a background-music loop and write it as an .mp4.

        NOTE(review): this listing has lost several lines and expressions
        (marked inline below); it does not parse as shown. Restore the
        missing pieces from the upstream project before running it.

        Returns:
            The folder the video file was written to.
        """
        clips = self.videoformat.renderClips(self.content, self.title)
        self.videoformat.createMovie(clips, self)
        # NOTE(review): right-hand side missing -- the background music path
        # cannot be inferred from the visible code.
        self.background_music_name =

        # Rebuilt from self.transitions below; this discards the value
        # returned by renderClips() above.
        clips = []
        # Each transition is a (transition file, last image index, duration) tuple.
        for i, transition in enumerate(self.transitions):
            print("Putting together clip (%s/%s)" % (i + 1, len(self.transitions)))
            transition_file_name = transition[0]
            last_image_index = transition[1]
            transition_duration = transition[2]
            if i == 0:
                # First clip: every frame from the start up to this transition.
                clip = ImageSequenceClip(self.imageframes[0:last_image_index + 1], durations=self.durations[0:last_image_index + 1])
                combined_sounds = sum(self.audiofiles[0:last_image_index + 1])
                audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
                combined_sounds.export(audio_name, format="wav")
                video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
                audio_clip = AudioFileClip(audio_name)
                clip = clip.set_audio(audio_clip)
                clip_with_interval = concatenate_videoclips([clip, video_clip])

                # NOTE(review): an `else:` presumably belonged above this
                # line -- the code below reads transitions[i-1], which only
                # makes sense for i > 0. Also, clip_with_interval is never
                # added to `clips` in either branch as shown.
                prev_image_index = self.transitions[i-1][1]
                clip = ImageSequenceClip(self.imageframes[prev_image_index + 1:last_image_index + 1], durations=self.durations[prev_image_index + 1:last_image_index + 1])
                combined_sounds = sum(self.audiofiles[prev_image_index + 1:last_image_index + 1])
                audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
                combined_sounds.export(audio_name, format="wav")
                video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
                audio_clip = AudioFileClip(audio_name)
                clip = clip.set_audio(audio_clip)
                clip_with_interval = concatenate_videoclips([clip, video_clip])

        # Total length of every clip except the first; the first clip's
        # length is instead prepended to the music as silence further down.
        main_vid_duration = 0
        for i in range(1, len(clips), 1):
            main_vid_duration += clips[i].duration

        print("Generating Audio Loop (%s) " % main_vid_duration)
        print("Using Audio Loop %s" % self.background_music_name)
        # NOTE(review): the remaining argument(s) and the closing parenthesis
        # of this call are missing from the listing.
        music_loop = afx.audio_loop(AudioFileClip(self.background_music_name).fx(afx.volumex, settings.background_music_volume),
        music_loop.to_audiofile("%s/music-loop.wav" % settings.tempPath)
        # Length of the first clip in milliseconds.
        pause_time = int(clips[0].duration * 1000)
        print("Adding pause to start of Audio Loop (%s) " % (pause_time / 1000))
        audio_clip = AudioSegment.from_wav("%s/music-loop.wav" % settings.tempPath)
        # Prepend silence so the music loop starts after the first clip.
        new_audio = AudioSegment.silent(duration=(pause_time)) + audio_clip
        new_audio.export("%s/music-loop2.wav" % settings.tempPath, format='wav')

        # here we are combining the first clip with the last
        print("Combining all Video Clips %s" % (pause_time / 1000))
        main_vid_combined = concatenate_videoclips(clips)
        # NOTE(review): the first element of the CompositeAudioClip list is
        # missing from the listing.
        main_vid_with_audio = main_vid_combined.set_audio(CompositeAudioClip([, AudioFileClip("%s/music-loop2.wav" % settings.tempPath)]))

        folder_location = settings.finishedvideosdirectory + "/vid%s" % self.scriptno
        # NOTE(review): the body of this `if` is missing from the listing.
        if not os.path.exists(folder_location):
        print("Writing video to location %s" % folder_location)
        main_vid_with_audio.write_videofile("%s/%s.mp4" % (folder_location, "vid%s" % self.scriptno), threads=4,
                                            fps=settings.movieFPS, temp_audiofile=settings.currentPath + "\\temp.mp3")
        return folder_location 
Example #17
Source File:    From audiogrep with MIT License
def compose(segments, out='out.mp3', padding=0, crossfade=0, layer=False):
    '''Stitches together a new audiotrack

    Args:
        segments: Dicts with ``'start'`` and ``'end'`` (seconds) and
            ``'file'`` (an audio path, optionally carrying a
            ``.transcription.txt`` suffix that is stripped). Each segment
            that is used gets a ``'duration'`` key set to the length of its
            audio slice.
        out: Output path; the export format is taken from its extension.
        padding: Milliseconds of silence added after every segment.
        crossfade: Crossfade in milliseconds between consecutive segments
            (sequential mode only).
        layer: When true, all segments are overlaid on top of each other
            instead of being played one after another.

    Returns:
        The list of segments that made it into the output.
    '''
    files = {}
    working_segments = []

    audio = AudioSegment.empty()

    if layer:
        # The canvas must be as long as the longest segment; default=0 keeps
        # an empty segment list from raising.
        total_time = max((s['end'] - s['start'] for s in segments), default=0) * 1000
        audio = AudioSegment.silent(duration=total_time)

    for s in segments:
        start = s['start'] * 1000
        end = s['end'] * 1000
        f = s['file'].replace('.transcription.txt', '')
        # Decode every source file only once.
        if f not in files:
            if f.endswith('.wav'):
                files[f] = AudioSegment.from_wav(f)
            elif f.endswith('.mp3'):
                files[f] = AudioSegment.from_mp3(f)
            else:
                # No decoder for this file: skip the segment instead of
                # failing on the lookup below.
                print('Unsupported audio format for ' + f)
                continue

        segment = files[f][start:end]

        print(start, end, f)

        if layer:
            audio = audio.overlay(segment, times=1)
        elif working_segments:
            # Something is already on the track, so it can be crossfaded into.
            audio = audio.append(segment, crossfade=crossfade)
        else:
            audio = audio + segment

        if padding > 0:
            audio = audio + AudioSegment.silent(duration=padding)

        s['duration'] = len(segment)
        working_segments.append(s)

    audio.export(out, format=os.path.splitext(out)[1].replace('.', ''))
    return working_segments