# -*- coding=utf-8 -*- # Author: junzew # Date: 2017 May 27 # Adapted from code written by Alex I. Ramirez @alexram1313 arcompware.com from pypinyin import lazy_pinyin import pypinyin import pydub from pydub import AudioSegment from pathlib import Path import wave import pyaudio import _thread import time import sys import os import requests import atc import argparse # for demo only, please replace with your own API key Turing_API_key = "64c88489ad7f432591d702ec1334dedc" Turing_API_address = "http://www.tuling123.com/openapi/api" class TextToSpeech: CHUNK = 1024 punctuation = [',', '。','?','!','“','”',';',':','(',")",":",";",",",".","?","!","\"","\'","(",")"] def __init__(self): pass def speak(self, text): syllables = lazy_pinyin(text, style=pypinyin.TONE3) print(syllables) delay = 0 def preprocess(syllables): temp = [] for syllable in syllables: for p in TextToSpeech.punctuation: syllable = syllable.replace(p, "") if syllable.isdigit(): syllable = atc.num2chinese(syllable) new_sounds = lazy_pinyin(syllable, style=pypinyin.TONE3) for e in new_sounds: temp.append(e) else: temp.append(syllable) return temp syllables = preprocess(syllables) for syllable in syllables: path = "syllables/"+syllable+".wav" _thread.start_new_thread(TextToSpeech._play_audio, (path, delay)) delay += 0.355 def synthesize(self, text, src, dst): """ Synthesize .wav from text src is the folder that contains all syllables .wav files dst is the destination folder to save the synthesized file """ print("Synthesizing ...") delay = 0 increment = 355 # milliseconds pause = 500 # pause for punctuation syllables = lazy_pinyin(text, style=pypinyin.TONE3) # initialize to be complete silence, each character takes up ~500ms result = AudioSegment.silent(duration=500*len(text)) for syllable in syllables: path = src+syllable+".wav" sound_file = Path(path) # insert 500 ms silence for punctuation marks if syllable in TextToSpeech.punctuation: short_silence = AudioSegment.silent(duration=pause) result = result.overlay(short_silence, position=delay) delay += increment continue # skip sound file that doesn't exist if not sound_file.is_file(): continue segment = AudioSegment.from_wav(path) result = result.overlay(segment, position=delay) delay += increment directory = dst if not os.path.exists(directory): os.makedirs(directory) result.export(directory+"generated.wav", format="wav") print("Exported.") def _play_audio(path, delay): try: time.sleep(delay) wf = wave.open(path, 'rb') p = pyaudio.PyAudio() stream = p.open(format=p.get_format_from_width(wf.getsampwidth()), channels=wf.getnchannels(), rate=wf.getframerate(), output=True) data = wf.readframes(TextToSpeech.CHUNK) while data: stream.write(data) data = wf.readframes(TextToSpeech.CHUNK) stream.stop_stream() stream.close() p.terminate() return except: pass def start_chatting(key, location): print("你好!") key = Turing_API_key if key is None else key location = "北京市中关村" if location is None else location while True: sentence = input('输入中文:') r = requests.post( Turing_API_address, json = { "key": key, "info": sentence, "loc": location, "userid":"1" }) response = r.json()["text"] print(response) tts.speak(response) if __name__ == '__main__': tts = TextToSpeech() parser = argparse.ArgumentParser(description="HanTTS: Chinese Text-to-Speech program") subparsers = parser.add_subparsers(title="subcommands", help='optional subcommands', dest='cmd') synthesize_parser = subparsers.add_parser('synthesize', help='synthesize audio from text') synthesize_parser.add_argument('--text', help='the text to convert to speech', dest='text') synthesize_parser.add_argument('--src', help='source directory of audio library', dest='src') synthesize_parser.add_argument('--dst', help='destination directory for generated .wav file', dest='dst') chat_parser = subparsers.add_parser('chat', help='chat using Turing Robot API') chat_parser.add_argument('--key', help='Turing Robot API key', dest='api_key') chat_parser.add_argument('--location', help='your physical location', dest='location') args = parser.parse_args() if args.cmd == 'synthesize': if not args.text: synthesize_parser.print_help() print('ERROR: Missing argument --text') sys.exit(1) if not args.src: synthesize_parser.print_help() print('ERROR: Missing argument --src') sys.exit(1) if not args.dst: synthesize_parser.print_help() print('ERROR: Missing argument --dst') sys.exit(1) tts.synthesize(args.text, args.src, args.dst) elif args.cmd == 'chat': start_chatting(args.api_key, args.location) else: while True: tts.speak(input('输入中文:'))