# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import print_function from six.moves import range from six.moves import input __author__ = 'jamiebrew' import os import operator import textwrap import random import re from .corpus import Corpus from .voice import Voice from .pickler import loadobject, save_object # import urllib2 # from bs4 import BeautifulSoup # import unicodedata """ input loop for writing with a corpus or set of corpora WRITING CONTROLS: [option #]: choose option x: delete . or ? or !: end sentence z: move cursor left c: move cursor right r: choose random word (weighted) v[voice #]: change voice rand[#]: randomly chooses # number of words add: add voice set: set corpus weights for this voice info: toggle extra info dynamic: toggle dynamic writing save: save session load: load session [other word]: insert word 0: yield output """ class Voicebox(object): def __init__(self): self.more_info = False self.dynamic = False self.mode_list = ['frequency', 'sigscore', 'count'] self.mode = 'frequency' # self.spanish_to_english = False self.num_options = 20 load_prev = input('Load previous session? y/n\n') if load_prev != 'n': loaded_voicebox = self.load_session() # unpickles a previously-saved object self.cursor = loaded_voicebox.cursor self.cursor_position = loaded_voicebox.cursor_position self.voices = loaded_voicebox.voices self.active_voice = loaded_voicebox.active_voice self.log = loaded_voicebox.log else: self.cursor = "|" self.cursor_position = 0 self.voices = {} self.load_voices() self.active_voice = self.choose_voice() self.log = [] def header(self): headerString = "\nVOICES\n" vnum = 1 for voice_key in sorted(self.voices): v = self.voices[voice_key] headerString += 'v%s. %s\n' % (str(vnum), v.name) vnum += 1 source_num = 1 if len(v.weighted_corpora) > 1: # only print the component sources of voices with more than one for corpus_item in sorted(v.weighted_corpora): c, wt = v.weighted_corpora[corpus_item] headerString += '\ts%s. %s, weight %s\n' % (str(source_num), c.name, wt) source_num += 1 headerString += "\n____________________" return headerString def write(self): sentence = ['START_SENTENCE'] self.cursor_position = 1 voice_name = self.active_voice.name.upper() self.log += [voice_name + ':'] while 1: words_before = sentence[0:self.cursor_position] words_after = sentence[self.cursor_position:] suggestions = self.active_voice.suggest(sentence, self.cursor_position, self.num_options) print(self.header()) print(textwrap.fill(" ".join(self.log + words_before[1:] + [self.cursor] + words_after), 80)) self.display_suggestions(suggestions) # if self.spanish_to_english: # print words_before[-1]+ ": " + self.to_english(words_before[-1]).encode('utf-8').strip() # self.spanish_to_english = False user_input = input('What now?\n') try: user_input = int(user_input) if user_input in range(1, len(suggestions)+1): choice = self.take_suggestion(suggestions, user_input) next_word = choice[0] score_tree = choice[1][1] words_before.append(next_word) sentence = words_before + words_after if self.dynamic: self.update_weights(self.active_voice, score_tree, .1) self.cursor_position += 1 elif user_input == 0: self.log = self.log + sentence self.log.remove('START_SENTENCE') print(" ".join(self.log)) return else: print("That's out of range!") except: pass if user_input == 'z': self.cursor_position -= 1 elif user_input == 'c': self.cursor_position += 1 elif user_input == 'x': self.delete_word(words_before) self.cursor_position -= 1 sentence = words_before + words_after elif user_input == 'r': next_word = self.weighted_random_choice(suggestions) words_before.append(next_word) sentence = words_before + words_after self.cursor_position += 1 # elif user_input == 't': # self.spanish_to_english = True elif user_input == 'info': self.toggle_info() elif user_input == 'dynamic': self.toggle_dynamic() elif user_input == 'add': self.add_voice() elif user_input == 'set': self.set_weights(self.active_voice) elif re.compile('v[0-9]').search(user_input): # switch to different corpus voice_num = user_input[1:] voice_keys = sorted(self.voices.keys()) chosen_voice_name = voice_keys[int(voice_num) - 1] self.active_voice = self.voices[chosen_voice_name] print('%s chosen!' % chosen_voice_name) finished_sentence = self.finish_sentence(words_before, words_after, '.', '\n\n') self.log = self.log + [finished_sentence] + [chosen_voice_name.upper() + ':'] sentence = ['START_SENTENCE'] elif re.compile('rand[0-9]').search(user_input): num_words = user_input[4:] counter = 0 while counter < int(num_words): next_word = self.weighted_random_choice(suggestions) words_before.append(next_word) sentence = words_before + words_after self.cursor_position += 1 counter += 1 words_before = sentence[0:self.cursor_position] words_after = sentence[self.cursor_position:] suggestions = self.active_voice.suggest(sentence, self.cursor_position, self.num_options) elif re.compile('o[0-9]').search(user_input): # change number of options number_chosen = user_input[1:] self.num_options = int(number_chosen) print('Now writing with %s options!' % number_chosen) elif user_input in ['.', '?', '!']: finished_sentence = self.finish_sentence(words_before, words_after, user_input) self.log = self.log + [finished_sentence] sentence = ['START_SENTENCE'] self.cursor_position = 1 elif user_input == 'save': self.save_session() elif user_input == 'load': self.load_session() elif isinstance(user_input, str) and len(user_input.strip()) > 0: words_before.append(user_input) sentence = words_before + words_after self.cursor_position += 1 else: print("Invalid input.") # toggles whether weights to sources in the current voice adjust automatically def toggle_dynamic(self): self.dynamic = not self.dynamic if self.dynamic: print("Dynamic weight adjustment on!") else: print("Dynamic weight adjustment off!") # toggles whether to show information about scores (and their decomposition by source) def toggle_info(self): self.more_info = not self.more_info if self.more_info: print("More info on!") else: print("More info off!") def set_mode(self): for i in range(len(self.mode_list)): print("%s %s" % (i + 1, self.mode_list[i])) choice = input('Enter the number of the session you want to load:\n') self.mode = self.mode_list[int(choice) - 1] # saves all information about the current session def save_session(self): path = 'saved/%s.pkl' % input("Choose save name:\n") save_object(self, path) print("Saved voicebox to %s!" % path) # prompts choice of session to load, then loads it. def load_session(self): sessions = os.listdir('saved') for i in range(len(sessions)): print("%s %s" % (i + 1, sessions[i])) choice = input('Enter the number of the session you want to load:\n') session_name = sessions[int(choice) - 1] path = 'saved/%s' % session_name return loadobject(path) # given a chosen word and a tree of scores assigned to it by different sources, updates the weights of those sources # according to whether they exceeded or fell short of their expected contribution to the suggestion def update_weights(self, v, score_tree, delta): total_score = sum(score_tree.values()) for key in v.weighted_corpora: corp, wt = v.weighted_corpora[key] expected_share = wt/1 if key in score_tree: sub_score = score_tree[key] else: sub_score = 0 actual_share = sub_score / total_score performance_relative_to_expectation = actual_share - expected_share v.weighted_corpora[corp.name][1] += performance_relative_to_expectation * delta # prompts user to set weights for each corpus in a given voice def set_weights(self, v): for key in v.weighted_corpora: corpus_name = v.weighted_corpora[key][0].name corpus_weight_prompt = 'Enter the weight for %s:\n' % corpus_name corpus_weight = float(input(corpus_weight_prompt)) v.weighted_corpora[key][1] = corpus_weight v.normalize_weights() # random choice without weight bias def flat_random_choice(self, suggestions): return random.randint(1, len(suggestions)) # returns a word from the suggestion list; choice weighted according to scores def weighted_random_choice(self, suggestions): total = sum(score_info[0] for word, score_info in suggestions) r = random.uniform(0, total) upto = 0 for word, score_info in suggestions: if upto + score_info[0] >= r: return word upto += score_info[0] assert False, "Shouldn't get here" # deletes word before the cursor from sentence def delete_word(self, before): if len(before) == 1: print("Cannot delete the start of the sentence!") else: del before[-1] # remove last element of current line def finish_sentence(self, before, after, delimiter, line_break=''): sentence = before[1:] + after if len(sentence) > 0: sentence[-1] += delimiter sentence += line_break return " ".join(sentence) def load_voices(self): # load_from_transcript = raw_input('Load from transcript? y/n\n') load_from_transcript = 'n' if load_from_transcript in ['y', 'yes']: self.load_voices_from_transcript() else: add_another_voice = '' while add_another_voice != 'n': self.add_voice() add_another_voice = input('Add more? y/n\n') # asks you to choose corpora from files in 'texts', then adds a voice with those corpora def add_voice(self): new_voice = Voice({}) # creates new voice with no name and empty tree of corpora texts = os.listdir('texts') add_another_corpus = '' while add_another_corpus != 'n': for i in range(len(texts)): print("%s %s" % (i + 1, texts[i])) choice = input('Enter the number of the corpus you want to load:\n') corpus_name = texts[int(choice) - 1] path = 'texts/%s' % corpus_name f = open(path, 'r') text = f.read() corpus_weight_prompt = 'Enter the weight for %s:\n' % corpus_name corpus_weight = float(input(corpus_weight_prompt)) new_voice.add_corpus(Corpus(text, corpus_name), corpus_weight) texts.remove(corpus_name) add_another_corpus = input('Add another corpus to this voice? y/n\n') voicename = input('Name this voice:\n') new_voice.name = voicename new_voice.normalize_weights() self.voices[voicename] = new_voice # asks user to specify a transcript and number of characters, and makes separate voices for that number of # the most represented characters in the transcript def load_voices_from_transcript(self): transcripts = os.listdir('texts/transcripts') for i in range(len(transcripts)): print("%s %s" % (i + 1, transcripts[i])) choice = input('Enter the number of the transcript you want to load:\n') transcript_name = transcripts[int(choice) - 1] number = int(input('Enter the number of voices to load:\n')) for charname, size in self.biggest_characters(transcript_name, number): print(charname) path = 'texts/transcripts/%s/%s' % (transcript_name, charname) source_text = open(path).read() corpus_name = charname weighted_corpora = {} weighted_corpora[charname] = [Corpus(source_text, corpus_name), 1] self.voices[charname] = Voice(weighted_corpora, charname) # retrieves a list of the top 20 largest character text files in a transcript folder def biggest_characters(self, tname, number): size_by_name = {} tpath = 'texts/transcripts/%s' % tname for cname in os.listdir(tpath): cpath = '%s/%s' % (tpath, cname) size_by_name[cname] = len(open(cpath).read().split()) sorted_chars = list(reversed(sorted(list(size_by_name.items()), key=operator.itemgetter(1)))) return sorted_chars[0:number] # offers several voice choices, returns a voice def choose_voice(self): voice_keys = sorted(self.voices.keys()) print("VOICES:") for i in range(len(voice_keys)): print("%s: %s" % (i + 1, voice_keys[i])) choice = input('Choose a voice by entering a number...\n') self.active_voice = self.voices[voice_keys[int(choice) - 1]] return self.active_voice def display_suggestions(self, suggestions): suggestion_string = '\n' for i in range(len(suggestions)): total_score = format(sum(suggestions[i][1][1].values()), 'g') info_string = "%s: %s" % (i + 1, str(suggestions[i][0])) if self.more_info: info_string += '\t' + str(total_score) suggestion_string += info_string score_tree = suggestions[i][1][1] if self.more_info: suggestion_string += '\t\t' for key in score_tree: score = format(score_tree[key], 'g') suggestion_string += '\t%s: %s' % (key, score) suggestion_string += '\n' print(suggestion_string) def take_suggestion(self, suggestions, user_input): return suggestions[int(user_input) - 1] """ # These functions are for looking up the definitions of unfamiliar words. # They require installing the HTML parsing tool Beautiful Soup, so I've commented them out. # If you have bs4, feel free to try them! def to_english(self, word): search_term = urllib2.quote(word) search_url = 'http://www.spanishdict.com/translate/%s' % search_term print search_url page = urllib2.urlopen(search_url).read() print len(page) soup = BeautifulSoup(page.decode('utf-8','ignore'), "html.parser") foo = soup.findAll(class_="dictionary-neodict-translation-translation") print len(foo) to_return = [] for x in foo: to_return.append(x.get_text()) return ", ".join(to_return) def to_english2(self, word): search_url = 'https://translate.google.com/#es/en/%s' % word page = urllib2.urlopen(search_url).read() soup = BeautifulSoup(page.decode('utf-8','ignore'), "html.parser") foo = soup.findAll(class_="dictionary-neodict-translation-translation") """