# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

from __future__ import absolute_import, division, unicode_literals

import numpy as np
import re
import inspect
from torch import optim

# Plain decimal number with an optional sign and an optional exponent
# (e.g. "0.01", "-.5", "1e-3"). Compiled once because it is applied to every
# "key=value" pair parsed by get_optimizer.
_FLOAT_RE = re.compile(r"^[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$")

# Maps the method name accepted by get_optimizer to the attribute name of the
# optimizer class in torch.optim. The class is looked up lazily with getattr
# so that only the requested optimizer has to exist.
_OPTIMIZER_CLASS_NAMES = {
    'adadelta': 'Adadelta',
    'adagrad': 'Adagrad',
    'adam': 'Adam',
    'adamax': 'Adamax',
    'asgd': 'ASGD',
    'rmsprop': 'RMSprop',
    'rprop': 'Rprop',
    'sgd': 'SGD',
}


def create_dictionary(sentences):
    """
    Build a frequency-sorted vocabulary from tokenized sentences.

    `sentences` is an iterable of iterables of hashable tokens. The special
    tokens '<s>', '</s>' and '<p>' are always present; their counts are
    overwritten with 1e9 + 4, 1e9 + 3 and 1e9 + 2 so that they sort ahead of
    any word seen fewer than about a billion times.

    Returns (id2word, word2id): a list of tokens ordered by decreasing count
    and the dict mapping each token back to its index in that list.
    """
    words = {}
    for s in sentences:
        for word in s:
            words[word] = words.get(word, 0) + 1
    words['<s>'] = 1e9 + 4
    words['</s>'] = 1e9 + 3
    words['<p>'] = 1e9 + 2
    # words['<UNK>'] = 1e9 + 1
    sorted_words = sorted(words.items(), key=lambda x: -x[1])  # inverse sort
    id2word = []
    word2id = {}
    for i, (w, _) in enumerate(sorted_words):
        id2word.append(w)
        word2id[w] = i

    return id2word, word2id


def cosine(u, v):
    """
    Return the cosine similarity of `u` and `v`.

    Computed as dot(u, v) / (||u|| * ||v||) with NumPy. There is no guard
    for zero-norm inputs: the division by zero is left to NumPy.
    """
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))


class dotdict(dict):
    """ dot.notation access to dictionary attributes """
    # Attribute lookups that normal resolution cannot satisfy fall back to
    # dict.get, so a missing key yields None instead of raising.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    # Deleting a missing attribute raises KeyError (from dict.__delitem__).
    __delattr__ = dict.__delitem__


def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"

    Values must be plain decimal numbers, optionally with a sign and an
    exponent ("1e-3"); each one is converted with float().

    Returns (optim_fn, optim_params): the torch.optim class and the dict of
    parsed keyword arguments. The optimizer is not instantiated here.

    Raises AssertionError for a malformed "key=value" pair, a non-numeric
    value, or 'sgd' without 'lr'. Raises Exception for an unknown method or
    for a parameter that is not a named argument of the optimizer's __init__.
    """
    method, sep, param_str = s.partition(',')
    optim_params = {}
    if sep:
        for x in param_str.split(','):
            split = x.split('=')
            assert len(split) == 2, \
                'Expected "name=value", got "%s"' % x
            assert _FLOAT_RE.match(split[1]) is not None, \
                'Expected a numeric value, got "%s"' % split[1]
            optim_params[split[0]] = float(split[1])

    if method not in _OPTIMIZER_CLASS_NAMES:
        raise Exception('Unknown optimization method: "%s"' % method)
    optim_fn = getattr(optim, _OPTIMIZER_CLASS_NAMES[method])
    if method == 'sgd':
        assert 'lr' in optim_params, 'sgd requires an "lr" parameter'

    # check that we give good parameters to the optimizer
    # inspect.getargspec was removed in Python 3.11 and, before that, raised
    # ValueError on signatures with keyword-only arguments or annotations.
    # getfullargspec handles those; getargspec remains the Python 2 fallback.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    expected_args = getargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn, optim_params