''' Build and run the RNN model ''' import cPickle as pkl import time import numpy as np import theano import theano.tensor as tensor from theano import config from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from collections import OrderedDict, deque import utils from op_link import Link from op_sentence import Sentence from sklearn.decomposition import PCA import wiki import qp import parameters as prm import matplotlib matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab since the server might not have an X server. import matplotlib.pyplot as plt from nltk.tokenize import wordpunct_tokenize import copy import itertools import random # compute_test_value is 'off' by default, meaning this feature is inactive #theano.config.compute_test_value = 'warn' # Use 'warn' to activate this feature # Set the random number generators' seeds for consistency SEED = 123 np.random.seed(SEED) def vis_att(pages_idx, query, alpha, wiki, vocab, idx): rows = [prm.root_page.title()] for pageidx in pages_idx[:-1]: if pageidx != -1: rows.append(wiki.get_article_title(pageidx).decode('utf-8', 'ignore').title()) else: break #rows.append('Stop') rows = rows[::-1] columns = [] for word in wordpunct_tokenize(query): if word.lower() in vocab: columns.append(str(word)) columns = columns[:prm.max_words_query*prm.n_consec] alpha = alpha[:len(rows),:len(columns)] alpha = alpha[::-1] fig,ax=plt.subplots(figsize=(27,10)) #Advance color controls norm = matplotlib.colors.Normalize(0,1) im = ax.pcolor(alpha,cmap=plt.cm.gray,edgecolors='w',norm=norm) fig.colorbar(im) ax.set_xticks(np.arange(0,len(columns))+0.5) ax.set_yticks(np.arange(0,len(rows))+0.5) ax.tick_params(axis='x', which='minor', pad=15) # Here we position the tick labels for x and y axis ax.xaxis.tick_bottom() ax.yaxis.tick_left() ax.axis('tight') # correcting pyplot bug that add extra white columns. plt.xticks(rotation=90) fig.subplots_adjust(bottom=0.2) fig.subplots_adjust(left=0.2) #Values against each labels ax.set_xticklabels(columns,minor=False,fontsize=18) ax.set_yticklabels(rows,minor=False,fontsize=18) plt.savefig('vis' + str(idx) + '.svg') plt.close() def np_floatX(data): return np.asarray(data, dtype=config.floatX) def _slice(_x, n, dim): if _x.ndim == 3: return _x[:, :, n * dim:(n + 1) * dim] return _x[:, n * dim:(n + 1) * dim] def get_minibatches_idx(n, minibatch_size, shuffle=False, max_samples=None): """ Used to shuffle the dataset at each iteration. """ idx_list = np.arange(n, dtype="int32") if shuffle: np.random.shuffle(idx_list) if max_samples: idx_list = idx_list[:max_samples] n = max_samples minibatches = [] minibatch_start = 0 for i in range(n // minibatch_size): minibatches.append(idx_list[minibatch_start: minibatch_start + minibatch_size]) minibatch_start += minibatch_size if (minibatch_start != n): # Make a minibatch out of what is left minibatches.append(idx_list[minibatch_start:]) return zip(range(len(minibatches)), minibatches) def zipp(params, tparams): """ When we reload the model. Needed for the GPU stuff. """ for kk, vv in params.iteritems(): tparams[kk].set_value(vv) def unzip(zipped): """ When we pickle the model. Needed for the GPU stuff. """ new_params = OrderedDict() for kk, vv in zipped.iteritems(): new_params[kk] = vv.get_value() return new_params def dropout_layer(state_before, is_train, trng): proj = tensor.switch(is_train, (state_before * trng.binomial(state_before.shape, p=(1-prm.dropout), n=1, dtype=state_before.dtype)), state_before * (1-prm.dropout)) return proj def load_params(path, params): pp = np.load(path) for kk, vv in params.iteritems(): if kk in pp: if params[kk].shape == pp[kk].shape: params[kk] = pp[kk] else: print 'The shape of layer', kk, params[kk].shape, 'is different from shape of the stored layer with the same name', pp[kk].shape, '.' else: print '%s is not in the archive' % kk return params def load_wemb(params, vocab): wemb = pkl.load(open(prm.wordemb_path, 'rb')) dim_emb_orig = wemb.values()[0].shape[0] W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX) for word, pos in vocab.items(): if word in wemb: W[pos,:] = wemb[word] if prm.dim_emb < dim_emb_orig: pca =PCA(n_components=prm.dim_emb, copy=False, whiten=True) W = pca.fit_transform(W) params['W'] = W return params def itemlist(tparams): return [vv for kk, vv in tparams.iteritems()] def init_tparams(params): tparams = OrderedDict() for kk, pp in params.iteritems(): tparams[kk] = theano.shared(params[kk], name=kk) return tparams def ortho_weight(ndim): W = np.random.randn(ndim, ndim) u, s, v = np.linalg.svd(W) return u.astype(config.floatX) def matrix(dim): return np.concatenate([ortho_weight(dim), ortho_weight(dim), ortho_weight(dim), ortho_weight(dim)], axis=1) def softmax_mask(x, mask): m = tensor.max(x, axis=-1, keepdims=True) e_x = tensor.exp(x - m) * mask return e_x / tensor.maximum(e_x.sum(axis=-1, keepdims=True), 1e-8) #this small constant avoids possible division by zero created by the mask def init_params(): params = OrderedDict() params['l_a_init'] = 0.01 * np.random.randn(prm.dim_emb,).astype(config.floatX) # initial values params['h_init'] = 0.01 * np.random.randn(prm.n_rnn_layers, prm.dim_proj).astype(config.floatX) # initial values params['c_init'] = 0.01 * np.random.randn(prm.n_rnn_layers, prm.dim_proj).astype(config.floatX) # initial values if prm.encoder.lower() == 'lstm': mul = 4 else: mul = 1 params['E_L'] = 0.01 * np.random.randn(prm.dim_emb, mul * prm.dim_proj).astype(config.floatX) # document params['E_Q'] = 0.01 * np.random.randn(prm.dim_emb, mul * prm.dim_proj).astype(config.floatX) # query params['U_I'] = 0.01 * np.random.randn(prm.dim_proj, mul * prm.dim_proj).astype(config.floatX) # hiddent state t-1 params['b'] = np.zeros((mul * prm.dim_proj,)).astype(config.floatX) # bias for i in range(1, prm.n_rnn_layers): i = str(i) params['E_L'+i] = 0.01 * np.random.randn(prm.dim_emb, mul * prm.dim_proj).astype(config.floatX) # document params['E_Q'+i] = 0.01 * np.random.randn(prm.dim_emb, mul * prm.dim_proj).astype(config.floatX) # query params['U_H'+i] = 0.01 * np.random.randn(prm.dim_proj, mul * prm.dim_proj).astype(config.floatX) # hidden state t-1 params['U_I'+i] = 0.01 * np.random.randn(prm.dim_proj, mul * prm.dim_proj).astype(config.floatX) # hidden state n-1 params['b'+i] = np.zeros((mul * prm.dim_proj,)).astype(config.floatX) # bias params['stop'] = 0.01 * np.random.randn(prm.dim_emb).astype(config.floatX) # stop action vector params['U_O'] = 0.01 * np.random.randn(prm.dim_proj, prm.dim_proj).astype(config.floatX) # score params['b_U_O'] = np.zeros((prm.dim_proj,)).astype(config.floatX) # bias for i in range(prm.n_doc_layers_nav): if i == 0: i = '' in_dim = prm.dim_emb else: in_dim = prm.dim_proj params['U_L' + str(i)] = 0.01 * np.random.randn(in_dim, prm.dim_proj).astype(config.floatX) # doc embedding params['b_U_L' + str(i)] = np.zeros((prm.dim_proj,)).astype(config.floatX) # bias ns = [prm.dim_proj] + prm.scoring_layers_nav + [1] for i in range(len(ns)-1): if i == 0: i_ = '' else: i_ = str(i+1) # +1 for compatibility purposes. params['U_R'+i_] = 0.01 * np.random.randn(ns[i], ns[i+1]).astype(config.floatX) # score params['b_U_R'+i_] = np.zeros((ns[i+1],)).astype(config.floatX) # bias if prm.att_query: n_features = [prm.dim_emb,] + prm.filters_query for i in range(len(prm.filters_query)): params['Ww_att_q'+str(i)] = 0.001 * np.random.randn(n_features[i+1], n_features[i], 1, prm.window_query[i]).astype(config.floatX) params['bw_att_q'+str(i)] = np.zeros((n_features[i+1],)).astype(config.floatX) # bias score q_feat_size = n_features[-1] params['Wq_att_q'] = 0.001 * np.random.randn(q_feat_size, prm.dim_proj).astype(config.floatX) # query params['Wh_att_q'] = 0.001 * np.random.randn(prm.dim_proj, prm.dim_proj).astype(config.floatX) # hidden state params['Wl_att_q'] = 0.001 * np.random.randn(prm.dim_emb, prm.dim_proj).astype(config.floatX) # link embedding params['bq_att_q'] = np.zeros((prm.dim_proj,)).astype(config.floatX) # bias params['We_att_q'] = 0.001 * np.random.randn(prm.dim_proj, 1).astype(config.floatX) # score params['be_att_q'] = np.zeros((1,)).astype(config.floatX) # bias score if prm.att_doc: n_features = [prm.dim_emb,] + prm.filters_doc for i in range(len(prm.filters_doc)): params['Ww_att_d'+str(i)] = 0.01 * np.random.randn(n_features[i+1], n_features[i], 1, prm.window_doc[i]).astype(config.floatX) params['bw_att_d'+str(i)] = np.zeros((n_features[i+1],)).astype(config.floatX) # bias score doc_feat_size = n_features[-1] params['Wq_att_d'] = 0.01 * np.random.randn(prm.dim_emb, prm.dim_proj).astype(config.floatX) # query params['Wh_att_d'] = 0.01 * np.random.randn(prm.dim_proj, prm.dim_proj).astype(config.floatX) # hidden state params['Wl_att_d'] = 0.01 * np.random.randn(doc_feat_size, prm.dim_proj).astype(config.floatX) # link embedding params['bq_att_d'] = np.zeros((prm.dim_proj,)).astype(config.floatX) # bias params['We_att_d'] = 0.01 * np.random.randn(prm.dim_proj, 1).astype(config.floatX) # score params['be_att_d'] = np.zeros((1,)).astype(config.floatX) # bias score if prm.learning.lower() == 'reinforce' and prm.idb: params['R_W'] = 0.01 * np.random.randn(prm.dim_proj, 1).astype(config.floatX) # question params['R_b'] = np.zeros((1,)).astype(config.floatX) # bias params['W'] = 0.01 * np.random.randn(prm.n_words, prm.dim_emb).astype(config.floatX) # vocab to word embeddings params['UNK'] = 0.01 * np.random.randn(1, prm.dim_emb).astype(config.floatX) # vector for UNK words exclude_params = {} if prm.fixed_wemb: exclude_params['W'] = True return params, exclude_params def rnn_layer(x, h_, c_, m_): if prm.encoder.lower() == 'lstm': i = tensor.nnet.sigmoid(_slice(x, 0, prm.dim_proj)) f = tensor.nnet.sigmoid(_slice(x, 1, prm.dim_proj)) o = tensor.nnet.sigmoid(_slice(x, 2, prm.dim_proj)) c = tensor.tanh(_slice(x, 3, prm.dim_proj)) c = f * c_ + i * c c = m_[:, None] * c + (1. - m_)[:, None] * c_ h = o * tensor.tanh(c) h = m_[:, None] * h + (1. - m_)[:, None] * h_ else: c = c_ h = tensor.tanh(x) * m_[:, None] return h, c def val(q_a, q_m, h_, l_a_, c_, m_, L_a, L_m, tparams_v, tparams, k_beam, n_samples, uidx, is_train, trng): def fparams(name): return tparams_v[tparams.keys().index(name)] n_links = L_a.shape[1] + 1 if prm.att_query: # Convolution q_aw = q_a.dimshuffle(0, 2, 'x', 1) # (n_samples, dim_emb, 1, n_words) for j in range(len(prm.filters_query)): q_aw = tensor.nnet.conv2d(q_aw, fparams('Ww_att_q'+str(j)), border_mode=(0, prm.window_query[j]//2)) q_aw += fparams('bw_att_q'+str(j))[None,:,None,None] q_aw = tensor.maximum(q_aw, 0.) #q_aw = tensor.nnet.relu(q_aw) # relu results in NAN. Use maximum() instead. q_aw = q_aw[:, :, 0, :].dimshuffle(0, 2, 1) e = tensor.dot(q_aw, fparams('Wq_att_q')) e += tensor.dot(h_[-1], fparams('Wh_att_q'))[:,None,:] e += tensor.dot(l_a_, fparams('Wl_att_q'))[:,None,:] e += fparams('bq_att_q') e = tensor.tanh(e) e = tensor.dot(e, fparams('We_att_q')) + fparams('be_att_q') e = e.reshape((e.shape[0],e.shape[1])) # repeat for beam search q_m_ = tensor.extra_ops.repeat(q_m, k_beam, axis=0) alpha = softmax_mask(e, q_m_) q_at = (alpha[:,:,None] * q_a).sum(1) else: alpha = tensor.alloc(np.array(0., dtype=np.float32), q_a.shape[0], q_a.shape[1]) q_at = q_a alpha_q = alpha h = tensor.zeros_like(h_) c = tensor.zeros_like(c_) # Multi-layer lstm for i in range(prm.n_rnn_layers): i_ = '' if i == 0 else str(i) a = tensor.dot(q_at, fparams('E_Q' + i_)) if prm.dropout > 0: a = dropout_layer(a, is_train, trng) b = tensor.dot(l_a_, fparams('E_L' + i_)) if prm.dropout > 0: b = dropout_layer(b, is_train, trng) preact = a + b preact += tensor.dot(h_[i], fparams('U_I' + i_)) preact += fparams('b' + i_) if i > 0: hp = tensor.dot(h[i-1], fparams('U_H' + i_)) if prm.dropout > 0: hp = dropout_layer(hp, is_train, trng) preact += hp h_i, c_i = rnn_layer(preact, h_[i], c_[i], tensor.neq(m_,-1.).astype('float32')) h = tensor.set_subtensor(h[i], h_i) c = tensor.set_subtensor(c[i], c_i) if prm.att_doc: # Convolution. L_aw = L_a.reshape((L_a.shape[0] * L_a.shape[1], L_a.shape[2], L_a.shape[3])) L_aw = L_aw.dimshuffle(0, 2, 'x', 1) # (n_samples*n_docs, n_emb, 1, n_char) for j in range(len(prm.filters_doc)): L_aw = tensor.nnet.conv2d(L_aw, fparams('Ww_att_d'+str(j)), border_mode=(0, prm.window_doc[j]//2)) L_aw += fparams('bw_att_d'+str(j))[None,:,None,None] L_aw = tensor.maximum(L_aw, 0.) # L_aw = tensor.nnet.relu(L_aw) # relu results in NAN. Use maximum() instead. L_aw = L_aw[:, :, 0, :].dimshuffle(0, 2, 1) L_aw = L_aw.reshape((L_a.shape[0], L_a.shape[1], L_a.shape[2], L_aw.shape[2])) e = tensor.dot(L_aw, fparams('Wl_att_d')) e += tensor.dot(h[-1], fparams('Wh_att_d'))[:,None,None,:] e += tensor.dot(q_at, fparams('Wq_att_d'))[:,None,None,:] e += fparams('bq_att_d') e = tensor.tanh(e) e = tensor.dot(e, fparams('We_att_d')) + fparams('be_att_d') e = e.reshape((e.shape[0],e.shape[1],e.shape[2])) alpha = softmax_mask(e, L_m) L_at = (alpha[:,:,:,None] * L_a).sum(2) L_m = L_m.any(2).astype('float32') else: L_at = L_a # Append stop action stop = fparams('stop')[None, None, :] stop = tensor.extra_ops.repeat(x=stop, repeats=n_samples * k_beam, axis=0) L_as = tensor.concatenate([stop, L_at], axis=1) stop_m = tensor.alloc(np_floatX(1.), n_samples * k_beam, 1) L_ms = tensor.concatenate([stop_m, L_m], axis=1) z = tensor.tanh(tensor.dot(h[-1], fparams('U_O')) + fparams('b_U_O')) L_as2 = L_as for i in range(prm.n_doc_layers_nav): if i == 0: i_ = '' else: i_ = str(i) L_as2 = tensor.dot(L_as2, fparams('U_L'+i_)) + fparams('b_U_L'+i_) if prm.dropout > 0: L_as2 = dropout_layer(L_as2, is_train, trng) L_as2 = tensor.tanh(L_as2) res = tensor.dot(L_as2 * z[:,None,:], fparams('U_R')) + fparams('b_U_R') for i in range(1,len(prm.scoring_layers_nav)+1): if prm.dropout > 0: res = dropout_layer(res, is_train, trng) res = tensor.tanh(res) # tanh here instead after the dot product makes no tanh in the last layer. res = tensor.dot(res, fparams('U_R'+str(i+1))) + fparams('b_U_R'+str(i+1)) res = res.reshape((n_samples, k_beam * n_links)) # Reshape for beam search L_ms = L_ms.reshape((n_samples, k_beam * n_links)) score = res * L_ms return score, h, c, L_as, L_ms, alpha_q def adam(lr0, tparams, grads, iin, out, updates): gshared = [theano.shared(p.get_value() * 0., name='%s_grad'%k) for k, p in tparams.iteritems()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] f_grad_shared = theano.function(iin, out, updates=gsup+updates, \ on_unused_input='ignore', allow_input_downcast=True) b1 = 0.1 b2 = 0.001 e = 1e-8 updates = [] i = theano.shared(np.float32(0.)) i_t = i + 1. fix1 = 1. - b1**(i_t) fix2 = 1. - b2**(i_t) lr_t = lr0 * (tensor.sqrt(fix2) / fix1) for p, g in zip(tparams.values(), gshared): m = theano.shared(p.get_value() * 0.) v = theano.shared(p.get_value() * 0.) m_t = (b1 * g) + ((1. - b1) * m) v_t = (b2 * tensor.sqr(g)) + ((1. - b2) * v) g_t = m_t / (tensor.sqrt(v_t) + e) p_t = p - (lr_t * g_t) updates.append((m, m_t)) updates.append((v, v_t)) updates.append((p, p_t)) updates.append((i, i_t)) f_update = theano.function([lr0], [], updates=updates, on_unused_input='ignore') return f_grad_shared, f_update def compute_emb(x, W): def _step(xi, emb, W): if prm.att_doc: new_shape = (xi.shape[0], xi.shape[1], xi.shape[2], prm.dim_emb) else: new_shape = (xi.shape[0], xi.shape[1], prm.dim_emb) out = W[xi.flatten()].reshape(new_shape).sum(-2) return out / tensor.maximum(1., tensor.neq(xi,-1).astype('float32').sum(-1, keepdims=True)) if prm.att_doc: emb_init = tensor.alloc(0., x.shape[1], x.shape[2], prm.dim_emb) else: emb_init = tensor.alloc(0., x.shape[1], prm.dim_emb) (embs), scan_updates = theano.scan(_step, sequences=[x], outputs_info=[emb_init], non_sequences=[W], name='emb_scan', n_steps=x.shape[0]) return embs def ff(q, q_m, k_beam, trng, is_train, options, uidx, tparams, mixer, sup, root_pages, max_hops, acts_p, rl_idx=None, get_links=None): n_samples = q.shape[0] off = 1e-8 epsilon = tensor.maximum(prm.epsilon_min, prm.epsilon_start - (prm.epsilon_start - prm.epsilon_min) * (uidx / prm.epsilon_decay)) if not get_links: get_links = Link(options['wiki'], options['wikipre'], options['vocab']) # instantiate custom function to get wiki links # append vector for UNK words (index == -1). W_ = tensor.concatenate([tparams['W'], tparams['UNK']], axis=0) def _step(it, act_p, p_, m_, l_a_, h_, c_, q_a, pr_all, W_, k_beam, uidx, is_train, sup, mixer, *tparams_v): L_a, L_m, l_page, l_truth = get_links(p_, act_p, it, uidx, k_beam) if prm.compute_emb: L_a = compute_emb(L_a, W_) dist, h, c, L_as, L_ms, alpha_q = val(q_a, q_m, h_, l_a_, c_, m_, L_a, L_m, tparams_v, tparams, k_beam, n_samples, uidx, is_train, trng) n_links = L_as.shape[1] if prm.learning.lower() == 'q-learning': if rl_idx: #if this is the replay memory pass, just use the q-value function dist = tensor.nnet.sigmoid(dist) * L_ms res_ = dist.argmax(1) else: # otherwise, select actions using epsilon-greedy or softmax. if prm.act_sel.lower() == 'epsilon-greedy': dist = tensor.nnet.sigmoid(dist) * L_ms greedy = tensor.eq(is_train,1.).astype('float32') * (trng.uniform(size=(n_samples,)) > epsilon) \ + tensor.eq(is_train,0.).astype('float32') randd = tensor.floor(trng.uniform(size=(n_samples,)) * L_ms.sum(1)).astype('int32') res_pre = tensor.eq(it, 0.).astype('int32') * dist[:,:n_links].argsort(axis=1)[:,::-1][:, :k_beam].flatten().astype("int32") \ + tensor.neq(it, 0.).astype('int32') * dist.argsort(axis=1)[:,::-1][:, :k_beam].reshape((n_samples * k_beam,)).astype("int32") # Repeat for beam search greedy = tensor.extra_ops.repeat(greedy, k_beam, axis=0) randd = tensor.extra_ops.repeat(randd, k_beam, axis=0) res_ = (1. - greedy) * randd + greedy * res_pre elif prm.act_sel.lower() == 'softmax': dist = softmax_mask(dist, L_ms) # if training, sample. Otherwise, use the maximum value. lp_ = tensor.eq(is_train,1.).astype('float32') * trng.multinomial(n=1, pvals=dist, dtype=dist.dtype) \ + tensor.eq(is_train,0.).astype('float32') * dist res_ = tensor.eq(it, 0.).astype('int32') * lp_[:,:n_links].argsort(axis=1)[:,::-1][:, :k_beam].flatten().astype("int32") \ + tensor.neq(it, 0.).astype('int32') * lp_.argsort(axis=1)[:,::-1][:, :k_beam].reshape((n_samples * k_beam,)).astype("int32") else: dist = softmax_mask(dist, L_ms) lp_ = tensor.eq(is_train,1.).astype('float32') * trng.multinomial(n=1, pvals=dist, dtype=dist.dtype) \ + tensor.eq(is_train,0.).astype('float32') * \ (tensor.log(pr_all[:it] + off).sum(0)[:,None] + tensor.log(dist.reshape((n_samples*k_beam,-1)) + off)).reshape((n_samples,-1)) res_ = tensor.eq(it, 0.).astype('int32') * lp_[:,:n_links].argsort(axis=1)[:,::-1][:, :k_beam].flatten().astype("int32") \ + tensor.neq(it, 0.).astype('int32') * lp_.argsort(axis=1)[:,::-1][:, :k_beam].reshape((n_samples * k_beam,)).astype("int32") # Select action: supervised, RL, or mixed. if prm.mixer > 0 and prm.learning.lower() == 'reinforce': # Mixed l_idx = ((it < mixer) * l_truth + (1 - (it < mixer)) * res_).astype("int32") else: # Supervised or RL if rl_idx: #if this is the replay forward pass, just choose the same action taken in the past l_idx = rl_idx[:,it] else: # Otherwise, use the supervised signal or the action chosen by the policy. l_idx = (sup * l_truth + (1 - sup) * res_).astype("int32") l_idx0 = (k_beam * tensor.floor(tensor.arange(l_idx.shape[0]) / k_beam) + tensor.floor(l_idx / (n_links)) ).astype('int32') l_idx1 = tensor.mod(l_idx, n_links).astype('int32') l_a = L_as[l_idx0, l_idx1, :] dist = dist.reshape((n_samples*k_beam, n_links)) l_prob = dist[l_idx0, l_idx1] # get the probability of the chosen action. l_ent = -(dist * tensor.log(dist + off)).sum(1) # get the entropy. pr_all = tensor.set_subtensor(pr_all[it], l_prob) # supervised only: compute the cost for page selection cost_p = -tensor.log(dist[tensor.arange(dist.shape[0]), l_truth] + off) # check if the stop action was chosen, and # mark the sample as "not stop" by storing the current iteration. m = tensor.neq(l_idx1, 0).astype("float32") m = m * it - (1. - m) m = m.astype('float32') # Get indices of the next articles. p = l_page[l_idx0, l_idx1] # the returned variable in the scan function must have same size in all iterations. dist_ = tensor.alloc(0., n_samples * k_beam, prm.max_links+1) dist_ = tensor.set_subtensor(dist_[tensor.arange(n_samples*k_beam), :dist.shape[1]], dist) # the returned variable in the scan function must have same size in all iterations. l_page_ = tensor.alloc(-1, n_samples * k_beam, prm.max_links+1).astype('int32') l_page_ = tensor.set_subtensor(l_page_[tensor.arange(n_samples*k_beam), :l_page.shape[1]], l_page) return p, m, l_a, h, c, l_prob, l_ent, cost_p, l_idx, dist_, alpha_q, l_page_ #get embeddings for the queries q_a = W_[q.flatten()].reshape((q.shape[0], q.shape[1], prm.dim_emb)) * q_m[:,:,None] if not prm.att_query: q_a = q_a.sum(1) / tensor.maximum(1., q_m.sum(1, keepdims=True)) #repeat question for beam search q_a = tensor.extra_ops.repeat(q_a, k_beam, axis=0) root_pages_ = tensor.extra_ops.repeat(root_pages, k_beam) l_a_init = tensor.extra_ops.repeat(tparams['l_a_init'][None,:], k_beam * n_samples, axis=0) h_init = tensor.extra_ops.repeat(tparams['h_init'][:,None,:], k_beam * n_samples, axis=1) c_init = tensor.extra_ops.repeat(tparams['c_init'][:,None,:], k_beam * n_samples, axis=1) pr_all = tensor.alloc(1., max_hops+1, k_beam * n_samples) (pages_idx, mask, l_a, h, _, l_prob, l_ent, cost_p, l_idx, dist, alpha_q, l_page), scan_updates = theano.scan(_step, sequences=[tensor.arange(max_hops+1), acts_p], outputs_info=[root_pages_, #page idx tensor.alloc(0., k_beam * n_samples), # mask l_a_init, h_init, c_init, None, # l_prob None, # l_ent None, # cost_p None, # l_idx None, # dist None, # alpha_q None, # l_page ], non_sequences=[q_a, pr_all, W_, k_beam, uidx, is_train, sup, mixer]+tparams.values(), name='lstm_layers', n_steps=max_hops+1, strict=True) #convert mask mask = mask.max(0) indices = tensor.repeat(tensor.arange(max_hops+1)[:,None], mask.shape[0], axis=1) mask = (indices <= mask[None,:]).astype('float32') return (pages_idx, mask, l_a, h[:,-1,:,:], l_prob, l_ent, cost_p, root_pages_, l_idx, dist, alpha_q, l_page), scan_updates, get_links def build_model(tparams, tparams_next, baseline_vars, options): trng = RandomStreams(SEED) off = 1e-8 # small constant to avoid log 0 = -inf consider_constant = [] is_train = theano.shared(np_floatX(0.)) # Used for dropout. mixer = theano.shared(np.asarray(0, dtype=np.int32)) # Used for MIXER. sup = theano.shared(np_floatX(0.)) # Supervised or not max_hops = theano.shared(np.asarray(prm.max_hops_pred, dtype=np.int32)) # Max number of iterations k_beam = theano.shared(np.asarray(prm.k, dtype=np.int32)) # top-k items in the beam search. q = tensor.imatrix('q') q_m = tensor.fmatrix('q_m') root_pages = tensor.fvector('root_pages') acts_p = tensor.imatrix('acts_p') #used only when prm.learning = 'q-learning' uidx = tensor.iscalar('uidx') rs_q = tensor.imatrix('rs_q') rs_q_m = tensor.fmatrix('rs_q_m') rl_idx = tensor.imatrix('rl_idx') rt = tensor.fmatrix('rt') rR = tensor.fmatrix('rR') """ q.tag.test_value = np.zeros((prm.batch_size_train,prm.n_consec*prm.max_words_query), dtype='int32') q_m.tag.test_value = np.ones((prm.batch_size_train,prm.n_consec*prm.max_words_query), dtype=theano.config.floatX) root_pages.tag.test_value = np.zeros((prm.batch_size_train,), dtype=theano.config.floatX) acts_p.tag.test_value = np.zeros((prm.max_hops_train+1,prm.batch_size_train), dtype='int32') uidx.tag.test_value = np.zeros((1,), dtype='int32') rs_q_a.tag.test_value = np.zeros((prm.batch_size_train,prm.dim_emb), dtype=theano.config.floatX) rs_q_m.tag.test_value = np.zeros((prm.batch_size_train,prm.n_consec*prm.max_words_query), dtype=theano.config.floatX) rl_idx.tag.test_value = np.zeros((prm.batch_size_train,), dtype='int32') rt.tag.test_value = np.zeros((prm.batch_size_train,), dtype=theano.config.floatX) rR.tag.test_value = np.zeros((prm.batch_size_train,), dtype=theano.config.floatX) """ (pages_idx, mask, l_a, h, l_prob, l_ent, cost_p, root_pages_, l_idx, dist, alpha_q, l_page), scan_updates_a, _ = \ ff(q, q_m, k_beam, trng, is_train, options, uidx, tparams, mixer, sup, root_pages, max_hops, acts_p) # Get only the used probabilities. mask_ = tensor.concatenate([tensor.alloc(np_floatX(1.), 1, mask.shape[1]), mask], axis=0)[:-1,:] l_prob *= mask_ # l_prob.shape = (n_iterations, n_samples) l_ent *= mask_ # l_ent.shape = (n_iterations, n_samples) get_sent = Sentence(options['wiki'], options['vocab'], prm.n_consec) # instantiate custom function to get sentences pages_idx_ = tensor.concatenate([root_pages_[None,:], pages_idx[:-1]], axis=0) # get last valid action before the stop action. In case the all the mask is True, get the last action. js = (tensor.minimum(mask.shape[0] - 1, mask.sum(axis=0))).astype("int32") sel_docs = pages_idx_[js, tensor.arange(js.shape[0])] R, best_answer = get_sent(q, q_m, sel_docs, k_beam) # in case the agent didn't stop (all mask is true), the reward is zero. R *= tensor.neq(mask.sum(0), mask.shape[0]).astype('float32').reshape((R.shape[0], k_beam)).any(1) l_aT = l_a.dimshuffle((1,0,2)) l_aT = l_aT.reshape((q.shape[0],-1, prm.dim_emb)) sel_docs = sel_docs.reshape((-1, k_beam)) # the first doc always has the best prob. best_doc = sel_docs[:, 0] f_pred = theano.function([q, q_m, root_pages, acts_p, uidx], \ [best_doc, best_answer, R, pages_idx, sel_docs, js, dist, alpha_q, l_page], \ updates=scan_updates_a, name='f_pred', on_unused_input='ignore') # entropy regularization cost_ent = -prm.erate * l_ent if prm.learning.lower() == 'supervised': # cost for link selection. cost = ((cost_p + cost_ent) * mask_).sum(0).mean() # costs for document scoring. a = tensor.neq(acts_p,-1).astype('int32').sum(0) - 1 baseline_updates = [] elif prm.learning.lower() == 'q-learning': (_, m, _, _, _, _, _, _, _, _, q_vals), scan_updates_b, get_links = \ ff(rs_q, rs_q_m, k_beam, trng, is_train, \ options, uidx, tparams, mixer, sup, \ root_pages, max_hops, acts_p, rl_idx) m = m.T m_ = tensor.concatenate([tensor.alloc(np_floatX(1.), m.shape[0], 1), m], axis=1)[:,:-1] q_vals = q_vals.dimshuffle((1,0,2)) if prm.update_freq > 1: (_, _, _, _, _, _, _, _, _, _, n_q_vals), scan_updates_c, _ = \ ff(rs_q, rs_q_m, k_beam, trng, is_train, \ options, uidx, tparams_next, mixer, sup, \ root_pages, max_hops, acts_p, rl_idx, get_links) n_q_vals = n_q_vals.dimshuffle((1,0,2)) # left shift n_q_vals and add zeros at the end. n_q_vals = tensor.concatenate([n_q_vals[:,1:,:], tensor.zeros_like(n_q_vals[:,0,:])[:,None,:]], axis=1) else: # left shift n_q_vals and add zeros at the end. n_q_vals = tensor.concatenate([q_vals[:,1:,:], tensor.zeros_like(q_vals[:,0,:])[:,None,:]], axis=1) n_q_vals *= tensor.ones_like(n_q_vals) # Dummy operation # Don't update weights with respect to n_q_vals n_q_vals = theano.gradient.disconnected_grad(n_q_vals) q_vals_ = q_vals.reshape((-1, q_vals.shape[2])) n_q_vals_ = n_q_vals.reshape((-1,n_q_vals.shape[2])) rR_ = rR.flatten() rt_ = rt.flatten() rl_idx_ = rl_idx.flatten() target = rR_ + (tensor.ones_like(rt_) - rt_) * prm.discount * n_q_vals_.max(1) diff = target - q_vals_[tensor.arange(rl_idx_.shape[0]), rl_idx_] if prm.clip > 0.: # If we simply take the squared clipped diff as our loss, # then the gradient will be zero whenever the diff exceeds # the clip bounds. To avoid this, we extend the loss # linearly past the clip point to keep the gradient constant # in that regime. # # This is equivalent to declaring d loss/d q_vals to be # equal to the clipped diff, then backpropagating from # there, which is what the DeepMind implementation does. quadratic_part = tensor.minimum(abs(diff), prm.clip) linear_part = abs(diff) - quadratic_part cost = 0.5 * quadratic_part ** 2 + prm.clip * linear_part else: cost = 0.5 * diff ** 2 cost = (cost * m_.flatten()).sum() / tensor.maximum(1., m_.sum()) # use entropy regularization if it is using softmax. if prm.act_sel.lower() == 'softmax': cost += (cost_ent * mask_).sum() / tensor.maximum(1., mask_.sum()) cost *= (uidx > prm.replay_start).astype('float32') # start learning only after some updates. baseline_updates = [] elif prm.learning.lower() == 'reinforce': if prm.mov_avg: R_mean = R.mean() R_std = R.std() R_mean_ = 0.9 * baseline_vars['R_mean'] + 0.1 * R_mean R_std_ = 0.9 * baseline_vars['R_std'] + 0.1 * R_std # Update baseline vars. baseline_updates = [(baseline_vars['R_mean'], R_mean_), (baseline_vars['R_std'], R_std_)] else: baseline_updates = [] R_mean_ = 0. R_std_ = 1. if prm.idb: # input-dependent baseline #R_idb = tensor.dot(h[js, tensor.arange(h.shape[1]), :], tparams['R_W']) + tparams['R_b'] h_const = theano.gradient.disconnected_grad(h) R_idb = tensor.nnet.sigmoid(tensor.dot(h_const.mean(0), tparams['R_W']) + tparams['R_b']) R_ = (R[:,None] - R_mean_ - R_idb) / tensor.maximum(1., R_std_) else: R_ = (R[:,None] - R_mean_) / tensor.maximum(1., R_std_) R_ = R_[:,0] consider_constant += [R_] cost_sup = (cost_p + cost_ent) * mask_ cost_sup = cost_sup[:mixer].sum(0).mean() if prm.clip > 0: # Clipping l_prob so -log does not become too large. log_or_lin = (-tensor.log(l_prob + off) < prm.clip).astype('float32') log_or_lin = theano.gradient.disconnected_grad(log_or_lin) cost_pre = log_or_lin * -tensor.log(l_prob + off) + (1. - log_or_lin) * (1 - l_prob / tensor.exp(-prm.clip)) else: cost_pre = -tensor.log(l_prob + off) cost_RL = (R_ * cost_pre + cost_ent) * mask_ cost_RL = cost_RL[mixer:].sum(0).mean() cost = cost_sup + cost_RL if prm.idb: R0 = R[:,None] - R_mean_ R0 = theano.gradient.disconnected_grad(R0) #cost += 0.01 * ((R_idb - R0) ** 2).mean() cost += ((R0 - R_idb) ** 2).mean() else: raise ValueError('Not a valid value for the learning parameter.' + \ ' Valid options are: "supervised", "reinforce", and "q-learning".') if prm.weight_decay > 0.: for name, w in tparams.items(): #do not include bias. if not name.lower().startswith('b'): cost += prm.weight_decay * (w**2).sum() # replay memory. l_idx = l_idx.T dist = dist.dimshuffle((1,0,2)) iin = [q, q_m, root_pages, acts_p, uidx, rs_q, rs_q_m, rl_idx, rt, rR] out = [cost, R, l_idx, pages_idx, best_doc, best_answer, mask, dist] if prm.learning.lower() == 'q-learning': scan_updates = scan_updates_a + scan_updates_b if prm.update_freq > 1: scan_updates += scan_updates_c else: scan_updates = scan_updates_a updates = scan_updates + baseline_updates return iin, out, updates, is_train, sup, max_hops, k_beam, mixer, f_pred, consider_constant def get_root_pages(actions): root_pages = np.zeros((len(actions)), dtype=np.float32) for t, action in enumerate(actions): root_pages[t] = action[0] return root_pages def get_acts(actions, max_hops): # Get correct actions (supervision signal) acts_p = -np.ones((max_hops+1, len(actions)), dtype=np.int32) for t, action in enumerate(actions): for kj, title_id in enumerate(action[1:]): acts_p[kj, t] = title_id return acts_p def pred_error(f_pred, queries, actions, candidates, options, iterator, verbose=False): """ Compute the error and document recall. f_pred: Theano function computing the prediction """ n = 0. ns = 0. valid_R = 0. recall1 = 0. recall = 0. # document recall for the last page before the stop action. recall_all = 0. # document recall for all pages visited. uidx = -1 i = 0 for _, valid_index in iterator: q_i, q_m = utils.text2idx2([queries[t].lower() for t in valid_index], options['vocab'], prm.max_words_query*prm.n_consec) acts = [actions[t] for t in valid_index] cands = [candidates[t] for t in valid_index] #dummy acts that won't be used in the prediction acts_p = -np.ones((prm.max_hops_pred+1, len(q_i) * prm.k), dtype=np.int32) root_pages = get_root_pages([act[0] for act in acts]) best_doc, best_answer, R, pages_idx, selected_docs, js, _, _, _ = f_pred(q_i, q_m, root_pages, acts_p, uidx) R_binary = np.ones_like(R) R_binary[R<1.0] = 0.0 n += len(valid_index) valid_R += R.sum() all_docs = pages_idx.T.reshape((len(valid_index), (prm.max_hops_pred + 1) * prm.k)) for j in range(len(valid_index)): # get correct path. acts_p = get_acts(acts[j], prm.max_hops_pred) ns += len(acts[j]) # Compute the document recall. jc = np.minimum(np.maximum((acts_p != -1.0).astype('int32').sum(0) - 1, 0), prm.max_hops_pred) correct_docs = acts_p[jc, np.arange(acts_p.shape[1])] for correct_doc in correct_docs: # Doc recall for all pages visited recall_all += (correct_doc == all_docs[j]).any().astype('int32').sum() # doc recall for pages before stop action match = (correct_doc == selected_docs[j]).any() recall += match.astype('int32').sum() recall1 += (correct_doc == best_doc[j]).astype('int32').sum() if j == 0 and (i % prm.dispFreq == 0): print '\nQuery: ' + queries[valid_index[j]].replace('\n',' ') print 'Best document: ' + options['wiki'].get_article_title(best_doc[j]) print 'Best answer: ' + utils.idx2text(best_answer[j], options['vocabinv']) print 'Supervised Path:', for page_idx in acts_p[:-1,0]: if page_idx != -1: print '->', options['wiki'].get_article_title(page_idx), print '-> Stop' print 'Actual Path: ', for page_idx in pages_idx[:-1,0]: if page_idx != -1: print '->', options['wiki'].get_article_title(page_idx), print '-> Stop' i += 1 uidx -= 1 valid_R = valid_R / n recall1 = recall1 / n recall = recall / ns recall_all = recall_all / ns return valid_R, recall1, recall, recall_all def train(): optimizer=adam # only adam is supported by now. options = locals().copy() print 'parameters:', str(options) prm_k = vars(prm).keys() prm_d = vars(prm) prm_k.sort() for x in prm_k: if not x.startswith('__'): print x,'=', prm_d[x] print 'loading dictionary...' vocab = utils.load_vocab(prm.vocab_path, prm.n_words) options['vocab'] = vocab options['vocabinv'] = {} for k,v in vocab.items(): options['vocabinv'][v] = k print 'Loading Environment...' options['wiki'] = wiki.Wiki(prm.pages_path) if prm.compute_emb: import wiki_idx options['wikipre'] = wiki_idx.WikiIdx(prm.pages_idx_path) else: import wiki_emb options['wikipre'] = wiki_emb.WikiEmb(prm.pages_emb_path) print 'Loading Dataset...' qpp = qp.QP(prm.qp_path) q_train, q_valid, q_test = qpp.get_queries() a_train, a_valid, a_test = qpp.get_paths() c_train, c_valid, c_test = qpp.get_candidates() # get candidates obtained by the search engine if prm.aug>1: dic_thes = utils.load_synonyms() q_train = utils.augment(q_train, dic_thes) a_train = list(itertools.chain.from_iterable(itertools.repeat(x, prm.aug) for x in a_train)) c_train = list(itertools.chain.from_iterable(itertools.repeat(x, prm.aug) for x in c_train)) # This create the initial parameters as np ndarrays. # Dict name (string) -> np ndarray params, exclude_params = init_params() if prm.wordemb_path: print 'loading pre-trained word embeddings' params = load_wemb(params, vocab) options['W'] = params['W'] if prm.reload_model: load_params(prm.reload_model, params) params_next = OrderedDict() if prm.learning.lower() == 'q-learning': if prm.update_freq > 1: # copy params to params_next for kk, kv in params.items(): params_next[kk] = kv.copy() if prm.reload_mem: mem, mem_r = pkl.load(open(prm.reload_mem, 'rb')) else: mem = deque(maxlen=prm.replay_mem_size) # replay memory as circular buffer. mem_r = deque(maxlen=prm.replay_mem_size) # reward of each entry in the replay memory. print 'Building model' # This create Theano Shared Variable from the parameters. # Dict name (string) -> Theano Tensor Shared Variable # params and tparams have different copy of the weights. tparams = init_tparams(params) if prm.update_freq > 1: tparams_next = init_tparams(params_next) else: tparams_next = None baseline_vars = {} if prm.learning.lower() == 'reinforce': if prm.mov_avg: R_mean = theano.shared(0.71*np.ones((1,)), name='R_mean') R_std = theano.shared(np.ones((1,)), name='R_std') baseline_vars = {'R_mean': R_mean, 'R_std': R_std} iin, out, updates, is_train, sup, max_hops, k_beam, mixer, f_pred, consider_constant \ = build_model(tparams, tparams_next, baseline_vars, options) #get only parameters that are not in the exclude_params list tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params]) total_prm = 0 learn_prm = 0 for name, arr in params.items(): if name not in exclude_params: learn_prm += arr.size total_prm += arr.size print 'Number of Parameters :', total_prm print 'Number of Learnable Parameters:', learn_prm grads = tensor.grad(out[0], wrt=itemlist(tparams_), consider_constant=consider_constant) lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams_, grads, iin, out, updates) print 'Optimization' if prm.train_size == -1: train_size = len(q_train) else: train_size = min(prm.train_size, len(q_train)) if prm.valid_size == -1: valid_size = len(q_valid) else: valid_size = min(prm.valid_size, len(q_valid)) if prm.test_size == -1: test_size = len(q_test) else: test_size = min(prm.test_size, len(q_test)) print '%d train examples' % len(q_train) print '%d valid examples' % len(q_valid) print '%d test examples' % len(q_test) history_errs = [] best_p = None if prm.validFreq == -1: validFreq = len(q_train) / prm.batch_size_train else: validFreq = prm.validFreq if prm.saveFreq == -1: saveFreq = len(q_train) / prm.batch_size_train else: saveFreq = prm.saveFreq uidx = 0 # the number of update done estop = False # early stop start_time = time.time() try: for eidx in xrange(prm.max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(q_train), prm.batch_size_train, shuffle=True) for _, train_index in kf: st = time.time() uidx += 1 is_train.set_value(1.) max_hops.set_value(prm.max_hops_train) # select training dataset k_beam.set_value(1) # Training does not use beam search # Select the random examples for this minibatch queries = [q_train[t].lower() for t in train_index] # randomly select a path of each training example actions = [] for t in train_index: a = a_train[t] actions.append(a[random.randint(0,len(a)-1)]) if prm.learning.lower() == 'supervised': sup.set_value(1.) # select supervised mode else: sup.set_value(0.) # Get correct actions (supervision signal) acts_p = get_acts(actions, prm.max_hops_train) # MIXER if prm.mixer > 0 and prm.learning.lower() == 'reinforce': mixer.set_value(max(0, prm.max_hops_train - uidx // prm.mixer)) else: if prm.learning.lower() == 'supervised': mixer.set_value(prm.max_hops_train+1) else: mixer.set_value(0) root_pages = get_root_pages(actions) # Get the BoW for the queries. q_i, q_m = utils.text2idx2(queries, vocab, prm.max_words_query*prm.n_consec) n_samples += len(queries) if uidx > 1 and prm.learning.lower() == 'q-learning': # Randomly select memories and convert them to numpy arrays. idxs = np.random.choice(np.arange(len(mem)), size=len(queries)) rvs = [] for j in range(len(mem[idxs[0]])): rv = [] for idx in idxs: rv.append(mem[idx][j]) rvs.append(np.asarray(rv)) else: rvs = [np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q_m np.zeros((len(queries),prm.max_hops_train+1),dtype=np.int32), # rl_idx np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32), # rt np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32) # rr ] cost, R, l_idx, pages_idx, best_doc, best_answer, mask, dist \ = f_grad_shared(q_i, q_m, root_pages, acts_p, uidx, *rvs) f_update(prm.lrate) if prm.learning.lower() == 'q-learning': # update weights of the next_q_val network. if prm.update_freq > 1 and ((uidx % prm.update_freq == 0) or (uidx == prm.replay_start)): for tk, tv in tparams.items(): if tk in tparams_next: tparams_next[tk].set_value(tv.get_value().copy()) # Only update memory after freeze_mem or before replay_start. if uidx < prm.replay_start or uidx > prm.freeze_mem: # Update Replay Memory. t = np.zeros((len(queries), prm.max_hops_train+1)) rR = np.zeros((len(queries), prm.max_hops_train+1)) pr = float(np.asarray(mem_r).sum()) / max(1., float(len(mem_r))) for i in range(len(queries)): j = np.minimum(mask[:,i].sum(), prm.max_hops_train) # If the agent chooses to stop or the episode ends, # the reward will be the reward obtained with the chosen document. rR[i,j] = R[i] t[i,j] = 1. add = True if prm.prioritized_sweeping >= 0 and uidx > 1: # Prioritized_sweeping: keep the percentage of memories # with reward=1 approximately equal to <prioritized_sweeping>. if ((pr < prm.prioritized_sweeping - 0.05) and (rR[i,j] == 0.)) or ((pr > prm.prioritized_sweeping + 0.05) and (rR[i,j] == 1.)): add = False if add: mem.append([q_i[i], q_m[i], l_idx[i], t[i], rR[i]]) mem_r.append(rR[i]) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' return 1., 1., 1. #if uidx % 100 == 0: # vis_att(pages_idx[:,-1], queries[-1], alpha[:,-1,:], uidx, options) if np.mod(uidx, prm.dispFreq) == 0: print "\nQuery: " + queries[0].replace("\n"," ") print 'Supervised Path:', for i, page_idx in enumerate(acts_p[:-1,0]): if page_idx != -1: print '->', options['wiki'].get_article_title(page_idx), print '-> Stop' print 'Actual Path: ', for i, page_idx in enumerate(pages_idx[:-1,0]): if page_idx != -1: print '->', options['wiki'].get_article_title(page_idx), print '-> Stop' print 'Best Document: ' + options['wiki'].get_article_title(best_doc[0]) print 'Best Answer: ' + utils.idx2text(best_answer[0], options['vocabinv']) print('Epoch '+ str(eidx) + ' Update '+ str(uidx) + ' Cost ' + str(cost) + \ ' Reward Mean ' + str(R.mean()) + ' Reward Max ' + str(R.max()) + \ ' Reward Min ' + str(R.min()) + \ ' Q-Value Max (avg per sample) ' + str(dist.max(2).mean()) + \ ' Q-Value Mean ' + str(dist.mean())) if prm.learning.lower() == 'q-learning': pr = float(np.asarray(mem_r).sum()) / max(1., float(len(mem_r))) print 'memory replay size:', len(mem), ' positive reward:', pr print 'Time per Minibatch Update: ' + str(time.time() - st) if np.mod(uidx, validFreq) == 0 or uidx == 1: kf_train = get_minibatches_idx(len(q_train), prm.batch_size_pred, shuffle=True, max_samples=train_size) kf_valid = get_minibatches_idx(len(q_valid), prm.batch_size_pred, shuffle=True, max_samples=valid_size) kf_test = get_minibatches_idx(len(q_test), prm.batch_size_pred, shuffle=True, max_samples=test_size) is_train.set_value(0.) sup.set_value(0.) # supervised mode off mixer.set_value(0) # no supervision max_hops.set_value(prm.max_hops_pred) k_beam.set_value(prm.k) print '\nEvaluating Training Set' train_R, train_recall1, train_recall, train_recall_all, \ = pred_error(f_pred, q_train, a_train, c_train, options, kf_train) print '\nEvaluating Validation Set' valid_R, valid_recall1, valid_recall, valid_recall_all, \ = pred_error(f_pred, q_valid, a_valid, c_valid, options, kf_valid) print '\nEvaluating Test Set' test_R, test_recall1, test_recall, test_recall_all, \ = pred_error(f_pred, q_test, a_test, c_test, options, kf_test) history_errs.append([valid_recall, test_recall]) if (uidx == 0 or valid_recall >= np.array(history_errs)[:,0].min()): best_p = unzip(tparams) bad_counter = 0 print 'Reward Train', train_R, ' Valid', valid_R, ' Test', test_R print 'Recall@1 Train ' + str(train_recall1), ' Valid', valid_recall1, ' Test',test_recall1 print 'Recall@' + str(prm.k), ' Train', train_recall, ' Valid', valid_recall, ' Test',test_recall print 'Recall@' + str(prm.max_hops_pred * prm.k), ' Train', train_recall_all, ' Valid', valid_recall_all, ' Test', test_recall_all if (len(history_errs) > prm.patience and valid_recall <= np.array(history_errs)[:-prm.patience, 0].min()): bad_counter += 1 if bad_counter > prm.patience: print 'Early Stop!' estop = True break if prm.saveto and np.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p is not None: params = best_p else: params = unzip(tparams) np.savez(prm.saveto, history_errs=history_errs, **params) #pkl.dump(options, open('%s.pkl' % prm.saveto, 'wb'), -1) print 'Done' if prm.learning.lower() == 'q-learning': if prm.saveto_mem and np.mod(uidx, saveFreq) == 0: pkl.dump([mem, mem_r], open(prm.saveto_mem, 'wb'), -1) print 'Seen %d samples' % n_samples if estop: break except KeyboardInterrupt: print "Training interupted" return if __name__ == '__main__': # See parameters.py for all possible parameter and their definitions. train()