Python torch.nn.RNN Examples
The following are 27 code examples of torch.nn.RNN(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.nn, or try the search function.
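Before the project examples, here is a minimal, self-contained sketch of the torch.nn.RNN API itself; the argument names and tensor shapes below follow the official PyTorch documentation:

import torch
import torch.nn as nn

# Two stacked Elman RNN layers: 10-dimensional inputs, 20-dimensional hidden state.
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2, nonlinearity='tanh')

seq_len, batch = 5, 3
x = torch.randn(seq_len, batch, 10)  # (seq_len, batch, input_size)
h0 = torch.zeros(2, batch, 20)       # (num_layers * num_directions, batch, hidden_size)

output, hn = rnn(x, h0)
print(output.shape)  # torch.Size([5, 3, 20]) -- top-layer hidden state at every time step
print(hn.shape)      # torch.Size([2, 3, 20]) -- final hidden state of each layer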
Example #1
Source File: seq_rnn.py From espnet with Apache License 2.0 | 6 votes

def add_arguments(parser):
    """Add arguments to command line argument parser."""
    parser.add_argument(
        "--type",
        type=str,
        default="lstm",
        nargs="?",
        choices=["lstm", "gru"],
        help="Which type of RNN to use",
    )
    parser.add_argument(
        "--layer", "-l", type=int, default=2, help="Number of hidden layers"
    )
    parser.add_argument(
        "--unit", "-u", type=int, default=650, help="Number of hidden units"
    )
    parser.add_argument(
        "--dropout-rate", type=float, default=0.5, help="dropout probability"
    )
    return parser
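A minimal usage sketch for the parser above (the ArgumentParser construction is an assumption, not part of the original file):

import argparse

parser = argparse.ArgumentParser()
parser = add_arguments(parser)
args = parser.parse_args(["--type", "gru", "--layer", "3"])
print(args.type, args.layer, args.unit, args.dropout_rate)  # gru 3 650 0.5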
Example #2
Source File: neural_networks.py From pase with MIT License | 6 votes

def __init__(self, options, inp_dim):
    super(RNN_cudnn, self).__init__()

    self.input_dim = inp_dim
    self.hidden_size = int(options['hidden_size'])
    self.num_layers = int(options['num_layers'])
    self.nonlinearity = options['nonlinearity']
    self.bias = bool(strtobool(options['bias']))
    self.batch_first = bool(strtobool(options['batch_first']))
    self.dropout = float(options['dropout'])
    self.bidirectional = bool(strtobool(options['bidirectional']))

    self.rnn = nn.ModuleList([nn.RNN(self.input_dim, self.hidden_size,
                                     self.num_layers,
                                     nonlinearity=self.nonlinearity,
                                     bias=self.bias,
                                     dropout=self.dropout,
                                     bidirectional=self.bidirectional)])

    # bool * int: adds hidden_size a second time when the RNN is bidirectional
    self.out_dim = self.hidden_size + self.bidirectional * self.hidden_size
Example #3

Source File: rnn.py From OpenNRE with MIT License | 6 votes

def __init__(self, input_size=50, hidden_size=256, dropout=0,
             bidirectional=False, num_layers=1, activation_function="tanh"):
    """
    Args:
        input_size: dimension of input embedding
        hidden_size: hidden size
        dropout: dropout layer on the outputs of each RNN layer except the last layer
        bidirectional: if it is a bidirectional RNN
        num_layers: number of recurrent layers
        activation_function: the activation function of RNN, tanh/relu
    """
    super().__init__()
    if bidirectional:
        hidden_size //= 2  # integer division: nn.RNN requires an int hidden size
    self.rnn = nn.RNN(input_size, hidden_size, num_layers,
                      nonlinearity=activation_function,
                      dropout=dropout,
                      bidirectional=bidirectional)
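Halving hidden_size in the bidirectional branch keeps the concatenated forward/backward output the same width as the unidirectional output; a quick shape check (a sketch, with shapes per the PyTorch docs):

import torch
import torch.nn as nn

rnn_uni = nn.RNN(50, 256, 1)
rnn_bi = nn.RNN(50, 128, 1, bidirectional=True)
x = torch.randn(7, 4, 50)    # (seq_len, batch, input_size)
print(rnn_uni(x)[0].shape)   # torch.Size([7, 4, 256])
print(rnn_bi(x)[0].shape)    # torch.Size([7, 4, 256]) -- 2 directions * 128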
Example #4

Source File: esim.py From video_captioning_rl with MIT License | 6 votes

def auto_rnn(rnn: nn.RNN, seqs, lengths, batch_first=True, init_state=None,
             output_last_states=False):
    batch_size = seqs.size(0) if batch_first else seqs.size(1)
    state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

    if not init_state:
        h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
    else:
        h0 = init_state['h0'].expand(state_shape)
        c0 = init_state['c0'].expand(state_shape)

    packed_pinputs, r_index = pack_for_rnn_seq(seqs, lengths, batch_first)
    output, (hn, cn) = rnn(packed_pinputs, (h0, c0))
    output = unpack_from_rnn_seq(output, r_index, batch_first)

    if not output_last_states:
        return output
    else:
        return output, (hn, cn)
Example #5

Source File: recurrent.py From Character-Level-Language-Modeling-with-Deeper-Self-Attention-pytorch with MIT License | 6 votes

def forward(self, inputs, hidden):
    def select_layer(h_state, i):  # To work on both LSTM / GRU, RNN
        if isinstance(h_state, tuple):
            return tuple([select_layer(s, i) for s in h_state])
        else:
            return h_state[i]

    next_hidden = []
    for i, layer in enumerate(self.layers):
        next_hidden_i = layer(inputs, select_layer(hidden, i))
        output = next_hidden_i[0] if isinstance(next_hidden_i, tuple) else next_hidden_i
        if i + 1 < self.num_layers:
            output = self.dropout(output)
        if self.residual and inputs.size(-1) == output.size(-1):
            inputs = output + inputs
        else:
            inputs = output
        next_hidden.append(next_hidden_i)
    if isinstance(hidden, tuple):
        next_hidden = tuple([torch.stack(h) for h in zip(*next_hidden)])
    else:
        next_hidden = torch.stack(next_hidden)
    return inputs, next_hidden
Example #6

Source File: esim.py From video_captioning_rl with MIT License | 6 votes

def auto_rnn(self, rnn: nn.RNN, seqs, lengths, batch_first=True,
             init_state=None, output_last_states=False):
    batch_size = seqs.size(0) if batch_first else seqs.size(1)
    state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

    if not init_state:
        h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
    else:
        h0 = init_state['h0'].expand(state_shape)
        c0 = init_state['c0'].expand(state_shape)

    packed_pinputs, r_index = pack_for_rnn_seq(seqs, lengths, batch_first)

    if self.args.cell_type == 'gru':
        output, hn = rnn(packed_pinputs, h0)
    else:
        output, (hn, cn) = rnn(packed_pinputs, (h0, c0))

    output = unpack_from_rnn_seq(output, r_index, batch_first)

    if not output_last_states:
        return output
    else:
        # note: cn is never assigned on the GRU path, so requesting the last
        # states with cell_type == 'gru' would raise an UnboundLocalError
        return output, (hn, cn)
Example #7

Source File: ldnet.py From RAdam with Apache License 2.0 | 6 votes

def __init__(self, unit, input_dim, increase_rate, droprate, layer_drop=0):
    super(BasicUnit, self).__init__()

    rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

    self.unit = unit
    self.layer = rnnunit_map[unit](input_dim, increase_rate, 1)

    if 'lstm' == self.unit:
        utils.init_lstm(self.layer)

    self.layer_drop = layer_drop
    self.droprate = droprate
    self.input_dim = input_dim
    self.increase_rate = increase_rate
    self.output_dim = input_dim + increase_rate

    self.init_hidden()
Example #8

Source File: densenet.py From RAdam with Apache License 2.0 | 6 votes

def __init__(self, unit, input_dim, increase_rate, droprate):
    super(BasicUnit, self).__init__()

    rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

    self.unit = unit
    self.layer = rnnunit_map[unit](input_dim, increase_rate, 1)

    if 'lstm' == self.unit:
        utils.init_lstm(self.layer)

    self.droprate = droprate
    self.input_dim = input_dim
    self.increase_rate = increase_rate
    self.output_dim = input_dim + increase_rate

    self.init_hidden()
Example #9

Source File: ddnet.py From RAdam with Apache License 2.0 | 6 votes

def __init__(self, unit, unit_number, emb_dim, hid_dim, droprate):
    super(BasicUnit, self).__init__()

    rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU, 'bnlstm': BNLSTM}

    self.batch_norm = (unit == 'bnlstm')

    self.unit_number = unit_number
    # self.unit_weight = nn.Parameter(torch.FloatTensor([1] * unit_number))

    self.unit_list = nn.ModuleList()
    self.unit_list.append(rnnunit_map[unit](emb_dim, hid_dim, 1))
    if unit_number > 1:
        self.unit_list.extend([rnnunit_map[unit](hid_dim, hid_dim, 1)
                               for ind in range(unit_number - 1)])

    self.droprate = droprate

    self.output_dim = emb_dim + hid_dim * unit_number

    self.init_hidden()
Example #10

Source File: seqlabel.py From Vanilla_NER with Apache License 2.0 | 6 votes

def __init__(self, c_num, c_dim, c_hidden, c_layer,
             w_num, w_dim, w_hidden, w_layer, y_num, droprate, unit='lstm'):
    super(Vanilla_SeqLabel, self).__init__()

    rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

    self.char_embed = nn.Embedding(c_num, c_dim)
    self.word_embed = nn.Embedding(w_num, w_dim)
    self.char_seq = nn.Linear(c_hidden * 2, w_dim)
    self.c_hidden = c_hidden
    self.unit_type = unit

    tmp_rnn_dropout = droprate if c_layer > 1 else 0
    self.char_fw = rnnunit_map[unit](c_dim, c_hidden, c_layer, dropout=tmp_rnn_dropout)
    self.char_bw = rnnunit_map[unit](c_dim, c_hidden, c_layer, dropout=tmp_rnn_dropout)

    tmp_rnn_dropout = droprate if w_layer > 1 else 0
    self.word_rnn = rnnunit_map[unit](w_dim * 2, w_hidden // 2, w_layer,
                                      dropout=tmp_rnn_dropout, bidirectional=True)

    self.y_num = y_num
    self.crf = CRF(w_hidden, y_num)

    self.drop = nn.Dropout(p=droprate)
Example #11

Source File: torch_util.py From combine-FEVER-NSMN with MIT License | 6 votes

def get_state_shape(rnn: nn.RNN, batch_size, bidirectional=False):
    """
    Return the state shape of a given RNN. This is helpful when you want to create a init state for RNN.

    Example:
    c0 = h0 = Variable(src_seq_p.data.new(*get_state_shape([your rnn], 3, bidirectional)).zero_())

    :param rnn: nn.LSTM, nn.GRU or subclass of nn.RNN
    :param batch_size:
    :param bidirectional:
    :return:
    """
    if bidirectional:
        return rnn.num_layers * 2, batch_size, rnn.hidden_size
    else:
        return rnn.num_layers, batch_size, rnn.hidden_size
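On current PyTorch versions the deprecated Variable wrapper from the docstring is unnecessary; a sketch of building the zero initial state from the returned shape (the nn.LSTM configuration here is illustrative):

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=3, hidden_size=7, num_layers=2, bidirectional=True)
shape = get_state_shape(rnn, batch_size=4, bidirectional=rnn.bidirectional)
h0 = c0 = torch.zeros(*shape)  # (num_layers * 2, batch_size, hidden_size) == (4, 4, 7)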
Example #12

Source File: torch_util.py From combine-FEVER-NSMN with MIT License | 6 votes

def auto_rnn(rnn: nn.RNN, seqs, lengths, batch_first=True, init_state=None,
             output_last_states=False):
    batch_size = seqs.size(0) if batch_first else seqs.size(1)
    state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

    # if init_state is None:
    #     h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
    # else:
    #     h0 = init_state[0]  # rnn.num_layers, batch_size, rnn.hidden_size
    #     c0 = init_state[1]

    packed_pinputs, r_index, init_state = pack_for_rnn_seq(seqs, lengths, batch_first, init_state)

    if len(init_state) == 0:
        h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
        init_state = (h0, c0)

    output, last_state = rnn(packed_pinputs, init_state)
    output = unpack_from_rnn_seq(output, r_index, batch_first)

    if not output_last_states:
        return output
    else:
        last_state = reverse_indice_for_state(last_state, r_index)
        return output, last_state
Example #13

Source File: model.py From DeepMove with GNU General Public License v2.0 | 6 votes

def __init__(self, parameters):
    super(TrajPreSimple, self).__init__()
    self.loc_size = parameters.loc_size
    self.loc_emb_size = parameters.loc_emb_size
    self.tim_size = parameters.tim_size
    self.tim_emb_size = parameters.tim_emb_size
    self.hidden_size = parameters.hidden_size
    self.use_cuda = parameters.use_cuda
    self.rnn_type = parameters.rnn_type

    self.emb_loc = nn.Embedding(self.loc_size, self.loc_emb_size)
    self.emb_tim = nn.Embedding(self.tim_size, self.tim_emb_size)

    input_size = self.loc_emb_size + self.tim_emb_size

    if self.rnn_type == 'GRU':
        self.rnn = nn.GRU(input_size, self.hidden_size, 1)
    elif self.rnn_type == 'LSTM':
        self.rnn = nn.LSTM(input_size, self.hidden_size, 1)
    elif self.rnn_type == 'RNN':
        self.rnn = nn.RNN(input_size, self.hidden_size, 1)
    self.init_weights()

    self.fc = nn.Linear(self.hidden_size, self.loc_size)
    self.dropout = nn.Dropout(p=parameters.dropout_p)
Example #14

Source File: drnn.py From pytorch-dilated-rnn with MIT License | 6 votes

def __init__(self, n_input, n_hidden, n_layers, dropout=0, cell_type='GRU',
             batch_first=False):
    super(DRNN, self).__init__()

    self.dilations = [2 ** i for i in range(n_layers)]
    self.cell_type = cell_type
    self.batch_first = batch_first

    layers = []
    if self.cell_type == "GRU":
        cell = nn.GRU
    elif self.cell_type == "RNN":
        cell = nn.RNN
    elif self.cell_type == "LSTM":
        cell = nn.LSTM
    else:
        raise NotImplementedError

    for i in range(n_layers):
        if i == 0:
            c = cell(n_input, n_hidden, dropout=dropout)
        else:
            c = cell(n_hidden, n_hidden, dropout=dropout)
        layers.append(c)
    self.cells = nn.Sequential(*layers)
Example #15

Source File: encoder_base_test.py From magnitude with MIT License | 6 votes

def setUp(self):
    super(TestEncoderBase, self).setUp()
    self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3,
                     hidden_size=7, batch_first=True)
    self.rnn = RNN(bidirectional=True, num_layers=3, input_size=3,
                   hidden_size=7, batch_first=True)
    self.encoder_base = _EncoderBase(stateful=True)

    tensor = torch.rand([5, 7, 3])
    tensor[1, 6:, :] = 0
    tensor[3, 2:, :] = 0
    self.tensor = tensor
    mask = torch.ones(5, 7)
    mask[1, 6:] = 0
    mask[2, :] = 0  # <= completely masked
    mask[3, 2:] = 0
    mask[4, :] = 0  # <= completely masked
    self.mask = mask

    self.batch_size = 5
    self.num_valid = 3
    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    _, _, restoration_indices, sorting_indices = sort_batch_by_length(tensor, sequence_lengths)
    self.sorting_indices = sorting_indices
    self.restoration_indices = restoration_indices
Example #16

Source File: encoder_base_test.py From magnitude with MIT License | 6 votes

def test_non_contiguous_input_states_handled(self):
    # Check that the encoder is robust to non-contiguous input states.
    # A transposition will make the tensors non-contiguous, start them off at the wrong shape
    # and transpose them into the right shape.
    encoder_base = _EncoderBase(stateful=False)
    initial_states = (torch.randn(5, 6, 7).permute(1, 0, 2),
                      torch.randn(5, 6, 7).permute(1, 0, 2))
    assert not initial_states[0].is_contiguous() and not initial_states[1].is_contiguous()
    assert initial_states[0].size() == torch.Size([6, 5, 7])
    assert initial_states[1].size() == torch.Size([6, 5, 7])

    # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
    # whether the initial states are a tuple of tensors or just a single tensor.
    encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask, initial_states)
    encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask, initial_states[0])
Example #17

Source File: deepspeech2.py From inference with Apache License 2.0 | 6 votes

def __init__(self, input_size, hidden_size, rnn_type=nn.LSTM,
             bidirectional=False, batch_norm=True, bias=False):
    """Bias-free RNN wrapper with optional batch norm and bidir summation.

    Instantiates an RNN without bias parameters. Optionally applies a batch
    normalisation layer to the input with the statistics computed over all
    time steps. If the RNN is bidirectional, the output from the forward and
    backward units is summed before return.
    """
    super().__init__()
    if batch_norm:
        self.batch_norm = OverLastDim(nn.BatchNorm1d(input_size))
    self.bidirectional = bidirectional
    self.rnn = rnn_type(input_size=input_size,
                        hidden_size=hidden_size,
                        bidirectional=bidirectional,
                        bias=bias)
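The bidirectional summation mentioned in the docstring is typically implemented along these lines (a sketch of the technique, not this file's actual forward method):

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=16, hidden_size=32, bidirectional=True)
x = torch.randn(10, 4, 16)
out, _ = rnn(x)  # (10, 4, 64): forward and backward outputs concatenated
seq_len, batch, _ = out.size()
# split the last dim into (direction, hidden) and sum over directions
summed = out.view(seq_len, batch, 2, -1).sum(dim=2)  # (10, 4, 32)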
Example #18

Source File: model.py From multiwoz with MIT License | 6 votes

def whatCellType(input_size, hidden_size, cell_type, dropout_rate):
    if cell_type == 'rnn':
        cell = nn.RNN(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'gru':
        cell = nn.GRU(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'lstm':
        cell = nn.LSTM(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
    elif cell_type == 'bigru':
        cell = nn.GRU(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'bilstm':
        cell = nn.LSTM(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
Example #19

Source File: model.py From ConvLab with MIT License | 6 votes

def whatCellType(input_size, hidden_size, cell_type, dropout_rate):
    if cell_type == 'rnn':
        cell = nn.RNN(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'gru':
        cell = nn.GRU(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'lstm':
        cell = nn.LSTM(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
    elif cell_type == 'bigru':
        cell = nn.GRU(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'bilstm':
        cell = nn.LSTM(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
Example #20

Source File: DRNN.py From ESRNN-GPU with MIT License | 6 votes

def __init__(self, n_input, n_hidden, n_layers, dilations, dropout=0,
             cell_type='GRU', batch_first=False):
    super(DRNN, self).__init__()

    self.dilations = dilations
    self.cell_type = cell_type
    self.batch_first = batch_first

    layers = []
    if self.cell_type == "GRU":
        cell = nn.GRU
    elif self.cell_type == "RNN":
        cell = nn.RNN
    elif self.cell_type == "LSTM":
        cell = nn.LSTM
    else:
        raise NotImplementedError

    for i in range(n_layers):
        if i == 0:
            c = cell(n_input, n_hidden, dropout=dropout)
        else:
            c = cell(n_hidden, n_hidden, dropout=dropout)
        layers.append(c)
    self.cells = nn.Sequential(*layers)
Example #21

Source File: model.py From DeepMove with GNU General Public License v2.0 | 5 votes

def __init__(self, parameters):
    super(TrajPreAttnAvgLongUser, self).__init__()
    self.loc_size = parameters.loc_size
    self.loc_emb_size = parameters.loc_emb_size
    self.tim_size = parameters.tim_size
    self.tim_emb_size = parameters.tim_emb_size
    self.uid_size = parameters.uid_size
    self.uid_emb_size = parameters.uid_emb_size
    self.hidden_size = parameters.hidden_size
    self.attn_type = parameters.attn_type
    self.rnn_type = parameters.rnn_type
    self.use_cuda = parameters.use_cuda

    self.emb_loc = nn.Embedding(self.loc_size, self.loc_emb_size)
    self.emb_tim = nn.Embedding(self.tim_size, self.tim_emb_size)
    self.emb_uid = nn.Embedding(self.uid_size, self.uid_emb_size)

    input_size = self.loc_emb_size + self.tim_emb_size
    self.attn = Attn(self.attn_type, self.hidden_size)
    self.fc_attn = nn.Linear(input_size, self.hidden_size)

    if self.rnn_type == 'GRU':
        self.rnn = nn.GRU(input_size, self.hidden_size, 1)
    elif self.rnn_type == 'LSTM':
        self.rnn = nn.LSTM(input_size, self.hidden_size, 1)
    elif self.rnn_type == 'RNN':
        self.rnn = nn.RNN(input_size, self.hidden_size, 1)

    self.fc_final = nn.Linear(2 * self.hidden_size + self.uid_emb_size, self.loc_size)
    self.dropout = nn.Dropout(p=parameters.dropout_p)
    self.init_weights()
Example #22

Source File: model.py From DeepMove with GNU General Public License v2.0 | 5 votes

def forward(self, loc, tim):
    h1 = Variable(torch.zeros(1, 1, self.hidden_size))
    c1 = Variable(torch.zeros(1, 1, self.hidden_size))
    if self.use_cuda:
        h1 = h1.cuda()
        c1 = c1.cuda()

    loc_emb = self.emb_loc(loc)
    tim_emb = self.emb_tim(tim)
    x = torch.cat((loc_emb, tim_emb), 2)
    x = self.dropout(x)

    if self.rnn_type == 'GRU' or self.rnn_type == 'RNN':
        out, h1 = self.rnn(x, h1)
    elif self.rnn_type == 'LSTM':
        out, (h1, c1) = self.rnn(x, (h1, c1))
    out = out.squeeze(1)
    out = F.selu(out)
    out = self.dropout(out)

    y = self.fc(out)
    score = F.log_softmax(y)  # calculate loss by NLLoss
    return score


# ############# rnn model with attention ####################### #
Example #23

Source File: modules.py From neural_chat with MIT License | 5 votes

def forward(self, input):
    """Compute scores from inputs.

    :param input: (bsz x seq_len x num_directions * hiddensize) tensor of
        states, e.g. the output states of an RNN

    :returns: (bsz x seqlen x num_cands) scores for each candidate
    """
    # next compute scores over dictionary
    if self.numsoftmax > 1:
        bsz = input.size(0)
        seqlen = input.size(1) if input.dim() > 1 else 1

        # first compute different softmax scores based on input vec
        # hsz => numsoftmax * esz
        latent = self.latent(input)
        active = self.dropout(self.activation(latent))
        # esz => num_features
        logit = self.e2s(active.view(-1, self.esz))

        # calculate priors: distribution over which softmax scores to use
        # hsz => numsoftmax
        prior_logit = self.prior(input).view(-1, self.numsoftmax)
        # softmax over numsoftmax's
        prior = self.softmax(prior_logit)

        # now combine priors with logits
        prob = self.softmax(logit).view(bsz * seqlen, self.numsoftmax, -1)
        probs = (prob * prior.unsqueeze(2)).sum(1).view(bsz, seqlen, -1)
        scores = probs.log()
    else:
        # hsz => esz, good time for dropout
        e = self.dropout(self.o2e(input))
        # esz => num_features
        scores = self.e2s(e)

    if self.padding_idx == 0:
        pad_score = scores.new(scores.size(0), scores.size(1), 1).fill_(-NEAR_INF)
        scores = torch.cat([pad_score, scores], dim=-1)

    return scores
Example #24

Source File: model.py From l2w with GNU General Public License v3.0 | 5 votes

def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, cutoffs, dropout=0.5,
             tie_weights=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type == 'GRU':  # '==' rather than 'is': identity comparison of strings is unreliable
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError("""An invalid option for `--model` was supplied,
                             options are ['GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers

    self.softmax = AdaptiveSoftmax(nhid, cutoffs)
    self.full = False
Example #25

Source File: modules.py From neural_chat with MIT License | 5 votes

def forward(self, input):
    """Compute scores from inputs.

    :param input: (bsz x seq_len x num_directions * hiddensize) tensor of
        states, e.g. the output states of an RNN

    :returns: (bsz x seqlen x num_cands) scores for each candidate
    """
    # next compute scores over dictionary
    if self.numsoftmax > 1:
        bsz = input.size(0)
        seqlen = input.size(1) if input.dim() > 1 else 1

        # first compute different softmax scores based on input vec
        # hsz => numsoftmax * esz
        latent = self.latent(input)
        active = self.dropout(self.activation(latent))
        # esz => num_features
        logit = F.linear(active.view(-1, self.esz), self.weight, self.bias)

        # calculate priors: distribution over which softmax scores to use
        # hsz => numsoftmax
        prior_logit = self.prior(input).view(-1, self.numsoftmax)
        # softmax over numsoftmax's
        prior = self.softmax(prior_logit)

        # now combine priors with logits
        prob = self.softmax(logit).view(bsz * seqlen, self.numsoftmax, -1)
        probs = (prob * prior.unsqueeze(2)).sum(1).view(bsz, seqlen, -1)
        scores = probs.log()
    else:
        # hsz => esz, good time for dropout
        e = self.dropout(self.o2e(input))
        # esz => num_features
        scores = F.linear(e, self.weight, self.bias)

    if self.padding_idx >= 0:
        scores[:, :, self.padding_idx] = -NEAR_INF

    return scores
Example #26

Source File: modules.py From neural_chat with MIT License | 5 votes

def reorder_encoder_states(self, encoder_states, indices):
    """Reorder encoder states according to a new set of indices."""
    enc_out, hidden, attn_mask = encoder_states

    # make sure we swap the hidden state around, apropos multigpu settings
    hidden = _transpose_hidden_state(hidden)

    # LSTM or GRU/RNN hidden state?
    if isinstance(hidden, torch.Tensor):
        hid, cell = hidden, None
    else:
        hid, cell = hidden

    if not torch.is_tensor(indices):
        # cast indices to a tensor if needed
        indices = torch.LongTensor(indices).to(hid.device)

    hid = hid.index_select(1, indices)
    if cell is None:
        hidden = hid
    else:
        cell = cell.index_select(1, indices)
        hidden = (hid, cell)

    if self.attn_type != 'none':
        enc_out = enc_out.index_select(0, indices)
        attn_mask = attn_mask.index_select(0, indices)

    # and bring it back to multigpu friendliness
    hidden = _transpose_hidden_state(hidden)

    return enc_out, hidden, attn_mask
Example #27

Source File: modules.py From ParlAI with MIT License | 5 votes

def _align_inds(self, encoder_states, cand_inds):
    """Select the encoder states relevant to valid candidates."""
    enc_out, hidden, attn_mask = encoder_states

    # LSTM or GRU/RNN hidden state?
    if isinstance(hidden, torch.Tensor):
        hid, cell = hidden, None
    else:
        hid, cell = hidden

    if len(cand_inds) != hid.size(1):
        # if the number of candidates is mismatched from the number of
        # hidden states, we throw out the hidden states we won't rank with
        cand_indices = hid.new(cand_inds)
        hid = hid.index_select(1, cand_indices)
        if cell is None:
            hidden = hid
        else:
            cell = cell.index_select(1, cand_indices)
            hidden = (hid, cell)

        if self.attn_type != 'none':
            enc_out = enc_out.index_select(0, cand_indices)
            attn_mask = attn_mask.index_select(0, cand_indices)

    return enc_out, hidden, attn_mask