Python torch.nn.RNN Examples

The following are 27 code examples of torch.nn.RNN(). Each example comes from an open-source project; the source file and project are noted above each snippet. You may also want to check out all available functions and classes of the module torch.nn.
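Before the project snippets, here is a minimal, self-contained sketch of the nn.RNN constructor and forward call (shapes follow the default batch_first=False layout; the sizes are arbitrary):

import torch
import torch.nn as nn

# Two stacked Elman RNN layers with tanh units: 10 input features -> 20-dim hidden state.
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2,
             nonlinearity='tanh', batch_first=False, bidirectional=False)

x = torch.randn(5, 3, 10)       # (seq_len, batch, input_size)
h0 = torch.zeros(2, 3, 20)      # (num_layers * num_directions, batch, hidden_size)
output, hn = rnn(x, h0)         # output: (seq_len, batch, num_directions * hidden_size)
print(output.shape, hn.shape)   # torch.Size([5, 3, 20]) torch.Size([2, 3, 20])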
Example #1
Source File: seq_rnn.py    From espnet with Apache License 2.0
def add_arguments(parser):
        """Add arguments to command line argument parser."""
        parser.add_argument(
            "--type",
            type=str,
            default="lstm",
            nargs="?",
            choices=["lstm", "gru"],
            help="Which type of RNN to use",
        )
        parser.add_argument(
            "--layer", "-l", type=int, default=2, help="Number of hidden layers"
        )
        parser.add_argument(
            "--unit", "-u", type=int, default=650, help="Number of hidden units"
        )
        parser.add_argument(
            "--dropout-rate", type=float, default=0.5, help="dropout probability"
        )
        return parser 
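A hypothetical driver for the snippet above (not part of espnet) simply hands the function an argparse parser:

import argparse

parser = argparse.ArgumentParser()
parser = add_arguments(parser)   # the function defined above
args = parser.parse_args(["--type", "gru", "--layer", "3"])
print(args.type, args.layer, args.unit, args.dropout_rate)   # gru 3 650 0.5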
Example #2
Source File: neural_networks.py    From pase with MIT License
def __init__(self, options, inp_dim):
        super(RNN_cudnn, self).__init__()

        self.input_dim = inp_dim
        self.hidden_size = int(options['hidden_size'])
        self.num_layers = int(options['num_layers'])
        self.nonlinearity = options['nonlinearity']
        self.bias = bool(strtobool(options['bias']))
        self.batch_first = bool(strtobool(options['batch_first']))
        self.dropout = float(options['dropout'])
        self.bidirectional = bool(strtobool(options['bidirectional']))

        self.rnn = nn.ModuleList([nn.RNN(self.input_dim, self.hidden_size, self.num_layers,
                                         nonlinearity=self.nonlinearity, bias=self.bias,
                                         dropout=self.dropout, bidirectional=self.bidirectional)])

        # self.bidirectional is a bool, so the output width doubles only for a bidirectional RNN
        self.out_dim = self.hidden_size + self.bidirectional * self.hidden_size
Example #3
Source File: rnn.py    From OpenNRE with MIT License
def __init__(self, input_size=50, hidden_size=256, dropout=0, bidirectional=False, num_layers=1, activation_function="tanh"):
        """
        Args:
            input_size: dimension of the input embedding
            hidden_size: hidden size
            dropout: dropout layer on the outputs of each RNN layer except the last layer
            bidirectional: if it is a bidirectional RNN
            num_layers: number of recurrent layers
            activation_function: the activation function of RNN, tanh/relu
        """
        super().__init__()
        if bidirectional:
            hidden_size //= 2  # integer division keeps hidden_size an int for nn.RNN
        self.rnn = nn.RNN(input_size, 
                          hidden_size, 
                          num_layers, 
                          nonlinearity=activation_function, 
                          dropout=dropout, 
                          bidirectional=bidirectional) 
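Halving hidden_size above keeps the per-step output at the requested width, because a bidirectional nn.RNN concatenates the forward and backward states and returns num_directions * hidden_size features. A quick check with the same defaults:

import torch
import torch.nn as nn

hidden_size = 256
birnn = nn.RNN(input_size=50, hidden_size=hidden_size // 2, num_layers=1,
               nonlinearity='tanh', bidirectional=True)
out, hn = birnn(torch.randn(7, 4, 50))   # input: (seq_len, batch, input_size)
print(out.shape)                         # torch.Size([7, 4, 256]) -- still 256 features per step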
Example #4
Source File: esim.py    From video_captioning_rl with MIT License
def auto_rnn(rnn: nn.RNN, seqs, lengths, batch_first=True, init_state=None, output_last_states=False):
    batch_size = seqs.size(0) if batch_first else seqs.size(1)
    state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

    if not init_state:
        h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
    else:
        h0 = init_state['h0'].expand(state_shape)
        c0 = init_state['c0'].expand(state_shape)

    packed_pinputs, r_index = pack_for_rnn_seq(seqs, lengths, batch_first)
    output, (hn, cn) = rnn(packed_pinputs, (h0, c0))
    output = unpack_from_rnn_seq(output, r_index, batch_first)

    if not output_last_states:
        return output
    else:
        return output, (hn, cn) 
Example #5
Source File: recurrent.py    From Character-Level-Language-Modeling-with-Deeper-Self-Attention-pytorch with MIT License
def forward(self, inputs, hidden):
        def select_layer(h_state, i):  # To work on both LSTM / GRU, RNN
            if isinstance(h_state, tuple):
                return tuple([select_layer(s, i) for s in h_state])
            else:
                return h_state[i]

        next_hidden = []
        for i, layer in enumerate(self.layers):
            next_hidden_i = layer(inputs, select_layer(hidden, i))
            output = next_hidden_i[0] if isinstance(next_hidden_i, tuple) \
                else next_hidden_i
            if i + 1 < self.num_layers:
                output = self.dropout(output)
            if self.residual and inputs.size(-1) == output.size(-1):
                inputs = output + inputs
            else:
                inputs = output
            next_hidden.append(next_hidden_i)
        if isinstance(hidden, tuple):
            next_hidden = tuple([torch.stack(h) for h in zip(*next_hidden)])
        else:
            next_hidden = torch.stack(next_hidden)
        return inputs, next_hidden 
Example #6
Source File: esim.py    From video_captioning_rl with MIT License
def auto_rnn(self, rnn: nn.RNN, seqs, lengths, batch_first=True, init_state=None, output_last_states=False):
        batch_size = seqs.size(0) if batch_first else seqs.size(1)
        state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

        if not init_state:
            h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
        else:
            h0 = init_state['h0'].expand(state_shape)
            c0 = init_state['c0'].expand(state_shape)

        packed_pinputs, r_index = pack_for_rnn_seq(seqs, lengths, batch_first)
        if self.args.cell_type == 'gru':
            output, hn = rnn(packed_pinputs, h0)
        else:
            output, (hn, cn) = rnn(packed_pinputs, (h0, c0))
        output = unpack_from_rnn_seq(output, r_index, batch_first)

        if not output_last_states:
            return output
        else:
            return output, (hn, cn) 
Example #7
Source File: ldnet.py    From RAdam with Apache License 2.0
def __init__(self, unit, input_dim, increase_rate, droprate, layer_drop = 0):
        super(BasicUnit, self).__init__()

        rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

        self.unit = unit

        self.layer = rnnunit_map[unit](input_dim, increase_rate, 1)

        if 'lstm' == self.unit:
            utils.init_lstm(self.layer)

        self.layer_drop = layer_drop

        self.droprate = droprate

        self.input_dim = input_dim
        self.increase_rate = increase_rate
        self.output_dim = input_dim + increase_rate

        self.init_hidden() 
Example #8
Source File: densenet.py    From RAdam with Apache License 2.0
def __init__(self, unit, input_dim, increase_rate, droprate):
        super(BasicUnit, self).__init__()

        rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

        self.unit = unit

        self.layer = rnnunit_map[unit](input_dim, increase_rate, 1)

        if 'lstm' == self.unit:
            utils.init_lstm(self.layer)

        self.droprate = droprate

        self.input_dim = input_dim
        self.increase_rate = increase_rate
        self.output_dim = input_dim + increase_rate

        self.init_hidden() 
Example #9
Source File: ddnet.py    From RAdam with Apache License 2.0
def __init__(self, unit, unit_number, emb_dim, hid_dim, droprate):
        super(BasicUnit, self).__init__()

        rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU, 'bnlstm': BNLSTM}

        self.batch_norm = (unit == 'bnlstm')

        self.unit_number = unit_number
        # self.unit_weight = nn.Parameter(torch.FloatTensor([1] * unit_number))

        self.unit_list = nn.ModuleList()
        self.unit_list.append(rnnunit_map[unit](emb_dim, hid_dim, 1))
        if unit_number > 1:
            self.unit_list.extend([rnnunit_map[unit](hid_dim, hid_dim, 1) for ind in range(unit_number - 1)])

        self.droprate = droprate

        self.output_dim = emb_dim + hid_dim * unit_number

        self.init_hidden() 
Example #10
Source File: seqlabel.py    From Vanilla_NER with Apache License 2.0
def __init__(self, c_num, c_dim, c_hidden, c_layer, w_num, w_dim, w_hidden, w_layer, y_num, droprate, unit='lstm'):
        super(Vanilla_SeqLabel, self).__init__()

        rnnunit_map = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}

        self.char_embed = nn.Embedding(c_num, c_dim)
        self.word_embed = nn.Embedding(w_num, w_dim)

        self.char_seq = nn.Linear(c_hidden * 2, w_dim)

        self.c_hidden = c_hidden
        self.unit_type = unit

        tmp_rnn_dropout = droprate if c_layer > 1 else 0
        self.char_fw = rnnunit_map[unit](c_dim, c_hidden, c_layer, dropout = tmp_rnn_dropout)
        self.char_bw = rnnunit_map[unit](c_dim, c_hidden, c_layer, dropout = tmp_rnn_dropout)

        tmp_rnn_dropout = droprate if w_layer > 1 else 0
        self.word_rnn = rnnunit_map[unit](w_dim * 2, w_hidden // 2, w_layer, dropout = tmp_rnn_dropout, bidirectional = True)

        self.y_num = y_num
        self.crf = CRF(w_hidden, y_num)

        self.drop = nn.Dropout(p = droprate) 
Example #11
Source File: torch_util.py    From combine-FEVER-NSMN with MIT License
def get_state_shape(rnn: nn.RNN, batch_size, bidirectional=False):
    """
    Return the state shape of a given RNN. This is helpful when you want to create an initial state for an RNN.

    Example:
    c0 = h0 = Variable(src_seq_p.data.new(*get_state_shape([your rnn], 3, bidirectional)).zero_())
    
    :param rnn: nn.LSTM, nn.GRU or subclass of nn.RNN
    :param batch_size:  
    :param bidirectional:  
    :return: 
    """
    if bidirectional:
        return rnn.num_layers * 2, batch_size, rnn.hidden_size
    else:
        return rnn.num_layers, batch_size, rnn.hidden_size 
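A sketch of how the returned shape can seed an initial hidden state for a plain nn.RNN; torch.zeros stands in for the deprecated Variable wrapper used elsewhere in these snippets:

import torch
import torch.nn as nn

rnn = nn.RNN(input_size=3, hidden_size=7, num_layers=2, bidirectional=True)
batch_size = 5
state_shape = get_state_shape(rnn, batch_size, bidirectional=True)   # (4, 5, 7)
h0 = torch.zeros(*state_shape)
output, hn = rnn(torch.randn(11, batch_size, 3), h0)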
Example #12
Source File: torch_util.py    From combine-FEVER-NSMN with MIT License
def auto_rnn(rnn: nn.RNN, seqs, lengths, batch_first=True, init_state=None, output_last_states=False):
    batch_size = seqs.size(0) if batch_first else seqs.size(1)
    state_shape = get_state_shape(rnn, batch_size, rnn.bidirectional)

    # if init_state is None:
    #     h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
    # else:
    #     h0 = init_state[0] # rnn.num_layers, batch_size, rnn.hidden_size
    #     c0 = init_state[1]

    packed_pinputs, r_index, init_state = pack_for_rnn_seq(seqs, lengths, batch_first, init_state)

    if len(init_state) == 0:
        h0 = c0 = Variable(seqs.data.new(*state_shape).zero_())
        init_state = (h0, c0)

    output, last_state = rnn(packed_pinputs, init_state)
    output = unpack_from_rnn_seq(output, r_index, batch_first)

    if not output_last_states:
        return output
    else:
        last_state = reverse_indice_for_state(last_state, r_index)
        return output, last_state 
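pack_for_rnn_seq, unpack_from_rnn_seq and reverse_indice_for_state are project-specific helpers; the core packing idea can be sketched with PyTorch's built-in utilities (a simplified stand-in, not the project's implementation):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

rnn = nn.RNN(input_size=8, hidden_size=16, batch_first=True)
seqs = torch.randn(4, 10, 8)                  # (batch, max_len, input_size), zero-padded
lengths = torch.tensor([10, 7, 5, 2])         # true lengths of the four sequences

packed = pack_padded_sequence(seqs, lengths, batch_first=True, enforce_sorted=False)
packed_out, hn = rnn(packed)                  # padding never reaches the RNN
output, _ = pad_packed_sequence(packed_out, batch_first=True)   # back to (batch, max_len, hidden_size)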
Example #13
Source File: model.py    From DeepMove with GNU General Public License v2.0
def __init__(self, parameters):
        super(TrajPreSimple, self).__init__()
        self.loc_size = parameters.loc_size
        self.loc_emb_size = parameters.loc_emb_size
        self.tim_size = parameters.tim_size
        self.tim_emb_size = parameters.tim_emb_size
        self.hidden_size = parameters.hidden_size
        self.use_cuda = parameters.use_cuda
        self.rnn_type = parameters.rnn_type

        self.emb_loc = nn.Embedding(self.loc_size, self.loc_emb_size)
        self.emb_tim = nn.Embedding(self.tim_size, self.tim_emb_size)

        input_size = self.loc_emb_size + self.tim_emb_size

        if self.rnn_type == 'GRU':
            self.rnn = nn.GRU(input_size, self.hidden_size, 1)
        elif self.rnn_type == 'LSTM':
            self.rnn = nn.LSTM(input_size, self.hidden_size, 1)
        elif self.rnn_type == 'RNN':
            self.rnn = nn.RNN(input_size, self.hidden_size, 1)
        self.init_weights()

        self.fc = nn.Linear(self.hidden_size, self.loc_size)
        self.dropout = nn.Dropout(p=parameters.dropout_p) 
Example #14
Source File: drnn.py    From pytorch-dilated-rnn with MIT License
def __init__(self, n_input, n_hidden, n_layers, dropout=0, cell_type='GRU', batch_first=False):
        super(DRNN, self).__init__()

        self.dilations = [2 ** i for i in range(n_layers)]
        self.cell_type = cell_type
        self.batch_first = batch_first

        layers = []
        if self.cell_type == "GRU":
            cell = nn.GRU
        elif self.cell_type == "RNN":
            cell = nn.RNN
        elif self.cell_type == "LSTM":
            cell = nn.LSTM
        else:
            raise NotImplementedError

        for i in range(n_layers):
            if i == 0:
                c = cell(n_input, n_hidden, dropout=dropout)
            else:
                c = cell(n_hidden, n_hidden, dropout=dropout)
            layers.append(c)
        self.cells = nn.Sequential(*layers) 
Example #15
Source File: encoder_base_test.py    From magnitude with MIT License
def setUp(self):
        super(TestEncoderBase, self).setUp()
        self.lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        self.rnn = RNN(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        self.encoder_base = _EncoderBase(stateful=True)

        tensor = torch.rand([5, 7, 3])
        tensor[1, 6:, :] = 0
        tensor[3, 2:, :] = 0
        self.tensor = tensor
        mask = torch.ones(5, 7)
        mask[1, 6:] = 0
        mask[2, :] = 0  # <= completely masked
        mask[3, 2:] = 0
        mask[4, :] = 0  # <= completely masked
        self.mask = mask

        self.batch_size = 5
        self.num_valid = 3
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        _, _, restoration_indices, sorting_indices = sort_batch_by_length(tensor, sequence_lengths)
        self.sorting_indices = sorting_indices
        self.restoration_indices = restoration_indices 
Example #16
Source File: encoder_base_test.py    From magnitude with MIT License
def test_non_contiguous_input_states_handled(self):
        # Check that the encoder is robust to non-contiguous input states.

        # A transposition will make the tensors non-contiguous, start them off at the wrong shape
        # and transpose them into the right shape.
        encoder_base = _EncoderBase(stateful=False)
        initial_states = (torch.randn(5, 6, 7).permute(1, 0, 2),
                          torch.randn(5, 6, 7).permute(1, 0, 2))
        assert not initial_states[0].is_contiguous() and not initial_states[1].is_contiguous()
        assert initial_states[0].size() == torch.Size([6, 5, 7])
        assert initial_states[1].size() == torch.Size([6, 5, 7])

        # We'll pass them through an LSTM encoder and a vanilla RNN encoder to make sure it works
        # whether the initial states are a tuple of tensors or just a single tensor.
        encoder_base.sort_and_run_forward(self.lstm, self.tensor, self.mask, initial_states)
        encoder_base.sort_and_run_forward(self.rnn, self.tensor, self.mask, initial_states[0]) 
Example #17
Source File: deepspeech2.py    From inference with Apache License 2.0
def __init__(self, input_size, hidden_size, rnn_type=nn.LSTM,
                 bidirectional=False, batch_norm=True, bias=False):
        """Bias-free RNN wrapper with optional batch norm and bidir summation.

        Instantiates an RNN without bias parameters. Optionally applies a batch
        normalisation layer to the input with the statistics computed over all
        time steps. If the RNN is bidirectional, the output from the forward
        and backward units is summed before return.
        """
        super().__init__()
        if batch_norm:
            self.batch_norm = OverLastDim(nn.BatchNorm1d(input_size))
        self.bidirectional = bidirectional
        self.rnn = rnn_type(input_size=input_size,
                            hidden_size=hidden_size,
                            bidirectional=bidirectional,
                            bias=bias) 
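The summation mentioned in the docstring is not shown here; one common way to express it (a sketch, not this project's forward method) reshapes the output so the direction axis can be reduced:

import torch
import torch.nn as nn

hidden_size = 64
rnn = nn.LSTM(input_size=32, hidden_size=hidden_size, bidirectional=True, bias=False)
out, _ = rnn(torch.randn(50, 8, 32))                 # out: (time, batch, 2 * hidden_size)
T, B, _ = out.shape
out = out.view(T, B, 2, hidden_size).sum(dim=2)      # forward + backward -> (time, batch, hidden_size)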
Example #18
Source File: model.py    From multiwoz with MIT License
def whatCellType(input_size, hidden_size, cell_type, dropout_rate):
    if cell_type == 'rnn':
        cell = nn.RNN(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'gru':
        cell = nn.GRU(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'lstm':
        cell = nn.LSTM(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
    elif cell_type == 'bigru':
        cell = nn.GRU(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'bilstm':
        cell = nn.LSTM(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell 
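Note that for every cell type above, dropout is only applied between stacked recurrent layers, so passing a non-zero dropout_rate with the default num_layers=1 makes PyTorch emit a UserWarning and has no effect:

import torch.nn as nn

cell = nn.RNN(input_size=128, hidden_size=256, dropout=0.3)                   # warns: expects num_layers > 1
stacked = nn.RNN(input_size=128, hidden_size=256, num_layers=2, dropout=0.3)  # dropout applied between the layers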
Example #19
Source File: model.py    From ConvLab with MIT License
def whatCellType(input_size, hidden_size, cell_type, dropout_rate):
    if cell_type == 'rnn':
        cell = nn.RNN(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'gru':
        cell = nn.GRU(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'lstm':
        cell = nn.LSTM(input_size, hidden_size, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell
    elif cell_type == 'bigru':
        cell = nn.GRU(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_gru(cell)
        return cell
    elif cell_type == 'bilstm':
        cell = nn.LSTM(input_size, hidden_size, bidirectional=True, dropout=dropout_rate, batch_first=False)
        init_lstm(cell)
        return cell 
Example #20
Source File: DRNN.py    From ESRNN-GPU with MIT License
def __init__(self, n_input, n_hidden, n_layers, dilations, dropout=0, cell_type='GRU', batch_first=False):

        super(DRNN, self).__init__()

        self.dilations = dilations
        self.cell_type = cell_type
        self.batch_first = batch_first

        layers = []
        if self.cell_type == "GRU":
            cell = nn.GRU
        elif self.cell_type == "RNN":
            cell = nn.RNN
        elif self.cell_type == "LSTM":
            cell = nn.LSTM
        else:
            raise NotImplementedError

        for i in range(n_layers):
            if i == 0:
                c = cell(n_input, n_hidden, dropout=dropout)
            else:
                c = cell(n_hidden, n_hidden, dropout=dropout)
            layers.append(c)
        self.cells = nn.Sequential(*layers) 
Example #21
Source File: model.py    From DeepMove with GNU General Public License v2.0
def __init__(self, parameters):
        super(TrajPreAttnAvgLongUser, self).__init__()
        self.loc_size = parameters.loc_size
        self.loc_emb_size = parameters.loc_emb_size
        self.tim_size = parameters.tim_size
        self.tim_emb_size = parameters.tim_emb_size
        self.uid_size = parameters.uid_size
        self.uid_emb_size = parameters.uid_emb_size
        self.hidden_size = parameters.hidden_size
        self.attn_type = parameters.attn_type
        self.rnn_type = parameters.rnn_type
        self.use_cuda = parameters.use_cuda

        self.emb_loc = nn.Embedding(self.loc_size, self.loc_emb_size)
        self.emb_tim = nn.Embedding(self.tim_size, self.tim_emb_size)
        self.emb_uid = nn.Embedding(self.uid_size, self.uid_emb_size)

        input_size = self.loc_emb_size + self.tim_emb_size
        self.attn = Attn(self.attn_type, self.hidden_size)
        self.fc_attn = nn.Linear(input_size, self.hidden_size)

        if self.rnn_type == 'GRU':
            self.rnn = nn.GRU(input_size, self.hidden_size, 1)
        elif self.rnn_type == 'LSTM':
            self.rnn = nn.LSTM(input_size, self.hidden_size, 1)
        elif self.rnn_type == 'RNN':
            self.rnn = nn.RNN(input_size, self.hidden_size, 1)

        self.fc_final = nn.Linear(2 * self.hidden_size + self.uid_emb_size, self.loc_size)
        self.dropout = nn.Dropout(p=parameters.dropout_p)
        self.init_weights() 
Example #22
Source File: model.py    From DeepMove with GNU General Public License v2.0
def forward(self, loc, tim):
        h1 = Variable(torch.zeros(1, 1, self.hidden_size))
        c1 = Variable(torch.zeros(1, 1, self.hidden_size))
        if self.use_cuda:
            h1 = h1.cuda()
            c1 = c1.cuda()

        loc_emb = self.emb_loc(loc)
        tim_emb = self.emb_tim(tim)
        x = torch.cat((loc_emb, tim_emb), 2)
        x = self.dropout(x)

        if self.rnn_type == 'GRU' or self.rnn_type == 'RNN':
            out, h1 = self.rnn(x, h1)
        elif self.rnn_type == 'LSTM':
            out, (h1, c1) = self.rnn(x, (h1, c1))
        out = out.squeeze(1)
        out = F.selu(out)
        out = self.dropout(out)

        y = self.fc(out)
        score = F.log_softmax(y, dim=1)  # loss is computed with NLLLoss
        return score


# ############# rnn model with attention ####################### # 
Example #23
Source File: modules.py    From neural_chat with MIT License
def forward(self, input):
        """Compute scores from inputs.

        :param input: (bsz x seq_len x num_directions * hiddensize) tensor of
                       states, e.g. the output states of an RNN

        :returns: (bsz x seqlen x num_cands) scores for each candidate
        """
        # next compute scores over dictionary
        if self.numsoftmax > 1:
            bsz = input.size(0)
            seqlen = input.size(1) if input.dim() > 1 else 1

            # first compute different softmax scores based on input vec
            # hsz => numsoftmax * esz
            latent = self.latent(input)
            active = self.dropout(self.activation(latent))
            # esz => num_features
            logit = self.e2s(active.view(-1, self.esz))

            # calculate priors: distribution over which softmax scores to use
            # hsz => numsoftmax
            prior_logit = self.prior(input).view(-1, self.numsoftmax)
            # softmax over numsoftmax's
            prior = self.softmax(prior_logit)

            # now combine priors with logits
            prob = self.softmax(logit).view(bsz * seqlen, self.numsoftmax, -1)
            probs = (prob * prior.unsqueeze(2)).sum(1).view(bsz, seqlen, -1)
            scores = probs.log()
        else:
            # hsz => esz, good time for dropout
            e = self.dropout(self.o2e(input))
            # esz => num_features
            scores = self.e2s(e)

        if self.padding_idx == 0:
            pad_score = scores.new(scores.size(0), scores.size(1), 1).fill_(-NEAR_INF)
            scores = torch.cat([pad_score, scores], dim=-1)

        return scores 
Example #24
Source File: model.py    From l2w with GNU General Public License v3.0
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, cutoffs, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type == 'GRU':
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError( """An invalid option for `--model` was supplied,
                                 options are ['GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
        self.softmax = AdaptiveSoftmax(nhid, cutoffs)

        self.full = False 
Example #25
Source File: modules.py    From neural_chat with MIT License
def forward(self, input):
        """Compute scores from inputs.

        :param input: (bsz x seq_len x num_directions * hiddensize) tensor of
                       states, e.g. the output states of an RNN

        :returns: (bsz x seqlen x num_cands) scores for each candidate
        """
        # next compute scores over dictionary
        if self.numsoftmax > 1:
            bsz = input.size(0)
            seqlen = input.size(1) if input.dim() > 1 else 1

            # first compute different softmax scores based on input vec
            # hsz => numsoftmax * esz
            latent = self.latent(input)
            active = self.dropout(self.activation(latent))
            # esz => num_features
            logit = F.linear(active.view(-1, self.esz), self.weight, self.bias)

            # calculate priors: distribution over which softmax scores to use
            # hsz => numsoftmax
            prior_logit = self.prior(input).view(-1, self.numsoftmax)
            # softmax over numsoftmax's
            prior = self.softmax(prior_logit)

            # now combine priors with logits
            prob = self.softmax(logit).view(bsz * seqlen, self.numsoftmax, -1)
            probs = (prob * prior.unsqueeze(2)).sum(1).view(bsz, seqlen, -1)
            scores = probs.log()
        else:
            # hsz => esz, good time for dropout
            e = self.dropout(self.o2e(input))
            # esz => num_features
            scores = F.linear(e, self.weight, self.bias)

        if self.padding_idx >= 0:
            scores[:, :, self.padding_idx] = -NEAR_INF

        return scores 
Example #26
Source File: modules.py    From neural_chat with MIT License
def reorder_encoder_states(self, encoder_states, indices):
        """Reorder encoder states according to a new set of indices."""
        enc_out, hidden, attn_mask = encoder_states

        # make sure we swap the hidden state around, apropos multigpu settings
        hidden = _transpose_hidden_state(hidden)

        # LSTM or GRU/RNN hidden state?
        if isinstance(hidden, torch.Tensor):
            hid, cell = hidden, None
        else:
            hid, cell = hidden

        if not torch.is_tensor(indices):
            # cast indices to a tensor if needed
            indices = torch.LongTensor(indices).to(hid.device)

        hid = hid.index_select(1, indices)
        if cell is None:
            hidden = hid
        else:
            cell = cell.index_select(1, indices)
            hidden = (hid, cell)

        if self.attn_type != 'none':
            enc_out = enc_out.index_select(0, indices)
            attn_mask = attn_mask.index_select(0, indices)

        # and bring it back to multigpu friendliness
        hidden = _transpose_hidden_state(hidden)

        return enc_out, hidden, attn_mask 
Example #27
Source File: modules.py    From ParlAI with MIT License
def _align_inds(self, encoder_states, cand_inds):
        """
        Select the encoder states relevant to valid candidates.
        """
        enc_out, hidden, attn_mask = encoder_states

        # LSTM or GRU/RNN hidden state?
        if isinstance(hidden, torch.Tensor):
            hid, cell = hidden, None
        else:
            hid, cell = hidden

        if len(cand_inds) != hid.size(1):
            # if the number of candidates is mismatched from the number of
            # hidden states, we throw out the hidden states we won't rank with
            cand_indices = hid.new(cand_inds)
            hid = hid.index_select(1, cand_indices)
            if cell is None:
                hidden = hid
            else:
                cell = cell.index_select(1, cand_indices)
                hidden = (hid, cell)

            if self.attn_type != 'none':
                enc_out = enc_out.index_select(0, cand_indices)
                attn_mask = attn_mask.index_select(0, cand_indices)

        return enc_out, hidden, attn_mask