Python torch.bernoulli() Examples

The following are 30 code examples of torch.bernoulli(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
Example #1
Source File:    From lung_nodule_detector with MIT License 6 votes vote down vote up
def forward(self, x0, x1, x2, x3):
    """Combine four feature maps through eight blocks with stochastic path dropping.

    When ``self.p > 0`` and the module is training, every block output is
    multiplied by its own Bernoulli(1 - p) draw (0 or 1). Otherwise all block
    outputs are summed and the sum is scaled by ``1 - p``.

    If ``self.integrate`` is set, ``x1``/``x2``/``x3`` are added to the three
    sums before the ReLU.

    :return: ``(x0, relu(out1), relu(out2), relu(out3))``; ``x0`` is passed through as is.
    """
    # NOTE(review): the right-hand operand of this `and` and the `else:` below
    # were missing from the snippet. `self.training` is restored on the
    # assumption that this is an nn.Module method -- confirm against upstream.
    if self.p > 0 and self.training:
        # One keep/drop coefficient per block.
        coef = torch.bernoulli((1.0 - self.p) * torch.ones(8))
        out1 = coef[0] * self.block01(x0) + coef[1] * self.block11(x1) + coef[2] * self.block21(x2)
        out2 = coef[3] * self.block12(x1) + coef[4] * self.block22(x2) + coef[5] * self.block32(x3)
        out3 = coef[6] * self.block23(x2) + coef[7] * self.block33(x3)
    else:
        # Deterministic path: scale by the keep probability instead of sampling.
        out1 = (1 - self.p) * (self.block01(x0) + self.block11(x1) + self.block21(x2))
        out2 = (1 - self.p) * (self.block12(x1) + self.block22(x2) + self.block32(x3))
        out3 = (1 - self.p) * (self.block23(x2) + self.block33(x3))

    if self.integrate:
        out1 += x1
        out2 += x2
        out3 += x3

    return x0, self.relu(out1), self.relu(out2), self.relu(out3)
Example #2
Source File:    From fastHan with Apache License 2.0 6 votes vote down vote up
def drop_word(self, words):
    """Randomly replace entries of ``words`` with the unknown-word index.

    Only active when ``self.word_dropout > 0`` during training; otherwise
    ``words`` is returned untouched.

    :param torch.LongTensor words: batch_size x max_len
    :return: the (possibly modified) index tensor
    """
    # NOTE(review): the second operand of `and` was missing from the snippet;
    # `self.training` is assumed (nn.Module convention) -- confirm upstream.
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            if self._word_sep_index:  # separator tokens must never be dropped
                # The snippet compared against `_wordpiece_unk_index` here and
                # in the restore step below, which left separators unprotected;
                # the separator index is used instead, as the comment intends.
                sep_mask = words.eq(self._word_sep_index)
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # larger word_dropout -> more positions set
            # NOTE(review): right-hand side was missing; assumes padding id 0 -- TODO confirm.
            pad_mask = words.ne(0)
            mask = pad_mask & mask  # padding positions are never turned into unk
            words = words.masked_fill(mask, self._word_unk_index)
            if self._word_sep_index:
                words.masked_fill_(sep_mask, self._word_sep_index)
    return words
Example #3
Source File:    From ASFF with GNU General Public License v3.0 6 votes vote down vote up
def forward(self, x):
    """Apply DropBlock-style regularisation to ``x``.

    Returns ``x`` unchanged outside training or when ``self.keep_prob == 1``.
    Otherwise a Bernoulli(gamma) seed map is max-pooled into a drop mask,
    which is applied to ``x`` and rescaled by ``numel / sum`` of the mask.
    """
    # NOTE(review): the operand after `not` was missing from the snippet;
    # `self.training` is assumed (nn.Module convention) -- confirm upstream.
    if not self.training or self.keep_prob == 1:  # set keep_prob=1 to turn off dropblock
        return x
    if self.gamma is None:
        self.gamma = self.calculate_gamma(x)

    # TODO: FP16 is not fully supported; CUDA half inputs are computed in float32.
    fp16 = x.type() == 'torch.cuda.HalfTensor'
    if fp16:
        x = x.float()

    p = torch.ones_like(x) * self.gamma
    # NOTE(review): the pooling arguments were truncated in the snippet; the
    # attribute names below are assumed -- verify against the class __init__.
    mask = 1 - torch.nn.functional.max_pool2d(torch.bernoulli(p),
                                              self.kernel_size,
                                              self.stride,
                                              self.padding)

    # Rescale so the expected activation magnitude is preserved.
    out = mask * x * (mask.numel() / mask.sum())

    if fp16:
        out = out.half()
    return out
Example #4
Source File:    From fastHan with Apache License 2.0 6 votes vote down vote up
def drop_word(self, words):
    """Randomly replace entries of ``words`` with the unknown-word index.

    Only active when ``self.word_dropout > 0`` during training; otherwise
    ``words`` is returned untouched.

    :param torch.LongTensor words: batch_size x max_len
    :return: the (possibly modified) index tensor
    """
    # NOTE(review): the second operand of `and` was missing from the snippet;
    # `self.training` is assumed (nn.Module convention) -- confirm upstream.
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            if self._word_sep_index:  # separator tokens must never be dropped
                sep_mask = words.eq(self._word_sep_index)
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # larger word_dropout -> more positions set
            # NOTE(review): right-hand side was missing; assumes padding id 0 -- TODO confirm.
            pad_mask = words.ne(0)
            mask = pad_mask & mask  # padding positions are never turned into unk
            words = words.masked_fill(mask, self._word_unk_index)
            if self._word_sep_index:
                # Put back any separator that was replaced above.
                words.masked_fill_(sep_mask, self._word_sep_index)
    return words
Example #5
Source File:    From DeepLung with GNU General Public License v3.0 6 votes vote down vote up
def forward(self, x0, x1, x2, x3):
    """Combine four feature maps through eight blocks with stochastic path dropping.

    When ``self.p > 0`` and the module is training, every block output is
    multiplied by its own Bernoulli(1 - p) draw (0 or 1). Otherwise all block
    outputs are summed and the sum is scaled by ``1 - p``.

    If ``self.integrate`` is set, ``x1``/``x2``/``x3`` are added to the three
    sums before the ReLU.

    :return: ``(x0, relu(out1), relu(out2), relu(out3))``; ``x0`` is passed through as is.
    """
    # NOTE(review): the right-hand operand of this `and` and the `else:` below
    # were missing from the snippet. `self.training` is restored on the
    # assumption that this is an nn.Module method -- confirm against upstream.
    if self.p > 0 and self.training:
        # One keep/drop coefficient per block.
        coef = torch.bernoulli((1.0 - self.p) * torch.ones(8))
        out1 = coef[0] * self.block01(x0) + coef[1] * self.block11(x1) + coef[2] * self.block21(x2)
        out2 = coef[3] * self.block12(x1) + coef[4] * self.block22(x2) + coef[5] * self.block32(x3)
        out3 = coef[6] * self.block23(x2) + coef[7] * self.block33(x3)
    else:
        # Deterministic path: scale by the keep probability instead of sampling.
        out1 = (1 - self.p) * (self.block01(x0) + self.block11(x1) + self.block21(x2))
        out2 = (1 - self.p) * (self.block12(x1) + self.block22(x2) + self.block32(x3))
        out3 = (1 - self.p) * (self.block23(x2) + self.block33(x3))

    if self.integrate:
        out1 += x1
        out2 += x2
        out3 += x3

    return x0, self.relu(out1), self.relu(out2), self.relu(out3)
Example #6
Source File:    From BERT-for-RRC-ABSA with Apache License 2.0 6 votes vote down vote up
def gen_inputs_labels(self, inputs, masked_indices):
    """Build masked-LM inputs and labels from an already chosen set of positions.

    ``masked_indices`` is a boolean tensor marking the positions to train on.
    Of those positions roughly 80% become the mask token, 10% a random token
    and 10% stay unchanged. Labels are ``-100`` everywhere else, so only the
    selected positions contribute to the loss.

    :return: ``(corrupted_inputs, labels)``; both are fresh tensors, the argument is not modified.
    """
    corrupted = inputs.clone()
    targets = corrupted.clone()
    targets[~masked_indices] = -100
    shape = targets.shape

    # 80% of the selected positions: replace with tokenizer.mask_token ([MASK]).
    use_mask_token = torch.bernoulli(torch.full(shape, 0.8)).bool() & masked_indices
    corrupted[use_mask_token] = self.tokenizer.convert_tokens_to_ids(self.tokenizer.mask_token)

    # Half of the remaining selected positions (10% overall): random token.
    use_random = torch.bernoulli(torch.full(shape, 0.5)).bool() & masked_indices & ~use_mask_token
    replacements = torch.randint(len(self.tokenizer), shape, dtype=torch.long)
    corrupted[use_random] = replacements[use_random]

    # The last 10% keep their original token.
    return corrupted, targets
Example #7
Source File:    From fastNLP with Apache License 2.0 6 votes vote down vote up
def drop_word(self, words):
    """Randomly replace entries of ``words`` with the unknown-word index.

    Only active when ``self.word_dropout > 0`` during training. Padding
    positions are never replaced, and neither are separator / CLS positions
    when their index is configured (i.e. not ``-100``).

    :param torch.LongTensor words: batch_size x max_len
    :return: the (possibly modified) index tensor
    """
    # NOTE(review): the second operand of `and` and the right-hand sides of the
    # three `*_mask` assignments were missing from the snippet. They are
    # restored from the surrounding names; `self.training` and
    # `self._word_pad_index` are assumptions -- confirm against upstream.
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # larger word_dropout -> more positions set
            pad_mask = words.ne(self._word_pad_index)
            mask = pad_mask & mask  # padding positions are never turned into unk
            if self._word_sep_index != -100:
                not_sep_mask = words.ne(self._word_sep_index)
                mask = mask & not_sep_mask
            if self._word_cls_index != -100:
                not_cls_mask = words.ne(self._word_cls_index)
                mask = mask & not_cls_mask
            words = words.masked_fill(mask, self._word_unk_index)
    return words
Example #8
Source File:    From fastNLP with Apache License 2.0 6 votes vote down vote up
def drop_word(self, words):
    """Randomly replace entries of ``words`` with the word-piece unknown index.

    Only active when ``self.word_dropout > 0`` during training. Padding,
    separator and CLS positions are never replaced.

    :param torch.LongTensor words: batch_size x max_len
    :return: the (possibly modified) index tensor
    """
    # NOTE(review): the second operand of `and` and the right-hand sides of the
    # three `*_mask` assignments were missing from the snippet. The attribute
    # names `_sep_index`, `_cls_index` and `_wordpiece_pad_index` (and
    # `self.training`) are assumptions -- verify against the class __init__.
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            not_sep_mask = words.ne(self._sep_index)
            not_cls_mask = words.ne(self._cls_index)
            replaceable_mask = not_sep_mask & not_cls_mask
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # larger word_dropout -> more positions set
            pad_mask = words.ne(self._wordpiece_pad_index)
            mask = pad_mask & mask & replaceable_mask  # padding positions are never turned into unk
            words = words.masked_fill(mask, self._wordpiece_unk_index)
    return words
Example #9
Source File:    From amortized-variational-filtering with MIT License 6 votes vote down vote up
def sample(self, n_samples=1, resample=False):
    """Draw samples from the distribution.

    The result is cached in ``self._sample`` and reused on later calls unless
    ``resample`` is true. For a 2-D or 4-D mean, a sample dimension of size
    ``n_samples`` is inserted at position 1; means of any other rank are
    sampled as they are.

    Args:
        n_samples (int): number of samples to draw
        resample (bool): whether to resample or just use current sample

    Returns:
        The tensor of 0/1 draws stored in ``self._sample``.
    """
    if self._sample is None or resample:
        assert self.mean is not None, 'Mean is None.'
        mean = self.mean
        rank = mean.dim()
        if rank == 2:
            mean = mean.unsqueeze(1).repeat(1, n_samples, 1)
        elif rank == 4:
            mean = mean.unsqueeze(1).repeat(1, n_samples, 1, 1, 1)
        self._sample = torch.bernoulli(mean)
    return self._sample
Example #10
Source File:    From fastNLP with Apache License 2.0 6 votes vote down vote up
def drop_word(self, words):
    """Randomly replace entries of ``words`` with the unknown-word index.

    Only active when ``self.word_dropout > 0`` during training. Padding
    positions are never replaced, and neither are separator / CLS positions
    when their index is configured (i.e. not ``-100``).

    :param torch.LongTensor words: batch_size x max_len
    :return: the (possibly modified) index tensor
    """
    # NOTE(review): the second operand of `and` and the right-hand sides of the
    # three `*_mask` assignments were missing from the snippet. They are
    # restored from the surrounding names; `self.training` and
    # `self._word_pad_index` are assumptions -- confirm against upstream.
    if self.word_dropout > 0 and self.training:
        with torch.no_grad():
            mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device)
            mask = torch.bernoulli(mask).eq(1)  # larger word_dropout -> more positions set
            pad_mask = words.ne(self._word_pad_index)
            mask = pad_mask & mask  # padding positions are never turned into unk
            if self._word_sep_index != -100:
                not_sep_mask = words.ne(self._word_sep_index)
                mask = mask & not_sep_mask
            if self._word_cls_index != -100:
                not_cls_mask = words.ne(self._word_cls_index)
                mask = mask & not_cls_mask
            words = words.masked_fill(mask, self._word_unk_index)
    return words
Example #11
Source File:    From ru_transformers with Apache License 2.0 6 votes vote down vote up
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

    ``inputs`` is modified in place and returned together with the labels,
    which are ``-1`` at every position that was not selected for prediction.
    """
    targets = inputs.clone()
    shape = targets.shape

    # Per-token selection probability (args.mlm_probability); special tokens get 0.
    select_prob = torch.full(shape, args.mlm_probability)
    special_rows = [
        tokenizer.get_special_tokens_mask(row, already_has_special_tokens=True)
        for row in targets.tolist()
    ]
    select_prob.masked_fill_(torch.tensor(special_rows, dtype=torch.bool), value=0.0)
    selected = torch.bernoulli(select_prob).bool()
    targets[~selected] = -1  # loss is computed on selected tokens only

    # 80% of the selected tokens become tokenizer.mask_token ([MASK]).
    to_mask = torch.bernoulli(torch.full(shape, 0.8)).bool() & selected
    inputs[to_mask] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # Half of what is left (10% overall) becomes a random token.
    to_randomize = torch.bernoulli(torch.full(shape, 0.5)).bool() & selected & ~to_mask
    substitutes = torch.randint(len(tokenizer), shape, dtype=torch.long)
    inputs[to_randomize] = substitutes[to_randomize]

    # The remaining 10% of selected tokens are left unchanged.
    return inputs, targets

# from transformers/, adapted to tpu 
Example #12
Source File:    From pytorch-wrapper with MIT License 6 votes vote down vote up
def sub_tensor_dropout(data_tensor, dropout_p, dim, is_model_training):
    """
    Drops (zeroes-out) random sub-Tensors of a Tensor across the specified dimension, during training.

    One keep/drop decision is drawn for every index combination of the leading
    dimensions up to and including ``dim``; all trailing dimensions share it.
    Kept values are NOT rescaled.

    :param data_tensor: ND Tensor.
    :param dropout_p: The dropout probability.
    :param dim: Int that corresponds to the dimension.
    :param is_model_training: Whether the model is currently training.
    :return: ND Tensor.
    """

    if dim < 0:
        dim = len(data_tensor.shape) + dim

    if dropout_p is None or dropout_p == 0 or not is_model_training:
        return data_tensor

    assert 0 <= dropout_p < 1, 'dropout probability must be in range [0,1)'

    # Keep-mask over the leading dims (0..dim), sampled with probability 1 - p.
    dp = torch.empty(*(data_tensor.shape[:dim + 1]), dtype=torch.float, device=data_tensor.device)
    dp = torch.bernoulli(dp.fill_((1 - dropout_p)))
    # Append singleton dims so the mask broadcasts over the trailing dims.
    dp = dp.view(list(dp.shape) + [1] * (len(data_tensor.shape) - len(dp.shape)))

    return data_tensor * dp
Example #13
Source File:    From ru_transformers with Apache License 2.0 6 votes vote down vote up
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

    ``inputs`` is modified in place and returned together with the labels,
    which are ``-1`` at every position that was not selected for prediction.
    """
    targets = inputs.clone()
    shape = targets.shape

    # Per-token selection probability (args.mlm_probability); special tokens get 0.
    select_prob = torch.full(shape, args.mlm_probability)
    special_rows = [
        tokenizer.get_special_tokens_mask(row, already_has_special_tokens=True)
        for row in targets.tolist()
    ]
    select_prob.masked_fill_(torch.tensor(special_rows, dtype=torch.bool), value=0.0)
    selected = torch.bernoulli(select_prob).bool()
    targets[~selected] = -1  # loss is computed on selected tokens only

    # 80% of the selected tokens become tokenizer.mask_token ([MASK]).
    to_mask = torch.bernoulli(torch.full(shape, 0.8)).bool() & selected
    inputs[to_mask] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # Half of what is left (10% overall) becomes a random token.
    to_randomize = torch.bernoulli(torch.full(shape, 0.5)).bool() & selected & ~to_mask
    substitutes = torch.randint(len(tokenizer), shape, dtype=torch.long)
    inputs[to_randomize] = substitutes[to_randomize]

    # The remaining 10% of selected tokens are left unchanged.
    return inputs, targets
Example #14
Source File:    From torchkit with MIT License 6 votes vote down vote up
def train(self):
        # Iterates 10 epochs over self.data_loader; each batch is binarised
        # with torch.bernoulli and reshaped to (-1, 1, 28, 28) before being
        # passed to self.mdl.
        # NOTE(review): this snippet uses the Python 2 print statement and is
        # cut off in the middle of it; whatever follows the loss computation
        # (backward pass / optimiser step, if any) is not visible here.
        for epoch in range(10):
            for it, (x, y) in enumerate(self.data_loader):
                x = torch.bernoulli(x)
                if cuda:
                    x = x.cuda()
                x = Variable(x.view(-1, 1, 28, 28))
                out = nn_.sigmoid(self.mdl((x,0))[0]).permute(0,3,1,2)
                # Binary cross-entropy summed over three dimensions, averaged over the batch.
                loss = utils.bceloss(out, x).sum(1).sum(1).sum(1).mean()
                if ((it + 1) % 100) == 0:
                    print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \
                        (epoch+1, it+1, 
                         self.data_loader.dataset.__len__() // 32,
Example #15
Source File:    From torchkit with MIT License 6 votes vote down vote up
def train(self):
        # Iterates 10 epochs over self.data_loader; each batch is binarised
        # with torch.bernoulli and flattened to (-1, 784) before being passed
        # to self.mdl.
        # NOTE(review): this snippet uses the Python 2 print statement and is
        # cut off in the middle of it; whatever follows the loss computation
        # (backward pass / optimiser step, if any) is not visible here.
        for epoch in range(10):
            for it, (x, y) in enumerate(self.data_loader):
                x = torch.bernoulli(x)
                x = Variable(x.view(-1, 784))
                out = nn_.sigmoid(self.mdl(x)[:,:,0])
                # Binary cross-entropy summed over dim 1, averaged over the batch.
                loss = utils.bceloss(out, x).sum(1).mean()
                if ((it + 1) % 10) == 0:
                    print 'Epoch: [%2d] [%4d/%4d] loss: %.8f' % \
                        (epoch+1, it+1, 
                         self.data_loader.dataset.__len__() // 32,
Example #16
Source File:    From KBGAN with MIT License 6 votes vote down vote up
def corrupt(self, src, rel, dst, keep_truth=True):
    """Build ``self.n_sample`` corrupted candidates for every (src, rel, dst) triple.

    For each triple a Bernoulli draw with probability ``self.bern_prob[rel]``
    decides which side is corrupted: the source when the draw is 1, the
    destination otherwise. Corrupted slots receive entity ids produced by
    ``choice(self.n_ent, ...)``.

    :param keep_truth: when true, column 0 keeps the original triple and only
        columns ``1:`` are corrupted; when false, every column is corrupted.
    :return: ``(src_out, rel_out, dst_out)``, each of shape ``(len(src), self.n_sample)``.
    """
    n = len(src)
    prob = self.bern_prob[rel]
    selection = torch.bernoulli(prob).numpy().astype('bool')
    # Repeat every id n_sample times: shape (n, n_sample).
    src_out = np.tile(src.numpy(), (self.n_sample, 1)).transpose()
    dst_out = np.tile(dst.numpy(), (self.n_sample, 1)).transpose()
    rel_out = rel.unsqueeze(1).expand(n, self.n_sample)
    if keep_truth:
        ent_random = choice(self.n_ent, (n, self.n_sample - 1))
        src_out[selection, 1:] = ent_random[selection]
        dst_out[~selection, 1:] = ent_random[~selection]
    else:
        # The snippet had lost this `else:`, so the truth-preserving path also
        # overwrote column 0 and keep_truth=False did nothing at all.
        ent_random = choice(self.n_ent, (n, self.n_sample))
        src_out[selection, :] = ent_random[selection]
        dst_out[~selection, :] = ent_random[~selection]
    return torch.from_numpy(src_out), rel_out, torch.from_numpy(dst_out)
Example #17
Source File:    From didyprog with MIT License 6 votes vote down vote up
def make_data(batch, augment=False,
              singleton_idx=None, unk_idx=None,
              ):
    """Unpack a batch and optionally apply <unk> augmentation.

    :param batch: object exposing ``sentences``, ``tags`` (a ``(tags, lengths)``
        pair) and ``letters`` (a ``(letters, letters_lengths)`` pair).
    :param augment: when true, positions where ``tags`` equals one of
        ``singleton_idx`` are set to ``unk_idx`` in ``sentences`` with
        probability 0.3. ``batch.sentences`` is modified in place.
    :param singleton_idx: iterable of ids to match against ``tags``; ``None`` matches nothing.
    :param unk_idx: replacement id.
    :return: ``(sentences, tags, lengths, letters, letters_lengths)``
    """
    # NOTE(review): the end of the signature was missing from the snippet; any
    # further parameters the original declared are not recoverable from here.
    sentences = batch.sentences
    tags, lengths = batch.tags

    letters, letters_lengths = batch.letters
    # Data augmentation for <unk> embedding training
    if augment:
        # Boolean masks replace the deprecated byte-tensor masks.
        # NOTE(review): a statement reassigning `bernoulli` was lost here;
        # moving the draw to the device of `tags` is assumed -- TODO confirm.
        bernoulli = torch.bernoulli(torch.full(tags.shape, .3)).bool().to(tags.device)
        indices = torch.zeros_like(tags, dtype=torch.bool)
        if singleton_idx is not None:
            for rep in singleton_idx:
                indices = indices | (tags == rep)
        indices = indices & bernoulli
        sentences[indices] = unk_idx

    return sentences, tags, lengths, letters, letters_lengths
Example #18
Source File:    From bindsnet with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_rmax(self):
        # Builds a two-layer network: an Input layer and an SRM0Nodes layer of 100 nodes each.
        # Connection test
        network = Network(dt=1.0)
        network.add_layer(Input(n=100, traces=True, traces_additive=True), name="input")
        network.add_layer(SRM0Nodes(n=100), name="output")
            # NOTE(review): the call this keyword argument belongs to, and the
            # rest of the test, are missing from the snippet -- restore from
            # the original test before running.
            inputs={"input": torch.bernoulli(torch.rand(250, 100)).byte()},
Example #19
Source File:    From Pytorch-NCE with MIT License 6 votes vote down vote up
def draw(self, *size):
    """Draw N samples from multinomial

    Uses the tables ``self.prob`` and ``self.alias``: a slot is picked
    uniformly, then a Bernoulli(``prob[slot]``) draw decides between the slot
    itself and its alias.

    Args:
        - size: the output size of samples

    Returns:
        A long tensor of the requested size holding sampled indices.
    """
    max_value = self.alias.size(0)

    # NOTE(review): the receiver of `.new(*size)` was missing from the snippet;
    # `self.alias` is assumed (same device as the tables) -- TODO confirm.
    kk = self.alias.new(*size).random_(0, max_value).long().view(-1)
    prob = self.prob[kk]
    alias = self.alias[kk]
    # b is 1 where the uniformly drawn slot is kept, 0 where its alias is used
    b = torch.bernoulli(prob).long()
    oq = kk.mul(b)
    oj = alias.mul(1 - b)

    return (oq + oj).view(size)
Example #20
Source File:    From PyTorch-1.x-Reinforcement-Learning-Cookbook with MIT License 6 votes vote down vote up
def run_episode(env, weight):
    """Run one episode with a linear softmax policy and collect log-policy gradients.

    At each step the action is sampled as Bernoulli(``probs[1]``), so the
    policy is effectively restricted to two actions.

    :param env: environment whose ``reset()`` returns a NumPy state and whose
        ``step(action)`` returns ``(state, reward, is_done, info)``.
    :param weight: policy weight matrix; ``state @ weight`` gives the action scores.
    :return: ``(total_reward, grads)`` where ``grads`` holds one gradient of
        ``log pi(action | state)`` with respect to ``weight`` per step taken.
    """
    state = env.reset()
    grads = []
    total_reward = 0
    is_done = False
    while not is_done:
        state = torch.from_numpy(state).float()
        z = torch.matmul(state, weight)
        # Explicit dim avoids the implicit-dim deprecation warning; assumes a
        # 1-D state, which torch.diag(probs) below relies on as well.
        probs = torch.softmax(z, dim=0)
        action = int(torch.bernoulli(probs[1]).item())
        d_softmax = torch.diag(probs) - probs.view(-1, 1) * probs
        d_log = d_softmax[action] / probs[action]
        grad = state.view(-1, 1) * d_log
        # NOTE(review): the snippet never stored `grad` and had an empty
        # `if is_done:` body; appending here restores the documented return
        # value, and the loop condition already handles termination.
        grads.append(grad)
        state, reward, is_done, _ = env.step(action)
        total_reward += reward
    return total_reward, grads
Example #21
Source File:    From pytorch-wrapper with MIT License 6 votes vote down vote up
def same_dropout(data_tensor, dropout_p, dim, is_model_training):
    """
    Drops the same random elements of a Tensor across the specified dimension, during training.

    A single keep/drop mask with size 1 along ``dim`` is sampled and broadcast
    over that dimension. Kept values are rescaled by ``1 / (1 - dropout_p)``.

    :param data_tensor: ND Tensor.
    :param dropout_p: The dropout probability.
    :param dim: Int that corresponds to the dimension.
    :param is_model_training: Whether the model is currently training.
    :return: ND Tensor.
    """

    if dim < 0:
        dim = len(data_tensor.shape) + dim

    if dropout_p is None or dropout_p == 0 or not is_model_training:
        return data_tensor

    assert 0 <= dropout_p < 1, 'dropout probability must be in range [0,1)'

    # Mask has size 1 along `dim`, so every slice along it shares the pattern.
    shape = list(data_tensor.shape)
    shape[dim] = 1
    dp = torch.empty(*shape, dtype=torch.float, device=data_tensor.device)
    dp = torch.bernoulli(dp.fill_((1 - dropout_p))) / (1 - dropout_p)

    return data_tensor * dp
Example #22
Source File:    From ru_transformers with Apache License 2.0 6 votes vote down vote up
def mask_tokens(inputs, tokenizer, args):
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

    ``inputs`` is modified in place and returned together with the labels,
    which are ``-1`` at every position that was not selected for prediction.
    """
    targets = inputs.clone()
    shape = targets.shape

    # Per-token selection probability (args.mlm_probability); special tokens get 0.
    select_prob = torch.full(shape, args.mlm_probability)
    special_rows = [
        tokenizer.get_special_tokens_mask(row, already_has_special_tokens=True)
        for row in targets.tolist()
    ]
    select_prob.masked_fill_(torch.tensor(special_rows, dtype=torch.bool), value=0.0)
    selected = torch.bernoulli(select_prob).bool()
    targets[~selected] = -1  # loss is computed on selected tokens only

    # 80% of the selected tokens become tokenizer.mask_token ([MASK]).
    to_mask = torch.bernoulli(torch.full(shape, 0.8)).bool() & selected
    inputs[to_mask] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # Half of what is left (10% overall) becomes a random token.
    to_randomize = torch.bernoulli(torch.full(shape, 0.5)).bool() & selected & ~to_mask
    substitutes = torch.randint(len(tokenizer), shape, dtype=torch.long)
    inputs[to_randomize] = substitutes[to_randomize]

    # The remaining 10% of selected tokens are left unchanged.
    return inputs, targets

# from transformers/, adapted to tpu 
Example #23
Source File:    From exbert with Apache License 2.0 5 votes vote down vote up
def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> Tuple[torch.Tensor, torch.Tensor]:
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

    ``inputs`` is modified in place. Labels are ``-100`` at every position
    that was not selected for prediction; special tokens and (when the
    tokenizer defines a pad token) padding are never selected.

    :raises ValueError: if the tokenizer has no mask token.
    :return: ``(inputs, labels)``
    """

    if tokenizer.mask_token is None:
        raise ValueError(
            "This tokenizer does not have a mask token which is necessary for masked language modeling. Remove the --mlm flag if you want to use this tokenizer."
        )

    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
    ]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    if tokenizer._pad_token is not None:
        padding_mask = labels.eq(tokenizer.pad_token_id)
        probability_matrix.masked_fill_(padding_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100  # We only compute loss on masked tokens

    # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, we replace masked input tokens with random word
    # (half of the positions that were not turned into [MASK])
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]

    # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
Example #24
Source File:    From gandissect with MIT License 5 votes vote down vote up
def sample_portion(vec, p=0.5):
    """Return a random subset of the rows of ``vec``.

    Each row (index along dim 0) is kept independently with probability ``p``.

    :param vec: tensor to subsample along its first dimension.
    :param p: keep probability for each row.
    :return: tensor holding the kept rows, in their original order.
    """
    bits = torch.bernoulli(torch.zeros(vec.shape[0], dtype=torch.uint8,
        device=vec.device), p)
    # Index with a bool mask: uint8 masks are deprecated for indexing.
    return vec[bits.bool()]
Example #25
Source File:    From torch-light with MIT License 5 votes vote down vote up
def __init__(self, dropout_prob, hidden_size, is_cuda):
    """Sample one fixed dropout mask of shape ``(1, hidden_size)``.

    Each mask entry is 1 with probability ``1 - dropout_prob`` and 0 otherwise.
    The mask is moved to the GPU when ``is_cuda`` is true.
    """
    # NOTE(review): no base-class initialiser call is visible in this snippet;
    # if the class derives from nn.Module one is presumably needed -- confirm.
    keep_probs = torch.Tensor(1, hidden_size)
    keep_probs.fill_(1. - dropout_prob)
    mask = torch.bernoulli(keep_probs)
    self.mask = mask.cuda() if is_cuda else mask
    self.dropout_prob = dropout_prob
Example #26
Source File:    From SDNet with MIT License 5 votes vote down vote up
def seq_dropout(x, p=0, training=False):
    """Dropout that shares one mask across the sequence dimension.

    x: batch * len * input_size

    A ``(batch, input_size)`` keep-mask is sampled with probability ``1 - p``,
    rescaled by ``1 / (1 - p)`` and applied to every position along ``len``.
    Returns ``x`` itself when not training or when ``p == 0``.
    """
    if not training or p == 0:
        return x
    # NOTE(review): the tensor constructor inside this expression was garbled
    # in the snippet; a ones tensor of shape (batch, input_size) on the device
    # of `x` is restored from the visible `x.size(2)` and `.zero_() + 1`.
    keep_probs = (1 - p) * x.new_ones(x.size(0), x.size(2))
    dropout_mask = 1.0 / (1 - p) * torch.bernoulli(keep_probs)
    return dropout_mask.unsqueeze(1).expand_as(x) * x
Example #27
Source File:    From FeatureDecoupling with MIT License 5 votes vote down vote up
def draw(self, N):
    """
        Draw N samples from multinomial

        Uses the tables ``self.prob`` and ``self.alias``: a slot is picked
        uniformly, then a Bernoulli(``prob[slot]``) draw decides between the
        slot itself and its alias.

        :return: long tensor of shape ``(N,)`` holding sampled indices.
    """
    K = self.alias.size(0)

    kk = torch.zeros(N, dtype=torch.long, device=self.prob.device).random_(0, K)
    prob = self.prob.index_select(0, kk)
    alias = self.alias.index_select(0, kk)
    # b is 1 where the uniformly drawn slot is kept, 0 where its alias is used
    b = torch.bernoulli(prob)
    oq = kk.mul(b.long())
    oj = alias.mul((1-b).long())

    return oq + oj
Example #28
Source File:    From PyTorch_Biaffine_Dependency_Parsing with Apache License 2.0 5 votes vote down vote up
def forward(self, input, masks, initial=None):
    """Run the stacked (optionally bidirectional) LSTM with shared dropout masks.

    Per layer, one input mask and one hidden mask are sampled per batch element
    and reused at every time step; both are rescaled by ``1 / (1 - rate)``.

    :param input: input tensor; transposed to time-major when ``self.batch_first`` is set.
    :param masks: sequence mask, expanded to ``self.hidden_size`` on the last dim.
    :param initial: optional ``(h0, c0)`` pair; zeros are used when omitted.
    :return: ``(output, (h_n, c_n))`` with the per-layer final states stacked on dim 0.
    """
    # NOTE(review): several tokens were missing from the snippet -- the
    # `input.data.new(batch_size` constructors, the `if self.training:` guard
    # around the dropout masks and the `torch.cat(` calls. They are restored
    # from the surrounding code; confirm against the upstream project.
    if self.batch_first:
        input = input.transpose(0, 1)
        masks = torch.unsqueeze(masks.transpose(0, 1), dim=2)
    max_time, batch_size, _ = input.size()
    masks = masks.expand(-1, -1, self.hidden_size)

    if initial is None:
        initial = Variable(input.data.new(batch_size, self.hidden_size).zero_())
        initial = (initial, initial)
    h_n = []
    c_n = []

    for layer in range(self.num_layers):
        max_time, batch_size, input_size = input.size()
        input_mask, hidden_mask = None, None
        if self.training:
            # One input mask per (batch, feature), broadcast over time.
            input_mask = input.data.new(batch_size, input_size).fill_(1 - self.dropout_in)
            input_mask = Variable(torch.bernoulli(input_mask), requires_grad=False)
            input_mask = input_mask / (1 - self.dropout_in)
            input_mask = torch.unsqueeze(input_mask, dim=2).expand(-1, -1, max_time).permute(2, 0, 1)
            input = input * input_mask

            # Hidden-state mask handed to the cell loop as `drop_masks`.
            hidden_mask = input.data.new(batch_size, self.hidden_size).fill_(1 - self.dropout_out)
            hidden_mask = Variable(torch.bernoulli(hidden_mask), requires_grad=False)
            hidden_mask = hidden_mask / (1 - self.dropout_out)

        layer_output, (layer_h_n, layer_c_n) = MyLSTM._forward_rnn(cell=self.fcells[layer], \
            input=input, masks=masks, initial=initial, drop_masks=hidden_mask)
        if self.bidirectional:
            blayer_output, (blayer_h_n, blayer_c_n) = MyLSTM._forward_brnn(cell=self.bcells[layer], \
                input=input, masks=masks, initial=initial, drop_masks=hidden_mask)

        # Concatenate forward/backward results when bidirectional.
        h_n.append(torch.cat([layer_h_n, blayer_h_n], 1) if self.bidirectional else layer_h_n)
        c_n.append(torch.cat([layer_c_n, blayer_c_n], 1) if self.bidirectional else layer_c_n)
        input = torch.cat([layer_output, blayer_output], 2) if self.bidirectional else layer_output

    h_n = torch.stack(h_n, 0)
    c_n = torch.stack(c_n, 0)

    return input, (h_n, c_n)
Example #29
Source File:    From simpletransformers with Apache License 2.0 5 votes vote down vote up
def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> Tuple[torch.Tensor, torch.Tensor]:
    """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original.

    ``inputs`` is modified in place. Labels are ``-100`` at every position
    that was not selected for prediction; special tokens and (when the
    tokenizer defines a pad token) padding are never selected.

    :raises ValueError: if the tokenizer has no mask token.
    :return: ``(inputs, labels)``
    """

    if tokenizer.mask_token is None:
        raise ValueError(
            "This tokenizer does not have a mask token which is necessary for masked language modeling."
            "Set 'mlm' to False in args if you want to use this tokenizer."
        )

    labels = inputs.clone()
    # We sample a few tokens in each sequence for masked-LM training
    # (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
    probability_matrix = torch.full(labels.shape, args.mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
    ]
    probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    if tokenizer._pad_token is not None:
        padding_mask = labels.eq(tokenizer.pad_token_id)
        probability_matrix.masked_fill_(padding_mask, value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100  # We only compute loss on masked tokens

    # NOTE(review): `and False` makes the ELECTRA branch unreachable; it is
    # kept as found because it looks like a deliberate switch-off -- confirm.
    # The `else:` was missing from the snippet and is restored.
    if args.model_type == "electra" and False:
        # For ELECTRA, we replace all masked input tokens with tokenizer.mask_token
        inputs[masked_indices] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)
    else:
        # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK])
        indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
        inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

        # 10% of the time, we replace masked input tokens with random word
        indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
        random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
        inputs[indices_random] = random_words[indices_random]

        # The rest of the time (10% of the time) we keep the masked input tokens unchanged
    return inputs, labels
Example #30
Source File:    From SDNet with MIT License 5 votes vote down vote up
def generate_mask(new_data, dropout_p=0.0):
    """Build a rescaled dropout mask shaped like ``new_data``.

    ``new_data`` is zeroed in place and serves only as a shape/dtype template.
    Every entry is kept with probability ``1 - dropout_p``, except one randomly
    chosen column per row, which is always kept. Kept entries have the value
    ``1 / (1 - dropout_p)``, dropped entries are 0.
    """
    keep = 1 - dropout_p
    probs = keep * (new_data.zero_() + 1)
    n_rows, n_cols = probs.size(0), probs.size(1)
    for row in range(n_rows):
        # Force one position per row to survive so that no row is fully dropped.
        always_kept = random.randint(0, n_cols - 1)
        probs[row][always_kept] = 1
    return Variable(1.0 / keep * torch.bernoulli(probs), requires_grad=False)

# Get positional scores and scores for 'yes', 'no', 'unknown' cases