Python torch.triu() Examples

The following are 28 code examples of torch.triu(), drawn from open-source projects. torch.triu(input, diagonal=0) returns the upper triangular part of a matrix (or batch of matrices); the elements below the selected diagonal are set to 0. You can go to the original project or source file by following the source reference above each example, or check out the other available functions and classes of the torch module.
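For orientation, here is a minimal sketch of what torch.triu itself does; the input values are purely illustrative.

import torch

x = torch.arange(1., 10.).reshape(3, 3)
# [[1., 2., 3.],
#  [4., 5., 6.],
#  [7., 8., 9.]]

torch.triu(x)               # keeps the main diagonal and everything above it
# [[1., 2., 3.],
#  [0., 5., 6.],
#  [0., 0., 9.]]

torch.triu(x, diagonal=1)   # keeps only elements strictly above the main diagonal
# [[0., 2., 3.],
#  [0., 0., 6.],
#  [0., 0., 0.]]

torch.triu(x, diagonal=-1)  # also keeps the first diagonal below the main one
# [[1., 2., 3.],
#  [4., 5., 6.],
#  [0., 8., 9.]]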
Example #1
Source Project: TVQAplus   Author: jayleicn   File: model_utils.py    License: MIT License
def find_max_triples(p1, p2, topN=5, prob_thd=None):
    """ Find a list of (k1, k2) where k1 >= k2 with the maximum values of p1[k1] * p2[k2]
    Args:
        p1 (torch.CudaTensor): (N, L) batched start_idx probabilities
        p2 (torch.CudaTensor): (N, L) batched end_idx probabilities
        topN (int): return topN pairs with highest values
        prob_thd (float): optional probability threshold; pairs with confidence below it are dropped
    Returns:
        batched_sorted_triple: N * [(st_idx, ed_idx, confidence), ...]
    """
    product = torch.bmm(p1.unsqueeze(2), p2.unsqueeze(1))  # (N, L, L), end_idx >= start_idx
    upper_product = torch.stack([torch.triu(p) for p in product]
                                ).data.cpu().numpy()  # (N, L, L) the lower part becomes zeros
    batched_sorted_triple = []
    for idx, e in enumerate(upper_product):
        sorted_triple = topN_array_2d(e, topN=topN)
        if prob_thd is not None:
            sorted_triple = [t for t in sorted_triple if t[2] >= prob_thd]
        batched_sorted_triple.append(sorted_triple)
    return batched_sorted_triple 
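Note that torch.triu also operates on batched tensors directly (it is applied to the last two dimensions), so the per-matrix comprehension above could in principle be collapsed into a single call. A minimal sketch of that equivalence, using toy probability tensors in place of the model outputs:

import torch

N, L = 2, 4
p1 = torch.softmax(torch.randn(N, L), dim=-1)  # toy start-index probabilities
p2 = torch.softmax(torch.randn(N, L), dim=-1)  # toy end-index probabilities

product = torch.bmm(p1.unsqueeze(2), p2.unsqueeze(1))  # (N, L, L)
batched = torch.triu(product)                          # lower part of every L x L slice becomes zeros
looped = torch.stack([torch.triu(p) for p in product])
assert torch.equal(batched, looped)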
Example #2
Source Project: nsf   Author: bayesiains   File: made_test.py    License: MIT License
def test_total_mask_random(self):
        features = 10
        hidden_features = 50
        num_blocks = 5
        output_multiplier = 1

        model = made.MADE(
            features=features,
            hidden_features=hidden_features,
            num_blocks=num_blocks,
            output_multiplier=output_multiplier,
            use_residual_blocks=False,
            random_mask=True,
        )
        total_mask = model.initial_layer.mask
        for block in model.blocks:
            self.assertIsInstance(block, made.MaskedFeedforwardBlock)
            total_mask = block.linear.mask @ total_mask
        total_mask = model.final_layer.mask @ total_mask
        total_mask = (total_mask > 0).float()
        self.assertEqual(torch.triu(total_mask), torch.zeros([features, features])) 
Example #3
Source Project: meshed-memory-transformer   Author: aimagelab   File: decoders.py    License: BSD 3-Clause "New" or "Revised" License
def forward(self, input, encoder_output, mask_encoder):
        # input (b_s, seq_len)
        b_s, seq_len = input.shape[:2]
        mask_queries = (input != self.padding_idx).unsqueeze(-1).float()  # (b_s, seq_len, 1)
        mask_self_attention = torch.triu(torch.ones((seq_len, seq_len), dtype=torch.uint8, device=input.device),
                                         diagonal=1)
        mask_self_attention = mask_self_attention.unsqueeze(0).unsqueeze(0)  # (1, 1, seq_len, seq_len)
        mask_self_attention = mask_self_attention + (input == self.padding_idx).unsqueeze(1).unsqueeze(1).byte()
        mask_self_attention = mask_self_attention.gt(0)  # (b_s, 1, seq_len, seq_len)
        if self._is_stateful:
            self.running_mask_self_attention = torch.cat([self.running_mask_self_attention, mask_self_attention], -1)
            mask_self_attention = self.running_mask_self_attention

        seq = torch.arange(1, seq_len + 1).view(1, -1).expand(b_s, -1).to(input.device)  # (b_s, seq_len)
        seq = seq.masked_fill(mask_queries.squeeze(-1) == 0, 0)
        if self._is_stateful:
            self.running_seq.add_(1)
            seq = self.running_seq

        out = self.word_emb(input) + self.pos_emb(seq)
        for i, l in enumerate(self.layers):
            out = l(out, encoder_output, mask_queries, mask_self_attention, mask_encoder)

        out = self.fc(out)
        return F.log_softmax(out, dim=-1) 
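To visualise the combined causal-plus-padding mask built above, here is a small standalone sketch in the same spirit; the input ids and padding index (0) are toy values, not the project's configuration. A value of 1 marks a position that must not be attended.

import torch

padding_idx = 0
input = torch.tensor([[5, 7, 9, 0]])   # (b_s=1, seq_len=4), last token is padding
b_s, seq_len = input.shape

causal = torch.triu(torch.ones(seq_len, seq_len, dtype=torch.uint8), diagonal=1)
causal = causal.unsqueeze(0).unsqueeze(0)                       # (1, 1, seq_len, seq_len)
pad = (input == padding_idx).unsqueeze(1).unsqueeze(1).byte()   # (b_s, 1, 1, seq_len)
mask = (causal + pad).gt(0)                                     # (b_s, 1, seq_len, seq_len)
print(mask[0, 0].int())
# [[0, 1, 1, 1],
#  [0, 0, 1, 1],
#  [0, 0, 0, 1],
#  [0, 0, 0, 1]]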
Example #4
Source Project: sodeep   Author: technicolor-research   File: model.py    License: BSD 3-Clause Clear License
def comp(self, inpu):
        in_mat1 = torch.triu(inpu.repeat(inpu.size(0), 1), diagonal=1)
        in_mat2 = torch.triu(inpu.repeat(inpu.size(0), 1).t(), diagonal=1)

        comp_first = (in_mat1 - in_mat2)
        comp_second = (in_mat2 - in_mat1)

        std1 = torch.std(comp_first).item()
        std2 = torch.std(comp_second).item()

        comp_first = torch.sigmoid(comp_first * (6.8 / std1))
        comp_second = torch.sigmoid(comp_second * (6.8 / std2))

        comp_first = torch.triu(comp_first, diagonal=1)
        comp_second = torch.triu(comp_second, diagonal=1)

        return (torch.sum(comp_first, 1) + torch.sum(comp_second, 0) + 1) / inpu.size(0) 
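The two triu(..., diagonal=1) calls above build strictly upper-triangular matrices of pairwise values: entry (i, j) with j > i holds inpu[j] in the first matrix and inpu[i] in the second, so their difference enumerates every pairwise score difference exactly once, and summing the sigmoids of those differences amounts to a smooth approximation of each element's rank. A minimal sketch of the construction with a toy 1-D score vector:

import torch

inpu = torch.tensor([0.2, 0.9, 0.5])
n = inpu.size(0)

in_mat1 = torch.triu(inpu.repeat(n, 1), diagonal=1)      # (i, j): inpu[j] for j > i, else 0
in_mat2 = torch.triu(inpu.repeat(n, 1).t(), diagonal=1)  # (i, j): inpu[i] for j > i, else 0
# in_mat1: [[0.0, 0.9, 0.5],      in_mat2: [[0.0, 0.2, 0.2],
#           [0.0, 0.0, 0.5],                [0.0, 0.0, 0.9],
#           [0.0, 0.0, 0.0]]                [0.0, 0.0, 0.0]]
# in_mat1 - in_mat2 therefore holds inpu[j] - inpu[i] for every pair j > i.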
Example #5
Source Project: translate   Author: pytorch   File: cloze_transformer_model.py    License: BSD 3-Clause "New" or "Revised" License
def buffered_future_mask(self, tensor):
        """attend all surounding words except itself
           [[0, -inf, 0]
            [0,  0, -inf]
            [0,  0,   0]]
        The attention map is not a true diagonal since we predict y_{t+1} at time-step t
        """
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
            self._future_mask = torch.tril(self._future_mask, 1)
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
            self._future_mask = torch.tril(self._future_mask, 1)
        return self._future_mask[:dim, :dim] 
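Applying triu(..., 1) and then tril(..., 1) keeps only the first superdiagonal, so each position is blocked from attending to the single token it has to predict, matching the picture in the docstring. A minimal sketch for dim = 3, assuming fill_with_neg_inf simply fills the tensor with -inf as in fairseq's utils:

import torch

dim = 3
neg_inf = torch.zeros(dim, dim).fill_(float("-inf"))
mask = torch.tril(torch.triu(neg_inf, 1), 1)
print(mask)
# [[0., -inf, 0.],
#  [0., 0., -inf],
#  [0., 0., 0.]]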
Example #6
Source Project: virtex   Author: kdexd   File: textual_heads.py    License: MIT License
def _generate_future_mask(
        self, size: int, dtype: torch.dtype, device: torch.device
    ) -> torch.Tensor:
        r"""
        Generate a mask for "future" positions, useful when using this module
        for language modeling.

        Parameters
        ----------
        size: int
        """
        # Default mask is for forward direction. Flip for backward direction.
        mask = torch.triu(
            torch.ones(size, size, device=device, dtype=dtype), diagonal=1
        )
        mask = mask.masked_fill(mask == 1, float("-inf"))
        return mask 
Example #7
Source Project: heat   Author: helmholtz-analytics   File: basics.py    License: MIT License
def triu(m, k=0):
    """
    Returns the upper triangular part of the tensor; the other elements of the result tensor are set to 0.

    The upper triangular part of the tensor is defined as the elements on and above the diagonal.

    The argument k controls which diagonal to consider. If k=0, all elements on and above the main diagonal are
    retained. A positive value excludes just as many diagonals above the main diagonal, and similarly a negative
    value includes just as many diagonals below the main diagonal.

    Parameters
    ----------
    m : ht.DNDarray
        Input tensor for which to compute the upper triangle.
    k : int, optional
        Diagonal below which to zero elements. k=0 (default) is the main diagonal, k<0 is below and k>0 is above.

    Returns
    -------
    upper_triangle : ht.DNDarray
        Upper triangle of the input tensor.
    """
    return __tri_op(m, k, torch.triu) 
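The k argument maps directly onto torch.triu's diagonal argument; a quick illustration of the offset semantics with plain torch tensors:

import torch

x = torch.ones(4, 4)
torch.triu(x, diagonal=0).sum()   # 10 elements kept: the main diagonal and everything above it
torch.triu(x, diagonal=1).sum()   #  6 elements kept: only elements strictly above the main diagonal
torch.triu(x, diagonal=-1).sum()  # 13 elements kept: one extra diagonal below the main one is retained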
Example #8
Source Project: pt-ranking.github.io   Author: pt-ranking   File: list_probability.py    License: MIT License
def log_ranking_prob_Bradley_Terry(batch_preds):
    '''
    :param batch_preds: [batch_size, list_size]
    :return:
    '''
    assert 2 == len(batch_preds.size())

    max_v = torch.max(batch_preds)
    new_batch_preds = torch.exp(batch_preds - max_v)

    batch_numerators = torch.unsqueeze(new_batch_preds, dim=2).repeat(1, 1, batch_preds.size(1))

    batch_denominators = torch.unsqueeze(new_batch_preds, dim=2) + torch.unsqueeze(new_batch_preds, dim=1)

    batch_BT_probs = batch_numerators / batch_denominators

    batch_log_ranking_prob = torch.sum(torch.sum(torch.triu(torch.log(batch_BT_probs), diagonal=1), dim=2), dim=1)

    return batch_log_ranking_prob 
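The triu(..., diagonal=1) call restricts the sum to pairs i < j, so the result is the log-probability of the given ordering under a Bradley-Terry model, i.e. the sum of log P(item_i beats item_j) over all i < j. A tiny numeric check of that reading, using hypothetical scores rather than model outputs:

import torch

batch_preds = torch.tensor([[2.0, 1.0, 0.0]])   # one list of three scores
# P(i beats j) = sigmoid(s_i - s_j), which is what the exp-based computation above evaluates stably
probs = torch.sigmoid(batch_preds.unsqueeze(2) - batch_preds.unsqueeze(1))
manual = torch.log(probs[0, 0, 1]) + torch.log(probs[0, 0, 2]) + torch.log(probs[0, 1, 2])
via_triu = torch.triu(torch.log(probs), diagonal=1).sum()
assert torch.allclose(manual, via_triu)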
Example #9
Source Project: exbert   Author: bhoov   File: modeling_bart.py    License: Apache License 2.0
def _prepare_bart_decoder_inputs(
    config, input_ids, decoder_input_ids=None, decoder_attn_mask=None,
):
    """Prepare masks that ignore padding tokens  decoder and a causal lm mask for the decoder if
    none are provided. This mimics the default behavior in fairseq. To override it pass in masks.
    """
    pad_token_id = config.pad_token_id
    need_causal_mask = not config.output_past
    if decoder_input_ids is None:
        decoder_input_ids = shift_tokens_right(input_ids, pad_token_id)
    bsz, tgt_len = decoder_input_ids.size()[:2]
    if decoder_attn_mask is None:
        decoder_padding_mask = make_padding_mask(decoder_input_ids, pad_token_id)
        if need_causal_mask:
            causal_lm_mask = torch.triu(fill_with_neg_inf(torch.zeros(tgt_len, tgt_len)), 1)
        else:
            causal_lm_mask = None
        new_shape = (bsz, tgt_len, tgt_len)
        # make it broadcastable so it can just be added to the attention coefficients
        decoder_attn_mask = _combine_masks(decoder_padding_mask, causal_lm_mask, new_shape).to(device=input_ids.device)
    assert decoder_attn_mask is None or decoder_attn_mask.shape == (bsz, 1, tgt_len, tgt_len)
    return decoder_input_ids, decoder_attn_mask 
Example #10
Source Project: texar-pytorch   Author: asyml   File: xlnet_encoder.py    License: Apache License 2.0
def _create_causal_attn_mask(self,
                                 seq_len: int,
                                 mem_len: int,
                                 same_length: bool = False) -> torch.Tensor:
        r"""Create causal attention mask of shape
        `(seq_len, mem_len + seq_len)`.
        """
        assert self.r_w_bias is not None
        device = self.r_w_bias.device
        attn_mask = torch.ones(seq_len, seq_len, device=device)
        mask_u = torch.triu(attn_mask, diagonal=1)
        attn_mask_pad = torch.zeros(seq_len, mem_len, device=device)
        ret = torch.cat([attn_mask_pad, mask_u], dim=1)
        if same_length:
            mask_l = torch.tril(attn_mask, diagonal=-1)
            ret = torch.cat([ret[:, :seq_len] + mask_l, ret[:, seq_len:]], 1)
        return ret 
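Concretely, the first mem_len columns of the returned mask are all zeros (the memory is always visible) and the remaining seq_len columns carry the strictly upper-triangular future mask. A small sketch of the same_length=False case with seq_len=3 and mem_len=2, where 1 marks a masked position:

import torch

seq_len, mem_len = 3, 2
attn_mask = torch.ones(seq_len, seq_len)
mask_u = torch.triu(attn_mask, diagonal=1)
ret = torch.cat([torch.zeros(seq_len, mem_len), mask_u], dim=1)
print(ret)
# [[0., 0., 0., 1., 1.],
#  [0., 0., 0., 0., 1.],
#  [0., 0., 0., 0., 0.]]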
Example #11
Source Project: ConvLab   Author: ConvLab   File: Transformer.py    License: MIT License
def get_subsequent_mask(seq):
    ''' For masking out the subsequent info. '''

    sz_b, len_s = seq.size()
    subsequent_mask = torch.triu(
        torch.ones((len_s, len_s), device=seq.device, dtype=torch.uint8), diagonal=1)
    subsequent_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1)  # b x ls x ls

    return subsequent_mask 
Example #12
Source Project: transfer-nlp   Author: feedly   File: model.py    License: MIT License
def forward(self, x):
        """ x has shape [batch, seq length]"""

        padding_mask = (x == self.tokenizer.vocab['[PAD]'])

        x = x.transpose(0, 1).contiguous()

        positions = torch.arange(len(x), device=x.device).unsqueeze(-1)
        h = self.tokens_embeddings(x)
        h = h + self.position_embeddings(positions).expand_as(h)
        h = self.dropout(h)

        attn_mask = None
        if self.causal:
            attn_mask = torch.full((len(x), len(x)), -float('Inf'), device=h.device, dtype=h.dtype)
            attn_mask = torch.triu(attn_mask, diagonal=1)

        for layer_norm_1, attention, layer_norm_2, feed_forward in zip(self.layer_norms_1, self.attentions,
                                                                       self.layer_norms_2, self.feed_forwards):
            h = layer_norm_1(h)
            x, _ = attention(h, h, h, attn_mask=attn_mask, need_weights=False, key_padding_mask=padding_mask)
            x = self.dropout(x)
            h = x + h

            h = layer_norm_2(h)
            x = feed_forward(h)
            x = self.dropout(x)
            h = x + h
        return h 
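The additive float mask built with torch.full and torch.triu above is the form accepted by torch.nn.MultiheadAttention's attn_mask argument (the mask is added to the attention scores, so -inf entries receive zero weight after the softmax). A standalone sketch of that pattern with toy dimensions, not the project's configuration:

import torch

seq_len, batch, d_model, n_heads = 5, 2, 16, 4
h = torch.randn(seq_len, batch, d_model)        # (seq length, batch, model dim)

attn_mask = torch.full((seq_len, seq_len), -float('Inf'), dtype=h.dtype)
attn_mask = torch.triu(attn_mask, diagonal=1)   # -inf strictly above the diagonal, 0 elsewhere

attention = torch.nn.MultiheadAttention(d_model, n_heads)
out, _ = attention(h, h, h, attn_mask=attn_mask, need_weights=False)
print(out.shape)                                # torch.Size([5, 2, 16])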
Example #13
Source Project: transfer-nlp   Author: feedly   File: model.py    License: MIT License
def forward(self, x):
        """ x has shape [batch, seq length]"""

        padding_mask = (x == self.tokenizer.vocab['[PAD]'])

        x = x.transpose(0, 1).contiguous()

        positions = torch.arange(len(x), device=x.device).unsqueeze(-1)
        h = self.tokens_embeddings(x)
        h = h + self.position_embeddings(positions).expand_as(h)
        h = self.dropout(h)

        attn_mask = None
        if self.causal:
            attn_mask = torch.full((len(x), len(x)), -float('Inf'), device=h.device, dtype=h.dtype)
            attn_mask = torch.triu(attn_mask, diagonal=1)

        for (layer_norm_1, attention, adapter_1, layer_norm_2, feed_forward, adapter_2) \
                in zip(self.layer_norms_1, self.attentions, self.adapters_1,
                       self.layer_norms_2, self.feed_forwards, self.adapters_2):
            h = layer_norm_1(h)
            x, _ = attention(h, h, h, attn_mask=attn_mask, need_weights=False, key_padding_mask=padding_mask)
            x = self.dropout(x)

            x = adapter_1(x) + x  # Add an adapter with a skip-connection after attention module

            h = x + h

            h = layer_norm_2(h)
            x = feed_forward(h)
            x = self.dropout(x)

            x = adapter_2(x) + x  # Add an adapter with a skip-connection after feed-forward module

            h = x + h
        return h 
Example #14
Source Project: crosentgec   Author: nusnlp   File: multihead_attention.py    License: GNU General Public License v3.0
def buffered_mask(self, tensor):
        dim = tensor.size(-1)
        if self._mask is None:
            self._mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1)
        if self._mask.size(0) < dim:
            self._mask = torch.triu(utils.fill_with_neg_inf(self._mask.resize_(dim, dim)), 1)
        return self._mask[:dim, :dim] 
Example #15
Source Project: fairseq   Author: pytorch   File: vggtransformer.py    License: MIT License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
        return self._future_mask[:dim, :dim] 
Example #16
Source Project: fairseq   Author: pytorch   File: monotonic_multihead_attention.py    License: MIT License
def p_choose(self, query, key, key_padding_mask=None, attn_mask=None, incremental_state=None):
        """
        query: tgt_len, bsz, embed_dim
        key: src_len, bsz, embed_dim
        key_padding_mask: bsz, src_len
        """
        src_len, bsz, _ = key.size()
        tgt_len, bsz, _ = query.size()
        p_choose = query.new_ones(bsz, tgt_len, src_len)
        p_choose = torch.tril(p_choose, diagonal=self.waitk_lagging - 1)
        p_choose = torch.triu(p_choose, diagonal=self.waitk_lagging - 1)

        if key_padding_mask is not None and key_padding_mask[:, 0].eq(1).any():
            # Left pad source
            # add -1 to the end
            p_choose = p_choose.masked_fill(key_padding_mask.float().flip(1).unsqueeze(1).bool(), -1)
            p_choose = convert_padding_direction(p_choose.view(-1, src_len).long(), padding_idx=-1, right_to_left=True)
            p_choose = p_choose.view(bsz, tgt_len, src_len).type_as(query)
            # remove -1
            p_choose[p_choose.eq(-1)] = 0

        # Extend to each head
        p_choose = (
            p_choose.contiguous().unsqueeze(1)
            .expand(-1, self.num_heads, -1, -1).contiguous()
            .view(-1, tgt_len, src_len)
        )

        return p_choose 
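Applying tril and triu with the same diagonal keeps exactly one diagonal band, which is how the wait-k read/write schedule is encoded here: target step t is aligned with source position t + waitk_lagging - 1 (0-indexed). A minimal sketch with a hypothetical waitk_lagging of 3, tgt_len=3 and src_len=5:

import torch

waitk_lagging, tgt_len, src_len = 3, 3, 5
p_choose = torch.ones(1, tgt_len, src_len)
p_choose = torch.tril(p_choose, diagonal=waitk_lagging - 1)
p_choose = torch.triu(p_choose, diagonal=waitk_lagging - 1)
print(p_choose[0])
# [[0., 0., 1., 0., 0.],
#  [0., 0., 0., 1., 0.],
#  [0., 0., 0., 0., 1.]]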
Example #17
Source Project: fairseq   Author: pytorch   File: transformer.py    License: MIT License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        # self._future_mask.device != tensor.device is not working in TorchScript. This is a workaround.
        if (
            self._future_mask.size(0) == 0
            or (not self._future_mask.device == tensor.device)
            or self._future_mask.size(0) < dim
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(torch.zeros([dim, dim])), 1
            )
        self._future_mask = self._future_mask.to(tensor)
        return self._future_mask[:dim, :dim] 
Example #18
Source Project: fairseq   Author: pytorch   File: lightconv.py    License: MIT License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device:
            self._future_mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1)
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1)
        return self._future_mask[:dim, :dim] 
Example #19
Source Project: Bert-Chinese-Text-Classification-Pytorch   Author: 649453932   File: modeling_transfo_xl.py    License: MIT License
def _parallelogram_mask(self, h, w, left=False):
        mask = torch.ones((h, w)).byte()
        m = min(h, w)
        mask[:m,:m] = torch.triu(mask[:m,:m])
        mask[-m:,-m:] = torch.tril(mask[-m:,-m:])

        if left:
            return mask
        else:
            return mask.flip(0) 
Example #20
Source Project: PySyft   Author: OpenMined   File: test_operations.py    License: Apache License 2.0
def test_inv_upper(hook, workers):

    bob = workers["bob"]
    alice = workers["alice"]
    crypto_prov = workers["james"]

    torch.manual_seed(0)  # Truncation might not always work so we set the random seed
    n_cols = 3
    n_rows = 3
    R = torch.triu(torch.randn([n_rows, n_cols]))
    invR = R.inverse()

    R_sh = R.fix_precision(precision_fractional=6).share(bob, alice, crypto_provider=crypto_prov)
    invR_sh = DASH._inv_upper(R_sh)
    assert ((invR - invR_sh.get().float_precision()).abs() < 1e-2).all() 
Example #21
Source Project: translate   Author: pytorch   File: transformer.py    License: BSD 3-Clause "New" or "Revised" License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
        return self._future_mask[:dim, :dim] 
Example #22
Source Project: translate   Author: pytorch   File: deliberation_networks.py    License: BSD 3-Clause "New" or "Revised" License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
        return self._future_mask[:dim, :dim] 
Example #23
Source Project: translate   Author: pytorch   File: transformer_aan.py    License: BSD 3-Clause "New" or "Revised" License
def buffered_future_mask(self, tensor):
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
        return self._future_mask[:dim, :dim] 
Example #24
Source Project: fastNLP   Author: fastnlp   File: Models.py    License: Apache License 2.0
def get_subsequent_mask(seq):
    ''' For masking out the subsequent info. '''

    sz_b, len_s = seq.size()
    subsequent_mask = torch.triu(
        torch.ones((len_s, len_s), device=seq.device, dtype=torch.uint8), diagonal=1)
    subsequent_mask = subsequent_mask.unsqueeze(0).expand(sz_b, -1, -1)  # b x ls x ls

    return subsequent_mask 
Example #25
Source Project: graph-generation   Author: JiaxuanYou   File: model.py    License: MIT License
def recover_adj_lower(self, l):
        # NOTE: Assumes 1 per minibatch
        adj = torch.zeros(self.max_num_nodes, self.max_num_nodes)
        adj[torch.triu(torch.ones(self.max_num_nodes, self.max_num_nodes)) == 1] = l
        return adj 
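The boolean mask torch.triu(torch.ones(n, n)) == 1 selects the upper-triangular positions (including the diagonal), so the indexed assignment scatters the flat vector l into those positions in row-major order. A small sketch with max_num_nodes = 3:

import torch

max_num_nodes = 3
l = torch.tensor([1., 2., 3., 4., 5., 6.])   # 3 * (3 + 1) / 2 = 6 upper-triangular entries
adj = torch.zeros(max_num_nodes, max_num_nodes)
adj[torch.triu(torch.ones(max_num_nodes, max_num_nodes)) == 1] = l
print(adj)
# [[1., 2., 3.],
#  [0., 4., 5.],
#  [0., 0., 6.]]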
Example #26
Source Project: TextClassify   Author: linhaow   File: modeling_transfo_xl.py    License: Apache License 2.0
def _parallelogram_mask(self, h, w, left=False):
        mask = torch.ones((h, w)).byte()
        m = min(h, w)
        mask[:m,:m] = torch.triu(mask[:m,:m])
        mask[-m:,-m:] = torch.tril(mask[-m:,-m:])

        if left:
            return mask
        else:
            return mask.flip(0) 
Example #27
Source Project: TextClassify   Author: linhaow   File: modeling_xlnet.py    License: Apache License 2.0
def create_mask(self, qlen, mlen):
        """
        Creates a causal attention mask. Float mask where 1.0 indicates masked and 0.0 indicates not-masked.

        Args:
            qlen: length of the current sequence (number of query tokens)
            mlen: length of the cached memory

        ::

                  same_length=False:      same_length=True:
                  <mlen > <  qlen >       <mlen > <  qlen >
               ^ [0 0 0 0 0 1 1 1 1]     [0 0 0 0 0 1 1 1 1]
                 [0 0 0 0 0 0 1 1 1]     [1 0 0 0 0 0 1 1 1]
            qlen [0 0 0 0 0 0 0 1 1]     [1 1 0 0 0 0 0 1 1]
                 [0 0 0 0 0 0 0 0 1]     [1 1 1 0 0 0 0 0 1]
               v [0 0 0 0 0 0 0 0 0]     [1 1 1 1 0 0 0 0 0]

        """
        attn_mask = torch.ones([qlen, qlen])
        mask_up = torch.triu(attn_mask, diagonal=1)
        attn_mask_pad = torch.zeros([qlen, mlen])
        ret = torch.cat([attn_mask_pad, mask_up], dim=1)
        if self.same_length:
            mask_lo = torch.tril(attn_mask, diagonal=-1)
            ret = torch.cat([ret[:, :qlen] + mask_lo, ret[:, qlen:]], dim=1)

        ret = ret.to(next(self.parameters()))
        return ret 
Example #28
Source Project: training_results_v0.5   Author: mlperf   File: multihead_attention.py    License: Apache License 2.0
def buffered_mask(self, tensor):
        dim = tensor.size(-1)
        if self._mask is None:
            self._mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1)
        if self._mask.size(0) < dim:
            self._mask = torch.triu(utils.fill_with_neg_inf(self._mask.resize_(dim, dim)), 1)
        return self._mask[:dim, :dim]