Python torch.tril() Examples

The following are 30 code examples of torch.tril(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
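Before diving into the project code, a minimal illustration of what torch.tril() returns (the input matrix is arbitrary, chosen only for orientation):

import torch

x = torch.arange(1., 10.).reshape(3, 3)
print(torch.tril(x))   # keep the main diagonal and everything below it
# tensor([[1., 0., 0.],
#         [4., 5., 0.],
#         [7., 8., 9.]])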
Example #1
Source File: models.py    From optnet with Apache License 2.0 (6 votes)
def __init__(self, nFeatures, args):
        super().__init__()

        nHidden, neq, nineq = 2*nFeatures-1,0,2*nFeatures-2
        assert(neq==0)

        # self.fc1 = nn.Linear(nFeatures, nHidden)
        self.M = Variable(torch.tril(torch.ones(nHidden, nHidden)).cuda())

        Q = 1e-8*torch.eye(nHidden)
        Q[:nFeatures,:nFeatures] = torch.eye(nFeatures)
        self.L = Variable(torch.potrf(Q))

        self.D = Parameter(0.3*torch.randn(nFeatures-1, nFeatures))
        # self.lam = Parameter(20.*torch.ones(1))
        self.h = Variable(torch.zeros(nineq))

        self.nFeatures = nFeatures
        self.nHidden = nHidden
        self.neq = neq
        self.nineq = nineq
        self.args = args 
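Note that this example targets an old PyTorch API: Variable is a deprecated wrapper and torch.potrf has since been replaced by torch.linalg.cholesky. A rough modern equivalent of the M and L setup, as a standalone sketch with illustrative sizes (torch.linalg.cholesky returns the lower factor, whereas potrf defaulted to the upper one, hence the transpose):

import torch

nFeatures = 4
nHidden = 2 * nFeatures - 1
M = torch.tril(torch.ones(nHidden, nHidden))   # no Variable wrapper needed anymore

Q = 1e-8 * torch.eye(nHidden)
Q[:nFeatures, :nFeatures] = torch.eye(nFeatures)
L = torch.linalg.cholesky(Q).T                 # upper factor, matching the old potrf default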
Example #2
Source File: multihead_attention.py    From translate with BSD 3-Clause "New" or "Revised" License (6 votes)
def apply_masks(scores, batch_size, unseen_mask, src_lengths):
    seq_len = scores.shape[-1]

    # [1, seq_len, seq_len]
    sequence_mask = torch.ones(seq_len, seq_len).unsqueeze(0).int()
    if unseen_mask:
        # [1, seq_len, seq_len]
        sequence_mask = (
            torch.tril(torch.ones(seq_len, seq_len), diagonal=0).unsqueeze(0).int()
        )

    if src_lengths is not None:
        # [batch_size, 1, seq_len]
        src_lengths_mask = create_src_lengths_mask(
            batch_size=batch_size, src_lengths=src_lengths
        ).unsqueeze(-2)

        # [batch_size, seq_len, seq_len]
        sequence_mask = sequence_mask & src_lengths_mask

    # [batch_size, 1, seq_len, seq_len]
    sequence_mask = sequence_mask.unsqueeze(1)

    scores = scores.masked_fill(sequence_mask == 0, -np.inf)
    return scores 
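create_src_lengths_mask is defined elsewhere in the translate repository; the stand-in below is purely hypothetical, written only so the snippet runs, and the call at the bottom assumes the apply_masks function above (together with its numpy import) is pasted into the same file. Scores are assumed to have shape [batch_size, n_heads, seq_len, seq_len]:

import torch
import numpy as np

def create_src_lengths_mask(batch_size, src_lengths):
    # Hypothetical stand-in: 1 for real tokens, 0 for padding, shape [batch_size, max_len].
    max_len = int(src_lengths.max())
    positions = torch.arange(max_len).unsqueeze(0).expand(batch_size, max_len)
    return (positions < src_lengths.unsqueeze(1)).int()

scores = torch.randn(2, 2, 4, 4)
masked = apply_masks(scores, batch_size=2, unseen_mask=True,
                     src_lengths=torch.tensor([4, 2]))
print(masked[1, 0])   # padded and future positions are -inf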
Example #3
Source File: gpt2.py    From fastNLP with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.pruned_heads = set() 
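The bias buffer registered here is the causal mask; in GPT-2-style attention it is sliced to the current sequence length and used to push masked attention scores toward -inf before the softmax. A minimal sketch of that usage (variable names are illustrative, not taken from this file):

import torch

n_ctx, seq_len = 8, 5
bias = torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)

w = torch.randn(2, 4, seq_len, seq_len)   # raw scores: [batch, heads, query, key]
b = bias[:, :, :seq_len, :seq_len]
w = w * b - 1e4 * (1 - b)                 # masked positions become ~-1e4
attn = torch.softmax(w, dim=-1)           # each query attends only to itself and earlier keys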
Example #4
Source File: flows.py    From pytorch-flows with MIT License (6 votes)
def __init__(self, num_inputs):
        super(LUInvertibleMM, self).__init__()
        self.W = torch.Tensor(num_inputs, num_inputs)
        nn.init.orthogonal_(self.W)
        self.L_mask = torch.tril(torch.ones(self.W.size()), -1)
        self.U_mask = self.L_mask.t().clone()

        P, L, U = sp.linalg.lu(self.W.numpy())
        self.P = torch.from_numpy(P)
        self.L = nn.Parameter(torch.from_numpy(L))
        self.U = nn.Parameter(torch.from_numpy(U))

        S = np.diag(U)
        sign_S = np.sign(S)
        log_S = np.log(abs(S))
        self.sign_S = torch.from_numpy(sign_S)
        self.log_S = nn.Parameter(torch.from_numpy(log_S))

        self.I = torch.eye(self.L.size(0)) 
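These masks are only half of the picture: the point of the strictly lower-triangular L_mask and its transpose is that the weight matrix can be rebuilt from trainable factors as P @ (L * L_mask + I) @ (U * U_mask + diag(sign_S * exp(log_S))), so the log-determinant reduces to log_S.sum(). The snippet below is a standalone sketch of that reconstruction, not a quotation of the module's forward pass:

import numpy as np
import scipy.linalg
import torch
import torch.nn as nn

num_inputs = 4
W = torch.empty(num_inputs, num_inputs)
nn.init.orthogonal_(W)

P_np, L_np, U_np = scipy.linalg.lu(W.numpy())
P, L, U = map(torch.from_numpy, (P_np, L_np, U_np))

L_mask = torch.tril(torch.ones(num_inputs, num_inputs), -1)
U_mask = L_mask.t().clone()
S = np.diag(U_np)
sign_S, log_S = torch.from_numpy(np.sign(S)), torch.from_numpy(np.log(abs(S)))
I = torch.eye(num_inputs)

L_full = L * L_mask + I                                       # unit lower-triangular factor
U_full = U * U_mask + torch.diag(sign_S * torch.exp(log_S))   # upper factor with its diagonal restored
W_rebuilt = P @ L_full @ U_full
print(torch.allclose(W_rebuilt.float(), W, atol=1e-5))        # True: W is recovered from the factors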
Example #5
Source File: cloze_transformer_model.py    From translate with BSD 3-Clause "New" or "Revised" License (6 votes)
def buffered_future_mask(self, tensor):
        """attend all surounding words except itself
           [[0, -inf, 0]
            [0,  0, -inf]
            [0,  0,   0]]
        The attention map is not a true diagonal since we predict y_{t+1} at time-step t
        """
        dim = tensor.size(0)
        if (
            not hasattr(self, "_future_mask")
            or self._future_mask is None
            or self._future_mask.device != tensor.device
        ):
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(tensor.new(dim, dim)), 1
            )
            self._future_mask = torch.tril(self._future_mask, 1)
        if self._future_mask.size(0) < dim:
            self._future_mask = torch.triu(
                utils.fill_with_neg_inf(self._future_mask.resize_(dim, dim)), 1
            )
            self._future_mask = torch.tril(self._future_mask, 1)
        return self._future_mask[:dim, :dim] 
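A quick check of the pattern the docstring describes, with utils.fill_with_neg_inf replaced by an explicit fill since that helper comes from the surrounding fairseq-style utilities:

import torch

dim = 3
mask = torch.triu(torch.full((dim, dim), float("-inf")), 1)  # -inf strictly above the diagonal
mask = torch.tril(mask, 1)                                   # keep only the first superdiagonal
print(mask)
# tensor([[0., -inf, 0.],
#         [0., 0., -inf],
#         [0., 0., 0.]])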
Example #6
Source File: average_attn.py    From ITDD with MIT License (6 votes)
def cumulative_average_mask(self, batch_size, inputs_len):
        """
        Builds the mask to compute the cumulative average as described in
        https://arxiv.org/abs/1805.00631 -- Figure 3

        Args:
            batch_size (int): batch size
            inputs_len (int): length of the inputs

        Returns:
            (`FloatTensor`):

            * A Tensor of shape `[batch_size x input_len x input_len]`
        """

        triangle = torch.tril(torch.ones(inputs_len, inputs_len))
        weights = torch.ones(1, inputs_len) / torch.arange(
            1, inputs_len + 1, dtype=torch.float)
        mask = triangle * weights.transpose(0, 1)

        return mask.unsqueeze(0).expand(batch_size, inputs_len, inputs_len) 
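To see why this mask implements a cumulative average, apply it to a toy sequence; row t of the product is the mean of inputs 0..t:

import torch

inputs_len = 4
triangle = torch.tril(torch.ones(inputs_len, inputs_len))
weights = torch.ones(1, inputs_len) / torch.arange(1, inputs_len + 1, dtype=torch.float)
mask = (triangle * weights.transpose(0, 1)).unsqueeze(0)   # [1, inputs_len, inputs_len]

x = torch.tensor([[[1.], [2.], [3.], [4.]]])               # [batch, inputs_len, 1]
print(torch.bmm(mask, x).squeeze())
# tensor([1.0000, 1.5000, 2.0000, 2.5000]) -> running averages of 1, 2, 3, 4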
Example #7
Source File: average_attn.py    From encoder-agnostic-adaptation with MIT License (6 votes)
def cumulative_average_mask(self, batch_size, inputs_len):
        """
        Builds the mask to compute the cumulative average as described in
        :cite:`DBLP:journals/corr/abs-1805-00631` -- Figure 3

        Args:
            batch_size (int): batch size
            inputs_len (int): length of the inputs

        Returns:
            (FloatTensor):

            * A Tensor of shape ``(batch_size, input_len, input_len)``
        """

        triangle = torch.tril(torch.ones(inputs_len, inputs_len))
        weights = torch.ones(1, inputs_len) / torch.arange(
            1, inputs_len + 1, dtype=torch.float)
        mask = triangle * weights.transpose(0, 1)

        return mask.unsqueeze(0).expand(batch_size, inputs_len, inputs_len) 
Example #8
Source File: bottom_up.py    From Dispersion-based-Clustering with MIT License (6 votes)
def select_merge_data_v2(self, u_feas, labels, linkages):
        linkages += np.tril(100000 * np.ones((len(u_feas), len(u_feas))))  # block the lower triangle (incl. the diagonal)

        print('Linkage adding')
        for idx in range(len(u_feas)):
            for j in range(idx + 1, len(u_feas)):
                if labels[idx] == labels[j]:
                    linkages[idx, j] = 100000  # set the distance within the same cluster

        ind = np.unravel_index(np.argsort(linkages, axis=None),
                               linkages.shape)  # with axis=None all numbers are sorted and unravel_index transforms the sorted index into ind for each dimension
        idx1 = ind[0]  # the first cluster index
        idx2 = ind[1]  # the second cluster index
        print('Linkage add finished')
        return idx1, idx2
Example #9
Source File: bottom_up.py    From Dispersion-based-Clustering with MIT License (6 votes)
def select_merge_data(self, u_feas, label, label_to_images,  ratio_n,  dists):
        dists.add_(torch.tril(100000 * torch.ones(len(u_feas), len(u_feas))))

        cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(len(u_feas))])
        dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))  
        
        for idx in range(len(u_feas)):
            for j in range(idx + 1, len(u_feas)):
                if label[idx] == label[j]:
                    dists[idx, j] = 100000                 

        dists = dists.numpy()
        ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)          
        idx1 = ind[0]          
        idx2 = ind[1]          
        return idx1, idx2 
Example #10
Source File: transformer_decoder.py    From neutralizing-bias with MIT License (6 votes)
def cumulative_average_mask(self, batch_size, inputs_len):
        """
        Builds the mask to compute the cumulative average as described in
        :cite:`DBLP:journals/corr/abs-1805-00631` -- Figure 3

        Args:
            batch_size (int): batch size
            inputs_len (int): length of the inputs

        Returns:
            (FloatTensor):

            * A Tensor of shape ``(batch_size, input_len, input_len)``
        """

        triangle = torch.tril(torch.ones(inputs_len, inputs_len))
        weights = torch.ones(1, inputs_len) / torch.arange(
            1, inputs_len + 1, dtype=torch.float)
        mask = triangle * weights.transpose(0, 1)

        return mask.unsqueeze(0).expand(batch_size, inputs_len, inputs_len) 
Example #11
Source File: transformer.py    From DISTRE with Apache License 2.0 (6 votes)
def __init__(self,
                 nx: int,
                 n_ctx: int,
                 config: TransformerConfig,
                 scale: bool = False) -> None:
        super().__init__()
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.num_heads == 0
        self.register_buffer('b', torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.num_heads
        self.split_size = n_state
        self.scale = scale
        self.c_attn = Conv1D(n_state * 3, 1, nx)
        self.c_proj = Conv1D(n_state, 1, nx)
        self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
        self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability) 
Example #12
Source File: modeling_openai.py    From TextClassify with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.output_attentions = config.output_attentions

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop) 
Example #13
Source File: qr.py    From tensorgrad with Apache License 2.0 (6 votes)
def _simple_qr_backward(q, r, dq, dr):
    if r.shape[-2] != r.shape[-1]:
        raise NotImplementedError("QrGrad not implemented when ncols > nrows "
                          "or full_matrices is true and ncols != nrows.")

    qdq = q.t() @ dq
    qdq_ = qdq - qdq.t()
    rdr = r @ dr.t()
    rdr_ = rdr - rdr.t()
    tril = torch.tril(qdq_ + rdr_)

    def _TriangularSolve(x, r):
        """Equiv to x @ torch.inverse(r).t() if r is upper-tri."""
        res = torch.triangular_solve(x.t(), r, upper=True, transpose=False)[0].t()
        return res

    grad_a = q @ (dr + _TriangularSolve(tril, r))
    grad_b = _TriangularSolve(dq - q @ qdq, r)
    return grad_a + grad_b 
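Since the formula mirrors TensorFlow's QrGrad, it can be sanity-checked against autograd on a square matrix; the check below assumes the function above is in scope, and uses torch.linalg.qr, which coincides with the reduced QR used here for square inputs:

import torch

a = torch.randn(4, 4, dtype=torch.float64, requires_grad=True)
q, r = torch.linalg.qr(a)
dq, dr = torch.randn_like(q), torch.randn_like(r)

grad_manual = _simple_qr_backward(q, r, dq, dr)
grad_auto, = torch.autograd.grad((q * dq).sum() + (r * dr).sum(), a)
print(torch.allclose(grad_manual, grad_auto))   # True up to numerical tolerance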
Example #14
Source File: modeling_gpt2.py    From exbert with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super().__init__()
        self.output_attentions = config.output_attentions
        self.output_additional_info = config.output_additional_info

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.pruned_heads = set() 
Example #15
Source File: modeling_gpt2.py    From TextClassify with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        self.output_attentions = config.output_attentions

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop) 
Example #16
Source File: modeling_openai.py    From exbert with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super().__init__()
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.output_attentions = config.output_attentions

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.pruned_heads = set() 
Example #17
Source File: attention.py    From adeptRL with GNU General Public License v3.0 (6 votes)
def __init__(self, height, width, nb_channel, nb_head, scale=False):
        super(RelationalMHDPA, self).__init__()
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert nb_channel % nb_head == 0
        seq_len = height * width
        self.register_buffer(
            "b",
            torch.tril(torch.ones(seq_len, seq_len)).view(
                1, 1, seq_len, seq_len
            ),
        )
        self.nb_head = nb_head
        self.split_size = nb_channel
        self.scale = scale
        self.projection = nn.Linear(nb_channel, nb_channel * 3)
        self.mlp = nn.Linear(nb_channel, nb_channel) 
Example #18
Source File: modeling_gpt2.py    From NLP_Toolkit with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super().__init__()
        self.output_attentions = config.output_attentions

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer(
            "bias", torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.uint8)).view(1, 1, n_ctx, n_ctx)
        )
        self.register_buffer("masked_bias", torch.tensor(-1e4))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop)
        self.pruned_heads = set() 
Example #19
Source File: modeling_gpt2.py    From NLP_Toolkit with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        self.output_attentions = config.output_attentions

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop) 
Example #20
Source File: mask.py    From espnet with Apache License 2.0 (6 votes)
def subsequent_mask(size, device="cpu", dtype=datatype):
    """Create mask for subsequent steps (1, size, size).

    :param int size: size of mask
    :param str device: "cpu" or "cuda" or torch.Tensor.device
    :param torch.dtype dtype: result dtype
    :rtype: torch.Tensor
    >>> subsequent_mask(3)
    [[1, 0, 0],
     [1, 1, 0],
     [1, 1, 1]]
    """
    if is_torch_1_2 and dtype == torch.bool:
        # torch=1.2 doesn't support tril for bool tensor
        ret = torch.ones(size, size, device=device, dtype=torch.uint8)
        return torch.tril(ret, out=ret).type(dtype)
    else:
        ret = torch.ones(size, size, device=device, dtype=dtype)
        return torch.tril(ret, out=ret) 
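On PyTorch versions where tril does support bool tensors (roughly 1.3 and later), the same mask can be built directly; a short sketch:

import torch

mask = torch.tril(torch.ones(3, 3, dtype=torch.bool))
print(mask)
# tensor([[ True, False, False],
#         [ True,  True, False],
#         [ True,  True,  True]])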
Example #21
Source File: basics.py    From heat with MIT License (6 votes)
def tril(m, k=0):
    """
    Returns the lower triangular part of the tensor, the other elements of the result tensor are set to 0.

    The lower triangular part of the tensor is defined as the elements on and below the diagonal.

    The argument k controls which diagonal to consider. If k=0, all elements on and below the main diagonal are
    retained. A positive value includes just as many diagonals above the main diagonal, and similarly a negative
    value excludes just as many diagonals below the main diagonal.

    Parameters
    ----------
    m : ht.DNDarray
        Input tensor for which to compute the lower triangle.
    k : int, optional
        Diagonal above which to zero elements. k=0 (default) is the main diagonal, k<0 is below and k>0 is above.

    Returns
    -------
    lower_triangle : ht.DNDarray
        Lower triangle of the input tensor.
    """
    return __tri_op(m, k, torch.tril) 
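The k semantics the docstring describes are those of torch.tril's own diagonal argument; for example:

import torch

m = torch.ones(3, 3)
print(torch.tril(m, 1))    # also keep the first diagonal above the main one
# tensor([[1., 1., 0.],
#         [1., 1., 1.],
#         [1., 1., 1.]])
print(torch.tril(m, -1))   # exclude the main diagonal as well
# tensor([[0., 0., 0.],
#         [1., 0., 0.],
#         [1., 1., 0.]])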
Example #22
Source File: openai_transformer.py    From tsalib with Apache License 2.0 (6 votes)
def __init__(self,
                 nx: int,
                 n_ctx: int,
                 config: TransformerConfig,
                 scale: bool = False) -> None:
        super().__init__()
        self.nx = nx
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.num_heads == 0
        self.register_buffer('b', torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.num_heads
        self.split_size = n_state
        self.scale = scale
        self.c_attn = Conv1D(n_state * 3, 1, nx)
        self.c_proj = Conv1D(n_state, 1, nx)
        self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
        self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability) 
Example #23
Source File: seq2seq_loader.py    From unilm with MIT License (6 votes)
def __init__(self, vocab_words, indexer, max_len=512, max_tgt_length=128, new_segment_ids=False, mode="s2s", num_qkv=0, s2s_special_token=False, s2s_add_segment=False, s2s_share_segment=False, pos_shift=False):
        super().__init__()
        self.max_len = max_len
        self.vocab_words = vocab_words  # vocabulary (sub)words
        self.indexer = indexer  # function from token to token index
        self.max_len = max_len
        self._tril_matrix = torch.tril(torch.ones(
            (max_len, max_len), dtype=torch.long))
        self.new_segment_ids = new_segment_ids
        self.task_idx = 3   # relax projection layer for different tasks
        assert mode in ("s2s", "l2r")
        self.mode = mode
        self.max_tgt_length = max_tgt_length
        self.num_qkv = num_qkv
        self.s2s_special_token = s2s_special_token
        self.s2s_add_segment = s2s_add_segment
        self.s2s_share_segment = s2s_share_segment
        self.pos_shift = pos_shift 
Example #24
Source File: modeling_openai.py    From CCF-BDCI-Sentiment-Analysis-Baseline with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.output_attentions = config.output_attentions

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop) 
Example #25
Source File: s2s_loader.py    From unilm with MIT License (6 votes)
def __init__(self, vocab_words, indexer, max_len=512, max_tgt_length=128, 
                 mode="s2s", pos_shift=False, source_type_id=0, target_type_id=1, 
                 cls_token='[CLS]', sep_token='[SEP]', pad_token='[PAD]'):
        super().__init__()
        self.max_len = max_len
        self.vocab_words = vocab_words  # vocabulary (sub)words
        self.indexer = indexer  # function from token to token index
        self.max_len = max_len
        self._tril_matrix = torch.tril(torch.ones((max_len, max_len), dtype=torch.long))
        self.task_idx = 3   # relax projection layer for different tasks
        assert mode in ("s2s", "l2r")
        self.mode = mode
        self.max_tgt_length = max_tgt_length
        self.pos_shift = pos_shift

        self.cls_token = cls_token
        self.sep_token = sep_token
        self.pad_token = pad_token

        self.source_type_id = source_type_id
        self.target_type_id = target_type_id

        self.cc = 0 
Example #26
Source File: modeling_gpt2.py    From CCF-BDCI-Sentiment-Analysis-Baseline with Apache License 2.0 (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        self.output_attentions = config.output_attentions

        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.n_head == 0
        self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.n_head
        self.split_size = n_state
        self.scale = scale

        self.c_attn = Conv1D(n_state * 3, nx)
        self.c_proj = Conv1D(n_state, nx)
        self.attn_dropout = nn.Dropout(config.attn_pdrop)
        self.resid_dropout = nn.Dropout(config.resid_pdrop) 
Example #27
Source File: openai_transformer.py    From magnitude with MIT License (6 votes)
def __init__(self, nx, n_ctx, config, scale=False):
        super(Attention, self).__init__()
        n_state = nx  # in Attention: n_state=768 (nx=n_embd)
        # [switch nx => n_state from Block to Attention to keep identical to TF implem]
        assert n_state % config.num_heads == 0
        self.register_buffer(u'b', torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx))
        self.n_head = config.num_heads
        self.split_size = n_state
        self.scale = scale
        self.c_attn = Conv1D(n_state * 3, 1, nx)
        self.c_proj = Conv1D(n_state, 1, nx)
        self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
        self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability) 
Example #28
Source File: models.py    From optnet with Apache License 2.0 (6 votes)
def __init__(self, nHidden=50, nineq=200, neq=0, eps=1e-4):
        super(LenetOptNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)

        self.qp_o = nn.Linear(50*4*4, nHidden)
        self.qp_z0 = nn.Linear(50*4*4, nHidden)
        self.qp_s0 = nn.Linear(50*4*4, nineq)

        assert(neq==0)
        self.M = Variable(torch.tril(torch.ones(nHidden, nHidden)).cuda())
        self.L = Parameter(torch.tril(torch.rand(nHidden, nHidden).cuda()))
        self.G = Parameter(torch.Tensor(nineq,nHidden).uniform_(-1,1).cuda())
        # self.z0 = Parameter(torch.zeros(nHidden).cuda())
        # self.s0 = Parameter(torch.ones(nineq).cuda())

        self.nHidden = nHidden
        self.nineq = nineq
        self.neq = neq
        self.eps = eps 
Example #29
Source File: bottom_up.py    From Dispersion-based-Clustering with MIT License (6 votes)
def select_merge_data(self, u_feas, label, label_to_images,  ratio_n,  dists):
        dists.add_(torch.tril(100000 * torch.ones(len(u_feas), len(u_feas))))#blocking the triangle

        cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(len(u_feas))])
        dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))  # dist += |A|+|B|
        
        for idx in range(len(u_feas)):
            for j in range(idx + 1, len(u_feas)):
                if label[idx] == label[j]:
                    dists[idx, j] = 100000                  # set the distance within the same cluster

        dists = dists.numpy()
        ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)          # with axis=None all numbers are sorted and unravel_index transforms the sorted index into ind for each dimension
        idx1 = ind[0]          # the first dimension index
        idx2 = ind[1]           # the second dimension index
        return idx1, idx2 
Example #30
Source File: modeling_transfo_xl.py    From bert_on_stilts with Apache License 2.0 (5 votes)
def _parallelogram_mask(self, h, w, left=False):
        mask = torch.ones((h, w)).byte()
        m = min(h, w)
        mask[:m,:m] = torch.triu(mask[:m,:m])
        mask[-m:,-m:] = torch.tril(mask[-m:,-m:])

        if left:
            return mask
        else:
            return mask.flip(0)
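For intuition, here is the mask this produces for h=3, w=5 with left=False; the ones mark the visible parallelogram band:

import torch

h, w = 3, 5
mask = torch.ones((h, w)).byte()
m = min(h, w)
mask[:m, :m] = torch.triu(mask[:m, :m])
mask[-m:, -m:] = torch.tril(mask[-m:, -m:])
print(mask.flip(0))
# tensor([[0, 0, 1, 1, 1],
#         [0, 1, 1, 1, 0],
#         [1, 1, 1, 0, 0]], dtype=torch.uint8)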