Python torch.tril() Examples
The following are 30 code examples of torch.tril().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module torch, or try the search function.
![](https://www.programcreek.com/common/static/images/search.png)
Example #1
Source File: models.py From optnet with Apache License 2.0 | 6 votes |
def __init__(self, nFeatures, args):
    """Build the constant matrices and the learnable operator ``D``.

    Args:
        nFeatures: Feature count; every other size is derived from it.
        args: Stored unchanged on ``self.args``.
    """
    super().__init__()

    # All problem sizes follow from the number of features.
    nHidden = 2 * nFeatures - 1
    neq = 0
    nineq = 2 * nFeatures - 2
    assert neq == 0

    # self.fc1 = nn.Linear(nFeatures, nHidden)
    lower_ones = torch.tril(torch.ones(nHidden, nHidden))
    self.M = Variable(lower_ones.cuda())

    # Q is diagonal: identity on the first nFeatures entries, 1e-8 elsewhere.
    Q = 1e-8 * torch.eye(nHidden)
    Q[:nFeatures, :nFeatures] = torch.eye(nFeatures)
    # NOTE(review): torch.potrf is the legacy Cholesky entry point and is
    # absent from recent PyTorch releases -- confirm the targeted version.
    self.L = Variable(torch.potrf(Q))

    self.D = Parameter(0.3 * torch.randn(nFeatures - 1, nFeatures))
    # self.lam = Parameter(20.*torch.ones(1))
    self.h = Variable(torch.zeros(nineq))

    self.nFeatures = nFeatures
    self.nHidden = nHidden
    self.neq = neq
    self.nineq = nineq
    self.args = args
Example #2
Source File: multihead_attention.py From translate with BSD 3-Clause "New" or "Revised" License | 6 votes |
def apply_masks(scores, batch_size, unseen_mask, src_lengths):
    """Mask attention scores with ``-inf`` where attention is not allowed.

    The mask is created on the same device as ``scores`` so the function
    also works when ``scores`` does not live on the default device.

    Args:
        scores: Attention scores whose last two dimensions are
            ``[seq_len, seq_len]``; the result is indexed as
            ``[batch_size, heads, seq_len, seq_len]``.
        batch_size: Forwarded to ``create_src_lengths_mask``.
        unseen_mask: When truthy, positions above the main diagonal
            (future positions) are masked.
        src_lengths: Optional source lengths; when given, the mask returned
            by ``create_src_lengths_mask`` is AND-ed into the sequence mask.

    Returns:
        A new tensor equal to ``scores`` with masked positions set to -inf.
    """
    seq_len = scores.shape[-1]
    device = scores.device

    # [seq_len, seq_len]; lower-triangular when future positions are hidden.
    sequence_mask = torch.ones(seq_len, seq_len, device=device)
    if unseen_mask:
        sequence_mask = torch.tril(sequence_mask, diagonal=0)
    # [1, seq_len, seq_len]
    sequence_mask = sequence_mask.unsqueeze(0).int()

    if src_lengths is not None:
        # [batch_size, 1, seq_len]
        src_lengths_mask = create_src_lengths_mask(
            batch_size=batch_size, src_lengths=src_lengths
        ).unsqueeze(-2)
        # [batch_size, seq_len, seq_len]
        sequence_mask = sequence_mask & src_lengths_mask.to(device)

    # [batch_size, 1, seq_len, seq_len]
    sequence_mask = sequence_mask.unsqueeze(1)

    return scores.masked_fill(sequence_mask == 0, float("-inf"))
Example #3
Source File: gpt2.py From fastNLP with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Create the attention layer's mask buffer, projections and dropouts.

    Args:
        nx: Embedding width; also used as the attention state size.
        n_ctx: Side length of the square lower-triangular ``bias`` buffer.
        config: Object providing ``n_head``, ``attn_pdrop`` and
            ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    # Lower-triangular ones, reshaped to (1, 1, n_ctx, n_ctx).
    lower_ones = torch.tril(torch.ones(n_ctx, n_ctx))
    self.register_buffer("bias", lower_ones.view(1, 1, n_ctx, n_ctx))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
Example #4
Source File: flows.py From pytorch-flows with MIT License | 6 votes |
def __init__(self, num_inputs):
    """Initialise an orthogonal matrix and store its LU factorisation.

    Args:
        num_inputs: Side length of the square weight matrix.
    """
    super(LUInvertibleMM, self).__init__()

    self.W = torch.Tensor(num_inputs, num_inputs)
    nn.init.orthogonal_(self.W)

    # Strictly-lower mask and its transpose (strictly-upper mask).
    strictly_lower = torch.tril(torch.ones(self.W.size()), -1)
    self.L_mask = strictly_lower
    self.U_mask = strictly_lower.t().clone()

    # P is kept as a plain tensor; L and U become parameters.
    perm, lower, upper = sp.linalg.lu(self.W.numpy())
    self.P = torch.from_numpy(perm)
    self.L = nn.Parameter(torch.from_numpy(lower))
    self.U = nn.Parameter(torch.from_numpy(upper))

    # Diagonal of U split into a fixed sign and a learnable log-magnitude.
    diag_u = np.diag(upper)
    self.sign_S = torch.from_numpy(np.sign(diag_u))
    self.log_S = nn.Parameter(torch.from_numpy(np.log(abs(diag_u))))

    self.I = torch.eye(self.L.size(0))
Example #5
Source File: cloze_transformer_model.py From translate with BSD 3-Clause "New" or "Revised" License | 6 votes |
def buffered_future_mask(self, tensor):
    """Return a cached mask that is -inf only on the first superdiagonal.

    Attends to all surrounding words except the next one:

        [[0, -inf, 0]
         [0, 0, -inf]
         [0, 0, 0]]

    The masked line is not the true diagonal since we predict y_{t+1} at
    time-step t.

    The mask is cached on ``self._future_mask``. It is rebuilt when it is
    missing, ``None`` or on a different device than ``tensor``, and is
    resized and refilled when it is smaller than ``tensor.size(0)``.
    """
    dim = tensor.size(0)

    def _first_superdiagonal(square):
        # Fill with -inf, then keep only the diagonal directly above the main one.
        filled = utils.fill_with_neg_inf(square)
        return torch.tril(torch.triu(filled, 1), 1)

    cached = getattr(self, "_future_mask", None)
    if cached is None or cached.device != tensor.device:
        self._future_mask = _first_superdiagonal(tensor.new(dim, dim))

    if self._future_mask.size(0) < dim:
        self._future_mask = _first_superdiagonal(
            self._future_mask.resize_(dim, dim)
        )

    return self._future_mask[:dim, :dim]
Example #6
Source File: average_attn.py From ITDD with MIT License | 6 votes |
def cumulative_average_mask(self, batch_size, inputs_len):
    """Build the cumulative-average mask of https://arxiv.org/abs/1805.00631 (Figure 3).

    Row ``i`` holds ``1 / (i + 1)`` in its first ``i + 1`` columns and zeros
    elsewhere, so multiplying by it averages positions ``0..i``.

    Args:
        batch_size (int): batch size
        inputs_len (int): length of the inputs

    Returns:
        (`FloatTensor`):

        * A Tensor of shape `[batch_size x input_len x input_len]`
    """
    lower_ones = torch.tril(torch.ones(inputs_len, inputs_len))
    # Column vector of 1/1, 1/2, ..., 1/inputs_len (one weight per row).
    positions = torch.arange(1, inputs_len + 1, dtype=torch.float)
    row_weights = torch.ones(inputs_len, 1) / positions.unsqueeze(1)
    mask = lower_ones * row_weights
    return mask.unsqueeze(0).expand(batch_size, inputs_len, inputs_len)
Example #7
Source File: average_attn.py From encoder-agnostic-adaptation with MIT License | 6 votes |
def cumulative_average_mask(self, batch_size, inputs_len):
    """Create the mask used to compute a cumulative average.

    See :cite:`DBLP:journals/corr/abs-1805-00631` -- Figure 3. Entry
    ``(i, j)`` equals ``1 / (i + 1)`` when ``j <= i`` and ``0`` otherwise.

    Args:
        batch_size (int): batch size
        inputs_len (int): length of the inputs

    Returns:
        (FloatTensor):

        * A Tensor of shape ``(batch_size, input_len, input_len)``
    """
    size = (inputs_len, inputs_len)
    triangle = torch.tril(torch.ones(*size))
    # 1/k for k = 1..inputs_len, laid out as a column so it scales each row.
    counts = torch.arange(1, inputs_len + 1, dtype=torch.float)
    per_row = (torch.ones(1, inputs_len) / counts).transpose(0, 1)
    averaged = triangle * per_row
    return averaged.unsqueeze(0).expand(batch_size, *size)
Example #8
Source File: bottom_up.py From Dispersion-based-Clustering with MIT License | 6 votes |
def select_merge_data_v2(self, u_feas, labels, linkages):
    """Rank every pair of samples by linkage distance, ascending.

    ``linkages`` is modified in place: the lower triangle (diagonal
    included) is increased by 100000 so those entries sort last, and every
    upper-triangle pair whose two samples share a label is set to 100000.

    The same-label blocking is done with one vectorised mask instead of a
    nested Python loop over all pairs.

    Args:
        u_feas: Sequence whose length is the number of samples.
        labels: Per-sample labels, at least ``len(u_feas)`` long.
        linkages: Square NumPy array of pairwise distances (mutated).

    Returns:
        tuple: ``(idx1, idx2)`` -- row and column indices of all entries of
        ``linkages`` in ascending order of value.
    """
    n = len(u_feas)
    linkages += np.tril(100000 * np.ones((n, n)))  # blocking the triangle
    print('Linkage adding')

    # Pairs (i, j) with i < j and equal labels get the blocking distance.
    label_arr = np.asarray(labels)[:n]
    same_cluster = label_arr[:, None] == label_arr[None, :]
    linkages[np.triu(same_cluster, k=1)] = 100000

    # With axis=None all entries are sorted in flattened order;
    # unravel_index turns the flat positions back into (row, column).
    ind = np.unravel_index(np.argsort(linkages, axis=None), linkages.shape)
    idx1 = ind[0]  # the first cluster index
    idx2 = ind[1]  # the second cluster index
    print('Linkage add finished')
    return idx1, idx2
Example #9
Source File: bottom_up.py From Dispersion-based-Clustering with MIT License | 6 votes |
def select_merge_data(self, u_feas, label, label_to_images, ratio_n, dists):
    """Rank every pair of samples by penalised distance, ascending.

    ``dists`` is modified in place in three steps:

    1. the lower triangle (diagonal included) is increased by 100000;
    2. ``ratio_n * (|A| + |B|)`` is added to every entry, where ``|A|`` and
       ``|B|`` are the member counts of the two samples' labels;
    3. upper-triangle pairs whose samples share a label are set to 100000.

    Step 3 uses one vectorised mask on the NumPy view of ``dists`` (which
    shares memory with the tensor) instead of a nested Python loop.

    Args:
        u_feas: Sequence whose length is the number of samples.
        label: Per-sample labels, at least ``len(u_feas)`` long.
        label_to_images: Mapping from a label to a sized collection of its
            members; only the length of each collection is used.
        ratio_n: Weight of the size penalty.
        dists: Square torch tensor of pairwise distances (mutated).

    Returns:
        tuple: ``(idx1, idx2)`` -- row and column indices of all entries of
        ``dists`` in ascending order of value.
    """
    n = len(u_feas)
    dists.add_(torch.tril(100000 * torch.ones(n, n)))

    cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(n)])
    dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))

    # The array below is a view: writes are visible through the tensor too.
    dists = dists.numpy()
    label_arr = np.asarray(label)[:n]
    same_cluster = label_arr[:, None] == label_arr[None, :]
    dists[np.triu(same_cluster, k=1)] = 100000

    ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)
    return ind[0], ind[1]
Example #10
Source File: transformer_decoder.py From neutralizing-bias with MIT License | 6 votes |
def cumulative_average_mask(self, batch_size, inputs_len):
    """Return the lower-triangular averaging mask, one copy per batch item.

    Described in :cite:`DBLP:journals/corr/abs-1805-00631` -- Figure 3.
    Row ``i`` contains ``i + 1`` leading entries of ``1 / (i + 1)``.

    Args:
        batch_size (int): batch size
        inputs_len (int): length of the inputs

    Returns:
        (FloatTensor):

        * A Tensor of shape ``(batch_size, input_len, input_len)``
    """
    steps = torch.arange(1, inputs_len + 1, dtype=torch.float)
    inverse_steps = torch.ones(1, inputs_len) / steps
    # Scale each row of the lower-triangular ones by its own 1/(row+1).
    mask = torch.tril(torch.ones(inputs_len, inputs_len)) * inverse_steps.t()
    batched = mask.unsqueeze(0)
    return batched.expand(batch_size, inputs_len, inputs_len)
Example #11
Source File: transformer.py From DISTRE with Apache License 2.0 | 6 votes |
def __init__(self,
             nx: int,
             n_ctx: int,
             config: TransformerConfig,
             scale: bool = False) -> None:
    """Set up the mask buffer, projection layers and dropouts.

    Args:
        nx: Embedding width; also used as the attention state size.
        n_ctx: Side length of the square lower-triangular buffer ``b``.
        config: Supplies ``num_heads``, ``attention_dropout_probability``
            and ``residual_dropout_probability``.
        scale: Stored on ``self.scale``.
    """
    super().__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.num_heads == 0

    mask_shape = (1, 1, n_ctx, n_ctx)
    self.register_buffer('b', torch.tril(torch.ones(n_ctx, n_ctx)).view(*mask_shape))

    self.n_head = config.num_heads
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
    self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability)
Example #12
Source File: modeling_openai.py From TextClassify with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Initialise the attention module.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``n_head``, ``output_attentions``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    # Buffer of shape (1, 1, n_ctx, n_ctx): ones on and below the diagonal.
    tri = torch.ones(n_ctx, n_ctx)
    tri = torch.tril(tri)
    self.register_buffer("bias", tri.view(1, 1, n_ctx, n_ctx))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.output_attentions = config.output_attentions

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example #13
Source File: qr.py From tensorgrad with Apache License 2.0 | 6 votes |
def _simple_qr_backward(q, r, dq, dr):
    """Compute the input gradient of a QR factorisation with square ``r``.

    Args:
        q: Orthogonal factor (2-D tensor).
        r: Upper-triangular factor; its last two dimensions must be equal.
        dq: Gradient with respect to ``q``.
        dr: Gradient with respect to ``r``.

    Returns:
        Gradient with respect to the factorised matrix.

    Raises:
        NotImplementedError: If ``r`` is not square.
    """
    rows, cols = r.shape[-2], r.shape[-1]
    if rows != cols:
        raise NotImplementedError("QrGrad not implemented when ncols > nrows "
                                  "or full_matrices is true and ncols != nrows.")

    def _TriangularSolve(x, r):
        """Equiv to x @ torch.inverse(r).t() if r is upper-tri."""
        solved = torch.triangular_solve(x.t(), r, upper=True, transpose=False)
        return solved[0].t()

    qdq = q.t() @ dq
    rdr = r @ dr.t()
    # Sum of the two antisymmetric parts; only its lower triangle is used.
    antisym = (qdq - qdq.t()) + (rdr - rdr.t())
    tril = torch.tril(antisym)

    grad_a = q @ (dr + _TriangularSolve(tril, r))
    grad_b = _TriangularSolve(dq - q @ qdq, r)
    return grad_a + grad_b
Example #14
Source File: modeling_gpt2.py From exbert with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Construct the attention block.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``output_attentions``, ``output_additional_info``,
            ``n_head``, ``attn_pdrop`` and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super().__init__()

    self.output_attentions = config.output_attentions
    self.output_additional_info = config.output_additional_info

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    # (1, 1, n_ctx, n_ctx) buffer with ones on and below the main diagonal.
    bias = torch.tril(torch.ones(n_ctx, n_ctx))
    self.register_buffer("bias", bias.view(1, 1, n_ctx, n_ctx))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
Example #15
Source File: modeling_gpt2.py From TextClassify with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Create buffers and sub-layers of the attention module.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``output_attentions``, ``n_head``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()
    self.output_attentions = config.output_attentions

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    square_ones = torch.ones(n_ctx, n_ctx)
    self.register_buffer(
        "bias",
        torch.tril(square_ones).view(1, 1, n_ctx, n_ctx),
    )

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example #16
Source File: modeling_openai.py From exbert with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Initialise mask buffer, projections, dropouts and pruning state.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``n_head``, ``output_attentions``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super().__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    view_shape = (1, 1, n_ctx, n_ctx)
    self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(*view_shape))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.output_attentions = config.output_attentions

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
Example #17
Source File: attention.py From adeptRL with GNU General Public License v3.0 | 6 votes |
def __init__(self, height, width, nb_channel, nb_head, scale=False):
    """Set up the mask buffer and the two linear layers.

    Args:
        height: Input height; ``height * width`` is the sequence length.
        width: Input width.
        nb_channel: Channel count; must be divisible by ``nb_head``.
        nb_head: Number of attention heads.
        scale: Stored on ``self.scale``.
    """
    super(RelationalMHDPA, self).__init__()
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert nb_channel % nb_head == 0

    seq_len = height * width
    # (1, 1, seq_len, seq_len) buffer of lower-triangular ones.
    lower_ones = torch.tril(torch.ones(seq_len, seq_len))
    self.register_buffer("b", lower_ones.view(1, 1, seq_len, seq_len))

    self.nb_head = nb_head
    self.split_size = nb_channel
    self.scale = scale

    self.projection = nn.Linear(nb_channel, nb_channel * 3)
    self.mlp = nn.Linear(nb_channel, nb_channel)
Example #18
Source File: modeling_gpt2.py From NLP_Toolkit with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Build the attention module with a uint8 mask and a masked-bias value.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``output_attentions``, ``n_head``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super().__init__()
    self.output_attentions = config.output_attentions

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    # uint8 lower-triangular ones, shaped (1, 1, n_ctx, n_ctx).
    lower_ones = torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.uint8))
    self.register_buffer("bias", lower_ones.view(1, 1, n_ctx, n_ctx))
    self.register_buffer("masked_bias", torch.tensor(-1e4))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
    self.pruned_heads = set()
Example #19
Source File: modeling_gpt2.py From NLP_Toolkit with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Initialise the attention layer.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``output_attentions``, ``n_head``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()
    self.output_attentions = config.output_attentions

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    tri_mask = torch.tril(torch.ones(n_ctx, n_ctx))
    tri_mask = tri_mask.view(1, 1, n_ctx, n_ctx)
    self.register_buffer("bias", tri_mask)

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example #20
Source File: mask.py From espnet with Apache License 2.0 | 6 votes |
def subsequent_mask(size, device="cpu", dtype=datatype):
    """Create mask for subsequent steps (1, size, size).

    The mask is a ``size x size`` lower-triangular matrix of ones. When
    ``is_torch_1_2`` is set and a bool mask is requested, it is built as
    uint8 first and converted afterwards.

    :param int size: size of mask
    :param str device: "cpu" or "cuda" or torch.Tensor.device
    :param torch.dtype dtype: result dtype
    :rtype: torch.Tensor
    >>> subsequent_mask(3)
    [[1, 0, 0],
     [1, 1, 0],
     [1, 1, 1]]
    """
    # torch=1.2 doesn't support tril for bool tensor
    via_uint8 = is_torch_1_2 and dtype == torch.bool
    build_dtype = torch.uint8 if via_uint8 else dtype

    ret = torch.ones(size, size, device=device, dtype=build_dtype)
    lower = torch.tril(ret, out=ret)  # written in place into ``ret``
    if via_uint8:
        return lower.type(dtype)
    return lower
Example #21
Source File: basics.py From heat with MIT License | 6 votes |
def tril(m, k=0):
    """
    Return the lower-triangular part of ``m``; all other elements of the
    result are set to 0.

    The lower triangle consists of the elements on and below a chosen
    diagonal. With k=0 that is the main diagonal. A positive k keeps that
    many additional diagonals above the main one, and a negative k drops
    that many diagonals below it.

    Parameters
    ----------
    m : ht.DNDarray
        Input tensor for which to compute the lower triangle.
    k : int, optional
        Diagonal above which to zero elements. k=0 (default) is the main
        diagonal, k<0 is below and k>0 is above.

    Returns
    -------
    lower_triangle : ht.DNDarray
        Lower triangle of the input tensor.
    """
    # Delegate to the shared triangle helper, using torch.tril locally.
    local_op = torch.tril
    return __tri_op(m, k, local_op)
Example #22
Source File: openai_transformer.py From tsalib with Apache License 2.0 | 6 votes |
def __init__(self,
             nx: int,
             n_ctx: int,
             config: TransformerConfig,
             scale: bool = False) -> None:
    """Create the attention layer and remember the embedding width.

    Args:
        nx: Embedding width; stored on ``self.nx`` and used as state size.
        n_ctx: Side length of the square lower-triangular buffer ``b``.
        config: Supplies ``num_heads``, ``attention_dropout_probability``
            and ``residual_dropout_probability``.
        scale: Stored on ``self.scale``.
    """
    super().__init__()
    self.nx = nx

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.num_heads == 0

    # Lower-triangular ones reshaped to (1, 1, n_ctx, n_ctx).
    lower_ones = torch.tril(torch.ones(n_ctx, n_ctx))
    self.register_buffer('b', lower_ones.view(1, 1, n_ctx, n_ctx))

    self.n_head = config.num_heads
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
    self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability)
Example #23
Source File: seq2seq_loader.py From unilm with MIT License | 6 votes |
def __init__(self, vocab_words, indexer, max_len=512, max_tgt_length=128,
             new_segment_ids=False, mode="s2s", num_qkv=0,
             s2s_special_token=False, s2s_add_segment=False,
             s2s_share_segment=False, pos_shift=False):
    """Store the preprocessing options and precompute a triangular matrix.

    Args:
        vocab_words: vocabulary (sub)words.
        indexer: function from token to token index.
        max_len: Side length of the precomputed ``_tril_matrix``.
        max_tgt_length: Stored on ``self.max_tgt_length``.
        new_segment_ids: Stored on ``self.new_segment_ids``.
        mode: Either ``"s2s"`` or ``"l2r"``.
        num_qkv: Stored on ``self.num_qkv``.
        s2s_special_token: Stored on ``self.s2s_special_token``.
        s2s_add_segment: Stored on ``self.s2s_add_segment``.
        s2s_share_segment: Stored on ``self.s2s_share_segment``.
        pos_shift: Stored on ``self.pos_shift``.

    Raises:
        AssertionError: If ``mode`` is not one of the supported values.
    """
    super().__init__()
    # max_len was assigned twice in the original; once is enough.
    self.max_len = max_len
    self.vocab_words = vocab_words  # vocabulary (sub)words
    self.indexer = indexer  # function from token to token index

    # (max_len, max_len) matrix of long ones on and below the diagonal.
    self._tril_matrix = torch.tril(torch.ones(
        (max_len, max_len), dtype=torch.long))

    self.new_segment_ids = new_segment_ids
    self.task_idx = 3  # relax projection layer for different tasks

    assert mode in ("s2s", "l2r")
    self.mode = mode

    self.max_tgt_length = max_tgt_length
    self.num_qkv = num_qkv
    self.s2s_special_token = s2s_special_token
    self.s2s_add_segment = s2s_add_segment
    self.s2s_share_segment = s2s_share_segment
    self.pos_shift = pos_shift
Example #24
Source File: modeling_openai.py From CCF-BDCI-Sentiment-Analysis-Baseline with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Prepare the attention module's buffer and layers.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``n_head``, ``output_attentions``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    ones = torch.ones(n_ctx, n_ctx)
    self.register_buffer("bias", torch.tril(ones).view(1, 1, n_ctx, n_ctx))

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale
    self.output_attentions = config.output_attentions

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example #25
Source File: s2s_loader.py From unilm with MIT License | 6 votes |
def __init__(self, vocab_words, indexer, max_len=512, max_tgt_length=128,
             mode="s2s", pos_shift=False, source_type_id=0, target_type_id=1,
             cls_token='[CLS]', sep_token='[SEP]', pad_token='[PAD]'):
    """Store the preprocessing options and precompute a triangular matrix.

    Args:
        vocab_words: vocabulary (sub)words.
        indexer: function from token to token index.
        max_len: Side length of the precomputed ``_tril_matrix``.
        max_tgt_length: Stored on ``self.max_tgt_length``.
        mode: Either ``"s2s"`` or ``"l2r"``.
        pos_shift: Stored on ``self.pos_shift``.
        source_type_id: Stored on ``self.source_type_id``.
        target_type_id: Stored on ``self.target_type_id``.
        cls_token: Stored on ``self.cls_token``.
        sep_token: Stored on ``self.sep_token``.
        pad_token: Stored on ``self.pad_token``.

    Raises:
        AssertionError: If ``mode`` is not one of the supported values.
    """
    super().__init__()
    # max_len was assigned twice in the original; once is enough.
    self.max_len = max_len
    self.vocab_words = vocab_words  # vocabulary (sub)words
    self.indexer = indexer  # function from token to token index

    # (max_len, max_len) matrix of long ones on and below the diagonal.
    self._tril_matrix = torch.tril(torch.ones((max_len, max_len), dtype=torch.long))
    self.task_idx = 3  # relax projection layer for different tasks

    assert mode in ("s2s", "l2r")
    self.mode = mode

    self.max_tgt_length = max_tgt_length
    self.pos_shift = pos_shift

    self.cls_token = cls_token
    self.sep_token = sep_token
    self.pad_token = pad_token

    self.source_type_id = source_type_id
    self.target_type_id = target_type_id

    self.cc = 0
Example #26
Source File: modeling_gpt2.py From CCF-BDCI-Sentiment-Analysis-Baseline with Apache License 2.0 | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Set up the attention module's state.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the lower-triangular ``bias`` buffer.
        config: Supplies ``output_attentions``, ``n_head``, ``attn_pdrop``
            and ``resid_pdrop``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()
    self.output_attentions = config.output_attentions

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.n_head == 0

    # Ones on and below the diagonal, with two leading singleton dimensions.
    bias_buffer = torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)
    self.register_buffer("bias", bias_buffer)

    self.n_head = config.n_head
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, nx)
    self.c_proj = Conv1D(n_state, nx)
    self.attn_dropout = nn.Dropout(config.attn_pdrop)
    self.resid_dropout = nn.Dropout(config.resid_pdrop)
Example #27
Source File: openai_transformer.py From magnitude with MIT License | 6 votes |
def __init__(self, nx, n_ctx, config, scale=False):
    """Initialise the mask buffer, projection layers and dropouts.

    Args:
        nx: Embedding width; reused as the attention state size.
        n_ctx: Side length of the square lower-triangular buffer ``b``.
        config: Supplies ``num_heads``, ``attention_dropout_probability``
            and ``residual_dropout_probability``.
        scale: Stored on ``self.scale``.
    """
    super(Attention, self).__init__()

    n_state = nx  # in Attention: n_state=768 (nx=n_embd)
    # [switch nx => n_state from Block to Attention to keep identical to TF implem]
    assert n_state % config.num_heads == 0

    lower_ones = torch.tril(torch.ones(n_ctx, n_ctx))
    self.register_buffer(u'b', lower_ones.view(1, 1, n_ctx, n_ctx))

    self.n_head = config.num_heads
    self.split_size = n_state
    self.scale = scale

    self.c_attn = Conv1D(n_state * 3, 1, nx)
    self.c_proj = Conv1D(n_state, 1, nx)
    self.attn_dropout = torch.nn.Dropout(config.attention_dropout_probability)
    self.resid_dropout = torch.nn.Dropout(config.residual_dropout_probability)
Example #28
Source File: models.py From optnet with Apache License 2.0 | 6 votes |
def __init__(self, nHidden=50, nineq=200, neq=0, eps=1e-4):
    """Create the convolutional front end and the CUDA-resident QP tensors.

    Args:
        nHidden: Output width of ``qp_o``/``qp_z0`` and side length of the
            square matrices ``M`` and ``L``.
        nineq: Output width of ``qp_s0`` and number of rows of ``G``.
        neq: Must be 0.
        eps: Stored on ``self.eps``.
    """
    super(LenetOptNet, self).__init__()

    self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
    self.conv2 = nn.Conv2d(20, 50, kernel_size=5)

    flat_features = 50 * 4 * 4
    self.qp_o = nn.Linear(flat_features, nHidden)
    self.qp_z0 = nn.Linear(flat_features, nHidden)
    self.qp_s0 = nn.Linear(flat_features, nineq)

    assert neq == 0

    # Fixed lower-triangular ones and a learnable random lower-triangular L.
    self.M = Variable(torch.tril(torch.ones(nHidden, nHidden)).cuda())
    random_square = torch.rand(nHidden, nHidden).cuda()
    self.L = Parameter(torch.tril(random_square))
    self.G = Parameter(torch.Tensor(nineq, nHidden).uniform_(-1, 1).cuda())
    # self.z0 = Parameter(torch.zeros(nHidden).cuda())
    # self.s0 = Parameter(torch.ones(nineq).cuda())

    self.nHidden = nHidden
    self.nineq = nineq
    self.neq = neq
    self.eps = eps
Example #29
Source File: bottom_up.py From Dispersion-based-Clustering with MIT License | 6 votes |
def select_merge_data(self, u_feas, label, label_to_images, ratio_n, dists):
    """Order all sample pairs by their penalised distance, ascending.

    ``dists`` is updated in place:

    * the lower triangle (diagonal included) is raised by 100000 so that
      each unordered pair is effectively considered once;
    * ``ratio_n * (|A| + |B|)`` is added, where ``|A|`` and ``|B|`` are the
      member counts of the two samples' labels;
    * upper-triangle pairs that share a label are set to 100000.

    The last step is one vectorised mask written through the NumPy view of
    ``dists`` (same memory as the tensor) rather than a nested Python loop.

    Args:
        u_feas: Sequence whose length is the number of samples.
        label: Per-sample labels, at least ``len(u_feas)`` long.
        label_to_images: Mapping from a label to a sized collection of its
            members; only the length of each collection is used.
        ratio_n: Weight of the size penalty.
        dists: Square torch tensor of pairwise distances (mutated).

    Returns:
        tuple: ``(idx1, idx2)`` -- first- and second-dimension indices of
        all entries of ``dists`` in ascending order of value.
    """
    n = len(u_feas)
    dists.add_(torch.tril(100000 * torch.ones(n, n)))  # blocking the triangle

    cnt = torch.FloatTensor([len(label_to_images[label[idx]]) for idx in range(n)])
    dists += ratio_n * (cnt.view(1, len(cnt)) + cnt.view(len(cnt), 1))  # dist += |A|+|B|

    # View on the tensor's memory, so the tensor sees the writes below.
    dists = dists.numpy()
    label_arr = np.asarray(label)[:n]
    same_cluster = label_arr[:, None] == label_arr[None, :]
    dists[np.triu(same_cluster, k=1)] = 100000  # set the distance within the same cluster

    # With axis=None all entries are sorted in flattened order;
    # unravel_index turns the flat positions back into (row, column).
    ind = np.unravel_index(np.argsort(dists, axis=None), dists.shape)
    idx1 = ind[0]  # the first dimension index
    idx2 = ind[1]  # the second dimension index
    return idx1, idx2
Example #30
Source File: modeling_transfo_xl.py From bert_on_stilts with Apache License 2.0 | 5 votes |
def _parallelogram_mask(self, h, w, left=False): mask = torch.ones((h, w)).byte() m = min(h, w) mask[:m,:m] = torch.triu(mask[:m,:m]) mask[-m:,-m:] = torch.tril(mask[-m:,-m:]) if left: return mask else: return mask.flip(0)