Python torch.where() Examples
The following are 30 code examples of torch.where(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
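Before the project examples, a minimal standalone sketch of torch.where() itself may help (the tensor values are illustrative only). The three-argument form selects elementwise between two tensors based on a boolean condition, broadcasting all three arguments; the one-argument form returns the indices of nonzero elements.

import torch

x = torch.tensor([-2.0, -0.5, 0.5, 2.0])

# Elementwise select: where the condition holds, take the first tensor,
# otherwise the second. All three arguments broadcast together.
y = torch.where(x > 0, x, torch.zeros_like(x))
print(y)  # tensor([0.0000, 0.0000, 0.5000, 2.0000])

# The one-argument form returns the indices of nonzero elements instead.
idx = torch.where(x > 0)
print(idx)  # (tensor([2, 3]),)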
Example #1
Source File: loss.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 7 votes |
def forward(self, x, target):
    similarity_matrix = x @ x.T  # need grad here
    label_matrix = target.unsqueeze(1) == target.unsqueeze(0)

    negative_matrix = label_matrix.logical_not()
    positive_matrix = label_matrix.fill_diagonal_(False)

    sp = torch.where(positive_matrix, similarity_matrix,
                     torch.zeros_like(similarity_matrix))
    sn = torch.where(negative_matrix, similarity_matrix,
                     torch.zeros_like(similarity_matrix))

    ap = torch.clamp_min(1 + self.m - sp.detach(), min=0.)
    an = torch.clamp_min(sn.detach() + self.m, min=0.)

    logit_p = -self.gamma * ap * (sp - self.dp)
    logit_n = self.gamma * an * (sn - self.dn)

    logit_p = torch.where(positive_matrix, logit_p, torch.zeros_like(logit_p))
    logit_n = torch.where(negative_matrix, logit_n, torch.zeros_like(logit_n))

    loss = F.softplus(torch.logsumexp(logit_p, dim=1) +
                      torch.logsumexp(logit_n, dim=1)).mean()

    return loss
Example #2
Source File: dqn.py From rltime with Apache License 2.0 | 6 votes |
def _calc_loss(self, errors):
    """Calculates the losses given the batch-wise 'td-errors'

    This is either squared-error or huber loss
    """
    if self.loss_mode == "mse":
        return errors.pow(2)
    elif self.loss_mode == "huber":
        # Huber loss element-wise
        abs_errors = torch.abs(errors)
        return torch.where(
            abs_errors <= self.huber_kappa,
            0.5 * errors.pow(2),
            self.huber_kappa * (abs_errors - (0.5 * self.huber_kappa)))
    else:
        assert(False), \
            f"{self.loss_mode} is not a valid q-learning loss mode"
Example #3
Source File: loss.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _get_body(self, x, target):
    cos_t = torch.gather(x, 1, target.unsqueeze(1))  # cos(theta_yi)
    if self.easy_margin:
        cond = torch.relu(cos_t)
    else:
        cond_v = cos_t - self.threshold
        cond = torch.relu(cond_v)
    cond = cond.bool()
    # Apex would convert FP16 to FP32 here
    # cos(theta_yi + m)
    new_zy = torch.cos(torch.acos(cos_t) + self.m).type(cos_t.dtype)
    if self.easy_margin:
        zy_keep = cos_t
    else:
        zy_keep = cos_t - self.mm  # (cos(theta_yi) - sin(pi - m)*m)
    new_zy = torch.where(cond, new_zy, zy_keep)
    diff = new_zy - cos_t  # cos(theta_yi + m) - cos(theta_yi)
    gt_one_hot = F.one_hot(target, num_classes=self.classes)
    body = gt_one_hot * diff
    return body
Example #4
Source File: smooth_l1_loss.py From Parsing-R-CNN with MIT License | 6 votes |
def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    """
    very similar to the smooth_l1_loss from pytorch, but with
    the extra beta parameter

    Modified according to detectron2's fvcore, refer to
    https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py
    """
    if beta < 1e-5:
        # if beta == 0, then torch.where will result in nan gradients when
        # the chain rule is applied due to pytorch implementation details
        # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of
        # zeros, rather than "no gradient"). To avoid this issue, we define
        # small values of beta to be exactly l1 loss.
        loss = torch.abs(input - target)
    else:
        n = torch.abs(input - target)
        cond = n < beta
        loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
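The nan-gradient pitfall described in the comment above is easy to reproduce, because torch.where() evaluates both branches and backpropagates a zero incoming gradient (not "no gradient") through the unselected one. A minimal sketch, with values and beta chosen only for illustration:

import torch

pred = torch.tensor([2.0], requires_grad=True)
target = torch.tensor([0.0])
beta = 0.0

n = torch.abs(pred - target)   # 2.0, so the linear branch is selected
cond = n < beta                # False everywhere
loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta).sum()
print(loss)       # finite (2.0): the forward pass looks fine

loss.backward()
print(pred.grad)  # tensor([nan]): the unselected branch's local gradient is
                  # inf, and 0 * inf = nan poisons the backward pass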
Example #5
Source File: adjust_smooth_l1_loss.py From Parsing-R-CNN with MIT License | 6 votes |
def forward(self, inputs, target, size_average=True):
    n = torch.abs(inputs - target)
    with torch.no_grad():
        if torch.isnan(n.var(dim=0)).sum().item() == 0:
            self.running_mean = self.running_mean.to(n.device)
            self.running_mean *= (1 - self.momentum)
            self.running_mean += (self.momentum * n.mean(dim=0))
            self.running_var = self.running_var.to(n.device)
            self.running_var *= (1 - self.momentum)
            self.running_var += (self.momentum * n.var(dim=0))

    beta = (self.running_mean - self.running_var)
    beta = beta.clamp(max=self.beta, min=1e-3)

    beta = beta.view(-1, self.num_features).to(n.device)
    cond = n < beta.expand_as(n)
    loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    if size_average:
        return loss.mean()
    return loss.sum()
Example #6
Source File: inference.py From Parsing-R-CNN with MIT License | 6 votes |
def prepare_boxlist(self, boxes, scores, image_shape):
    """
    Returns BoxList from `boxes` and adds probability scores information
    as an extra field.
    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.
    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]` corresponds
    to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    boxes = boxes.reshape(-1, 4)
    scores = scores.reshape(-1)
    boxlist = BoxList(boxes, image_shape, mode="xyxy")
    boxlist.add_field("scores", scores)
    return boxlist
Example #7
Source File: activations.py From entmax with MIT License | 6 votes |
def __init__(self, dim=-1, k=None):
    """1.5-entmax: normalizing sparse transform (a la softmax).

    Solves the optimization problem:

        max_p <x, p> - H_1.5(p)    s.t.    p >= 0, sum(p) == 1.

    where H_1.5(p) is the Tsallis alpha-entropy with alpha=1.5.

    Parameters
    ----------
    dim : int
        The dimension along which to apply 1.5-entmax.

    k : int or None
        number of largest elements to partial-sort over. For optimal
        performance, should be slightly bigger than the expected number of
        nonzeros in the solution. If the solution is more than k-sparse,
        this function is recursively called with a 2*k schedule.
        If `None`, full sorting is performed from the beginning.
    """
    self.dim = dim
    self.k = k
    super(Entmax15, self).__init__()
Example #8
Source File: layers.py From graph-cnn.pytorch with MIT License | 6 votes |
def forward(self, input, adj):
    h = torch.mm(input, self.W)
    N = h.size()[0]

    f_1 = torch.matmul(h, self.a1)
    f_2 = torch.matmul(h, self.a2)
    e = self.leakyrelu(f_1 + f_2.transpose(0, 1))

    zero_vec = -9e15 * torch.ones_like(e)
    attention = torch.where(adj > 0, e, zero_vec)
    attention = F.softmax(attention, dim=1)
    attention = F.dropout(attention, self.dropout, training=self.training)
    h_prime = torch.matmul(attention, h)

    if self.concat:
        return F.elu(h_prime)
    else:
        return h_prime
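The torch.where(adj > 0, e, zero_vec) line above is the standard masked-softmax idiom: positions outside the mask are replaced by a very large negative logit, so softmax assigns them (numerically) zero weight while the remaining weights still sum to 1. A minimal sketch of the idiom in isolation, with made-up scores and mask:

import torch
import torch.nn.functional as F

scores = torch.tensor([[1.0, 2.0, 3.0]])
mask = torch.tensor([[True, False, True]])

# Masked positions get a huge negative logit, so softmax drives their
# probability to ~0 without renormalizing by hand.
masked = torch.where(mask, scores, torch.full_like(scores, -9e15))
print(F.softmax(masked, dim=1))  # tensor([[0.1192, 0.0000, 0.8808]])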
Example #9
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 6 votes |
def glu(input, dim=-1):
    # type: (Tensor, int) -> Tensor
    r"""
    glu(input, dim=-1) -> Tensor

    The gated linear unit. Computes:

    .. math ::
        H = A \times \sigma(B)

    where `input` is split in half along `dim` to form `A` and `B`.

    See `Language Modeling with Gated Convolutional Networks
    <https://arxiv.org/abs/1612.08083>`_.

    Args:
        input (Tensor): input tensor
        dim (int): dimension on which to split the input
    """
    if input.dim() == 0:
        raise RuntimeError("glu does not support scalars because halving size must be even")
    return torch._C._nn.glu(input, dim)
Example #10
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 6 votes |
def linear(input, weight, bias=None):
    # type: (Tensor, Tensor, Optional[Tensor]) -> Tensor
    r"""
    Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

    Shape:
        - Input: :math:`(N, *, in\_features)` where `*` means any number of
          additional dimensions
        - Weight: :math:`(out\_features, in\_features)`
        - Bias: :math:`(out\_features)`
        - Output: :math:`(N, *, out\_features)`
    """
    if input.dim() == 2 and bias is not None:
        # fused op is marginally faster
        ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
    else:
        output = input.matmul(weight.t())
        if bias is not None:
            output += torch.jit._unwrap_optional(bias)
        ret = output
    return ret
Example #11
Source File: layers.py From dgl with Apache License 2.0 | 6 votes |
def forward(self, g, h, weights):
    """
    g : graph
    h : node features
    weights : scalar edge weights
    """
    h_src, h_dst = h
    with g.local_scope():
        g.srcdata['n'] = self.act(self.Q(self.dropout(h_src)))
        g.edata['w'] = weights.float()
        g.update_all(fn.u_mul_e('n', 'w', 'm'), fn.sum('m', 'n'))
        g.update_all(fn.copy_e('w', 'm'), fn.sum('m', 'ws'))
        n = g.dstdata['n']
        ws = g.dstdata['ws'].unsqueeze(1).clamp(min=1)
        z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1))))
        z_norm = z.norm(2, 1, keepdim=True)
        z_norm = torch.where(z_norm == 0, torch.tensor(1.).to(z_norm), z_norm)
        z = z / z_norm
        return z
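The z_norm lines above show another recurring torch.where() pattern: substituting zeros in a denominator before dividing, so zero-norm rows pass through unchanged instead of producing nan. A minimal sketch of the idiom on its own (tensor values are illustrative):

import torch

z = torch.tensor([[3.0, 4.0],
                  [0.0, 0.0]])       # second row has zero norm

z_norm = z.norm(2, 1, keepdim=True)  # tensor([[5.], [0.]])
# Replace 0 norms with 1, so 0 / 1 = 0 instead of 0 / 0 = nan.
z_norm = torch.where(z_norm == 0, torch.ones_like(z_norm), z_norm)
print(z / z_norm)  # tensor([[0.6000, 0.8000], [0.0000, 0.0000]])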
Example #12
Source File: sparse_losses.py From ITDD with MIT License | 6 votes |
def forward(ctx, input, target):
    """
    input (FloatTensor): n x num_classes
    target (LongTensor): n, the indices of the target classes
    """
    input_batch, classes = input.size()
    target_batch = target.size(0)
    aeq(input_batch, target_batch)

    z_k = input.gather(1, target.unsqueeze(1)).squeeze()
    tau_z, support_size = _threshold_and_support(input, dim=1)
    support = input > tau_z
    x = torch.where(
        support, input**2 - tau_z**2,
        torch.tensor(0.0, device=input.device)
    ).sum(dim=1)
    ctx.save_for_backward(input, target, tau_z)

    # clamping necessary because of numerical errors: loss should be lower
    # bounded by zero, but negative values near zero are possible without
    # the clamp
    return torch.clamp(x / 2 - z_k + 0.5, min=0.0)
Example #13
Source File: balanced_l1_loss.py From GCNet with Apache License 2.0 | 6 votes |
def balanced_l1_loss(pred,
                     target,
                     beta=1.0,
                     alpha=0.5,
                     gamma=1.5,
                     reduction='mean'):
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0

    diff = torch.abs(pred - target)
    b = np.e**(gamma / alpha) - 1
    loss = torch.where(
        diff < beta,
        alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) -
        alpha * diff,
        gamma * diff + gamma / b - alpha * beta)

    return loss
Example #14
Source File: identifier.py From kaggle-humpback with BSD 2-Clause "Simplified" License | 6 votes |
def forward(self, inputs, labels):
    cos_th = F.linear(inputs, F.normalize(self.weight))
    cos_th = cos_th.clamp(-1, 1)
    sin_th = torch.sqrt(1.0 - torch.pow(cos_th, 2))
    cos_th_m = cos_th * self.cos_m - sin_th * self.sin_m
    cos_th_m = torch.where(cos_th > self.th, cos_th_m, cos_th - self.mm)

    cond_v = cos_th - self.th
    cond = cond_v <= 0
    cos_th_m[cond] = (cos_th - self.mm)[cond]

    if labels.dim() == 1:
        labels = labels.unsqueeze(-1)
    onehot = torch.zeros(cos_th.size()).cuda()
    onehot.scatter_(1, labels, 1)
    outputs = onehot * cos_th_m + (1.0 - onehot) * cos_th
    outputs = outputs * self.s
    return outputs
Example #15
Source File: smooth_l1_loss.py From mmdetection with Apache License 2.0 | 6 votes |
def smooth_l1_loss(pred, target, beta=1.0):
    """Smooth L1 loss.

    Args:
        pred (torch.Tensor): The prediction.
        target (torch.Tensor): The learning target of the prediction.
        beta (float, optional): The threshold in the piecewise function.
            Defaults to 1.0.

    Returns:
        torch.Tensor: Calculated loss
    """
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    return loss
Example #16
Source File: balanced_l1_loss.py From mmdetection with Apache License 2.0 | 5 votes |
def balanced_l1_loss(pred,
                     target,
                     beta=1.0,
                     alpha=0.5,
                     gamma=1.5,
                     reduction='mean'):
    """Calculate balanced L1 loss.

    Please see the `Libra R-CNN <https://arxiv.org/pdf/1904.02701.pdf>`_

    Args:
        pred (torch.Tensor): The prediction with shape (N, 4).
        target (torch.Tensor): The learning target of the prediction with
            shape (N, 4).
        beta (float): The loss is a piecewise function of prediction and
            target and ``beta`` serves as a threshold for the difference
            between the prediction and target. Defaults to 1.0.
        alpha (float): The denominator ``alpha`` in the balanced L1 loss.
            Defaults to 0.5.
        gamma (float): The ``gamma`` in the balanced L1 loss. Defaults to 1.5.
        reduction (str, optional): The method that reduces the loss to a
            scalar. Options are "none", "mean" and "sum".

    Returns:
        torch.Tensor: The calculated loss
    """
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0

    diff = torch.abs(pred - target)
    b = np.e**(gamma / alpha) - 1
    loss = torch.where(
        diff < beta,
        alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) -
        alpha * diff,
        gamma * diff + gamma / b - alpha * beta)

    return loss
Example #17
Source File: dynamic_crf_layer.py From fairseq with MIT License | 5 votes |
def _compute_normalizer(self, emissions, targets=None, masks=None, beam=None):
    # HACK: we include "target" which is a heuristic for training
    # HACK: we use a beam of tokens to approximate the normalizing factor (which is bad?)
    beam = beam if beam is not None else self.beam
    batch_size, seq_len = emissions.size()[:2]
    if targets is not None:
        _emissions = emissions.scatter(2, targets[:, :, None], float('inf'))
        beam_targets = _emissions.topk(beam, 2)[1]
        beam_emission_scores = emissions.gather(2, beam_targets)
    else:
        beam_emission_scores, beam_targets = emissions.topk(beam, 2)
    beam_transition_score1 = self.E1(beam_targets[:, :-1])  # B x (T-1) x K x D
    beam_transition_score2 = self.E2(beam_targets[:, 1:])   # B x (T-1) x K x D
    beam_transition_matrix = torch.bmm(
        beam_transition_score1.view(-1, beam, self.rank),
        beam_transition_score2.view(-1, beam, self.rank).transpose(1, 2))
    beam_transition_matrix = beam_transition_matrix.view(batch_size, -1, beam, beam)

    # compute the normalizer in the log-space
    score = beam_emission_scores[:, 0]  # B x K
    for i in range(1, seq_len):
        next_score = score[:, :, None] + beam_transition_matrix[:, i-1]
        next_score = logsumexp(next_score, dim=1) + beam_emission_scores[:, i]

        if masks is not None:
            score = torch.where(masks[:, i:i+1], next_score, score)
        else:
            score = next_score

    # Sum (log-sum-exp) over all possible tags
    return logsumexp(score, dim=1)
Example #18
Source File: trainer.py From fairseq with MIT License | 5 votes |
def _reduce_and_log_stats(self, logging_outputs, sample_size, grad_norm=None):
    if grad_norm is not None:
        metrics.log_speed("ups", 1., priority=100, round=2)
        metrics.log_scalar("gnorm", grad_norm, priority=400, round=3)
        if self.args.clip_norm > 0:
            metrics.log_scalar(
                "clip",
                torch.where(
                    grad_norm > self.args.clip_norm,
                    grad_norm.new_tensor(100),
                    grad_norm.new_tensor(0),
                ),
                priority=500,
                round=1,
            )

    with metrics.aggregate() as agg:
        if logging_outputs is not None:
            self.task.reduce_metrics(logging_outputs, self.get_criterion())
            del logging_outputs

        # extra warning for criterions that don't properly log a loss value
        if "loss" not in agg:
            if "loss" not in self._warn_once:
                self._warn_once.add("loss")
                logger.warning(
                    "Criterion.reduce_metrics did not log a 'loss' value, "
                    "which may break some functionality"
                )
            metrics.log_scalar("loss", -1)

        # support legacy interface
        if self.tpu:
            logging_output = {}
        else:
            logging_output = agg.get_smoothed_values()
            logging_output["sample_size"] = sample_size
            for key_to_delete in ["ppl", "wps", "wpb", "bsz"]:
                if key_to_delete in logging_output:
                    del logging_output[key_to_delete]
        return logging_output
Example #19
Source File: dqn_loss_function.py From rlgraph with Apache License 2.0 | 5 votes |
def _graph_fn_apply_huber_loss_if_necessary(self, td_delta):
    if self.backend == "python" or get_backend() == "python":
        if self.huber_loss:
            # np.where and np.square take positional arguments only
            return np.where(
                np.abs(td_delta) <= self.huber_delta,
                0.5 * np.square(td_delta),
                self.huber_delta * (np.abs(td_delta) - 0.5 * self.huber_delta)
            )
        else:
            return 0.5 * np.square(td_delta)

    elif get_backend() == "tf":
        if self.huber_loss:
            return tf.where(
                condition=tf.abs(x=td_delta) <= self.huber_delta,
                x=0.5 * tf.square(x=td_delta),
                y=self.huber_delta * (tf.abs(x=td_delta) - 0.5 * self.huber_delta)
            )
        else:
            return 0.5 * tf.square(x=td_delta)

    elif get_backend() == "pytorch":
        if self.huber_loss:
            # Not certain if arithmetics need to be expressed via torch operators.
            return torch.where(
                torch.abs(td_delta) <= self.huber_delta,
                # PyTorch has no `square`
                0.5 * torch.pow(td_delta, 2),
                self.huber_delta * (torch.abs(td_delta) - 0.5 * self.huber_delta)
            )
        else:
            return 0.5 * td_delta * td_delta
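As a quick sanity check that the NumPy and PyTorch branches above really compute the same Huber loss, a small sketch on shared data (huber_delta and the td values are chosen arbitrarily):

import numpy as np
import torch

huber_delta = 1.0
td = np.array([-3.0, -0.5, 0.2, 2.0], dtype=np.float32)

np_loss = np.where(np.abs(td) <= huber_delta,
                   0.5 * np.square(td),
                   huber_delta * (np.abs(td) - 0.5 * huber_delta))

t = torch.from_numpy(td)
torch_loss = torch.where(torch.abs(t) <= huber_delta,
                         0.5 * torch.pow(t, 2),
                         huber_delta * (torch.abs(t) - 0.5 * huber_delta))

print(np.allclose(np_loss, torch_loss.numpy()))  # True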
Example #20
Source File: atomicconv.py From dgl with Apache License 2.0 | 5 votes |
def forward(self, distances):
    """Apply the layer to transform edge distances.

    Parameters
    ----------
    distances : Float32 tensor of shape (E, 1)
        Distance between end nodes of edges. E for the number of edges.

    Returns
    -------
    Float32 tensor of shape (K, E, 1)
        Transformed edge distances. K for the number of radial filters.
    """
    scaled_euclidean_distance = - self.rbf_kernel_scaling * \
                                (distances - self.rbf_kernel_means) ** 2          # (K, E, 1)
    rbf_kernel_results = th.exp(scaled_euclidean_distance)                        # (K, E, 1)

    cos_values = 0.5 * (th.cos(np.pi * distances / self.interaction_cutoffs) + 1) # (K, E, 1)
    cutoff_values = th.where(
        distances <= self.interaction_cutoffs,
        cos_values, th.zeros_like(cos_values))                                    # (K, E, 1)

    # Note that there appears to be an inconsistency between the paper and
    # DeepChem's implementation. In the paper, the scaled_euclidean_distance first
    # gets multiplied by cutoff_values, followed by exponentiation. Here we follow
    # the practice of DeepChem.
    return rbf_kernel_results * cutoff_values
Example #21
Source File: plugin.py From End-to-end-ASR-Pytorch with MIT License | 5 votes |
def forward(self, dec_state, dec_logit, label=None, return_loss=True):
    # Match embedding dim.
    log_fused_prob = None
    loss = None

    #x_emb = nn.functional.normalize(self.emb_net(dec_state),dim=-1)
    if self.apply_dropout:
        dec_state = self.dropout(dec_state)
    x_emb = self.emb_net(dec_state)

    if return_loss:
        # Compute embedding loss
        b, t = label.shape
        # Retrieve embedding
        if self.use_bert:
            with torch.no_grad():
                y_emb = self.emb_table(label).contiguous()
        else:
            y_emb = self.emb_table(label)
        # Regression loss on embedding
        if self.distance == 'CosEmb':
            loss = self.measurement(x_emb.view(-1, self.dim),
                                    y_emb.view(-1, self.dim),
                                    torch.ones(1).to(dec_state.device))
        else:
            loss = self.measurement(x_emb.view(-1, self.dim),
                                    y_emb.view(-1, self.dim))
        loss = loss.view(b, t)
        # Mask out padding
        loss = torch.where(label != 0, loss, torch.zeros_like(loss))
        loss = torch.mean(loss.sum(dim=-1) / (label != 0).sum(dim=-1).float())

    if self.apply_fuse:
        log_fused_prob = self.fuse_prob(x_emb, dec_logit)

    return loss, log_fused_prob
Example #22
Source File: loss_functions.py From signaltrain with GNU General Public License v3.0 | 5 votes |
def smoothl1(x, x_hat, delta=0.5):  # Huber loss
    #return torch.sum( torch.where(torch.abs(x - x_hat) < delta, 0.5*((x - x_hat)**2), \
    #       delta*torch.abs(x - x_hat) - 0.5*(delta**2)) )
    # SmoothL1Loss is a module: instantiate it, then call it on the tensors.
    # Its threshold is fixed at 1, so `delta` only matters for the
    # commented-out torch.where formulation above.
    return torch.nn.SmoothL1Loss()(x, x_hat)
Example #23
Source File: partial_convolution.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0 | 5 votes |
def forward(self, args):
    x, mask = args
    output = self.feature_conv(x)
    mask_output = self.mask_conv(1 - mask)  # holes are 1; else 0
    mask_attention = F.tanh(mask_output)    # non-holes positions are 0
    output = output + mask_attention * output
    valid_idx = mask_attention == 0
    new_mask = torch.where(valid_idx, torch.ones_like(output), F.sigmoid(mask_output))
    return output, new_mask
Example #24
Source File: loss.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0 | 5 votes |
def forward(self, input, target):
    # if a pixel's probability > 0.5, then assume it is true since labels might be noisy
    input = self.flatten_images(input)
    target = self.flatten_images(target)
    weights = torch.where(target > 0,
                          torch.ones_like(target) * self.words_weight,  # words are 1
                          torch.ones_like(target) * self.background_weight)
    bootstrap_target = self.beta * target + (1 - self.beta) * (F.sigmoid(input) > 0.5).float()
    return F.binary_cross_entropy_with_logits(input, bootstrap_target, weight=weights,
                                              size_average=self.size_average, reduce=self.reduce)
Example #25
Source File: loss.py From Text_Segmentation_Image_Inpainting with GNU General Public License v3.0 | 5 votes |
def forward(self, input, target):
    input = self.flatten_images(input)
    target = self.flatten_images(target)
    weights = torch.where(target > 0,
                          torch.ones_like(target) * self.words_weights,  # words are 1
                          torch.ones_like(target) * self.background_weights)
    pt = F.logsigmoid(-input * (target * 2 - 1))
    loss = F.binary_cross_entropy_with_logits(input, target, weight=weights,
                                              size_average=True, reduce=False)
    loss = (pt * self.gamma).exp() * loss
    return loss.mean()
Example #26
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 5 votes |
def beta_smooth_l1_loss(input: Tensor, target: Tensor, beta: float) -> Tensor:
    diff = torch.abs(input - target)
    loss = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    loss = loss.sum() / (input.numel() + 1e-8)
    return loss
Example #27
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 5 votes |
def _smooth_l1_loss(input, target):
    # type: (Tensor, Tensor) -> Tensor
    t = torch.abs(input - target)
    return torch.where(t < 1, 0.5 * t ** 2, t - 0.5)
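This is the classic smooth L1 with a fixed threshold of 1; the two branches agree at the boundary t == 1 (0.5 * 1 ** 2 = 1 - 0.5 = 0.5), so the loss is continuous there. A quick check with illustrative values:

import torch

t = torch.tensor([0.5, 1.0, 2.0])
# Quadratic below the threshold, linear above it.
loss = torch.where(t < 1, 0.5 * t ** 2, t - 0.5)
print(loss)  # tensor([0.1250, 0.5000, 1.5000])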
Example #28
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 5 votes |
def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0,
             reduction='mean'):
    # type: (Tensor, Tensor, Tensor, Tensor, int, str) -> Tensor
    r"""The Connectionist Temporal Classification loss.

    See :class:`~torch.nn.CTCLoss` for details.

    .. include:: cudnn_deterministic.rst
    .. include:: cuda_deterministic_backward.rst

    Args:
        log_probs: :math:`(T, N, C)` where `C = number of characters in alphabet including blank`,
            `T = input length`, and `N = batch size`.
            The logarithmized probabilities of the outputs
            (e.g. obtained with :func:`torch.nn.functional.log_softmax`).
        targets: :math:`(N, S)` or `(sum(target_lengths))`.
            Targets (cannot be blank). In the second form, the targets are assumed to be concatenated.
        input_lengths: :math:`(N)`.
            Lengths of the inputs (must each be :math:`\leq T`)
        target_lengths: :math:`(N)`.
            Lengths of the targets
        blank (int, optional):
            Blank label. Default :math:`0`.
        reduction (string, optional): Specifies the reduction to apply to the output:
            'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
            'mean': the output losses will be divided by the target lengths and
            then the mean over the batch is taken. Default: 'mean'

    Example::

        >>> log_probs = torch.randn(50, 16, 20).log_softmax(2).detach().requires_grad_()
        >>> targets = torch.randint(1, 20, (16, 30), dtype=torch.long)
        >>> input_lengths = torch.full((16,), 50, dtype=torch.long)
        >>> target_lengths = torch.randint(10, 30, (16,), dtype=torch.long)
        >>> loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths)
        >>> loss.backward()
    """
    return torch.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank,
                          _Reduction.get_enum(reduction))
Example #29
Source File: functional.py From SlowFast-Network-pytorch with MIT License | 5 votes |
def local_response_norm(input, size, alpha=1e-4, beta=0.75, k=1.):
    # type: (Tensor, int, float, float, float) -> Tensor
    r"""Applies local response normalization over an input signal composed of
    several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    See :class:`~torch.nn.LocalResponseNorm` for details.
    """
    dim = input.dim()
    if dim < 3:
        raise ValueError('Expected 3D or higher dimensionality \
                         input (got {} dimensions)'.format(dim))
    div = input.mul(input).unsqueeze(1)
    if dim == 3:
        div = pad(div, (0, 0, size // 2, (size - 1) // 2))
        div = avg_pool2d(div, (size, 1), stride=1).squeeze(1)
    else:
        sizes = input.size()
        div = div.view(sizes[0], 1, sizes[1], sizes[2], -1)
        div = pad(div, (0, 0, 0, 0, size // 2, (size - 1) // 2))
        div = avg_pool3d(div, (size, 1, 1), stride=1).squeeze(1)
        div = div.view(sizes)
    div = div.mul(alpha).add(k).pow(beta)
    return input / div


# loss
Example #30
Source File: character_token_embedder.py From fairseq with MIT License | 5 votes |
def forward(
        self,
        input: torch.Tensor,
):
    if self.char_inputs:
        chars = input.view(-1, self.max_char_len)
        pads = chars[:, 0].eq(CHAR_PAD_IDX)
        eos = chars[:, 0].eq(CHAR_EOS_IDX)
        if eos.any():
            if self.onnx_trace:
                chars = torch.where(eos.unsqueeze(1), chars.new_zeros(1), chars)
            else:
                chars[eos] = 0

        unk = None
    else:
        flat_words = input.view(-1)
        chars = self.word_to_char[flat_words.type_as(self.word_to_char)].type_as(input)
        pads = flat_words.eq(self.vocab.pad())
        eos = flat_words.eq(self.vocab.eos())
        unk = flat_words.eq(self.vocab.unk())

    word_embs = self._convolve(chars)
    if self.onnx_trace:
        if pads.any():
            word_embs = torch.where(pads.unsqueeze(1), word_embs.new_zeros(1), word_embs)
        if eos.any():
            word_embs = torch.where(eos.unsqueeze(1), self.symbol_embeddings[self.eos_idx], word_embs)
        if unk is not None and unk.any():
            word_embs = torch.where(unk.unsqueeze(1), self.symbol_embeddings[self.unk_idx], word_embs)
    else:
        if pads.any():
            word_embs[pads] = 0
        if eos.any():
            word_embs[eos] = self.symbol_embeddings[self.eos_idx]
        if unk is not None and unk.any():
            word_embs[unk] = self.symbol_embeddings[self.unk_idx]

    return word_embs.view(input.size()[:2] + (-1,))