Python torch.mul() Examples
The following are 30 code examples of torch.mul().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module torch, or try the search function.
Example #1
Source File: transformation.py From weakalign with MIT License | 6 votes |
def compute_L_inverse(self,X,Y):
    """Assemble the block matrix L = [[K, P], [P^T, 0]] and return its inverse.

    K holds r^2 * log(r^2) for every pair of control points and P is
    [1, x, y]; this layout matches a thin-plate-spline system.  ``X`` and
    ``Y`` are expanded to (N, N) and concatenated column-wise, so they are
    expected to be (N, 1) column tensors.

    Returns:
        The (N + 3, N + 3) inverse of L, moved to the GPU when
        ``self.use_cuda`` is truthy.
    """
    num_points = X.size()[0]  # number of control points (dim 0)

    # Pairwise squared distances between control points.
    x_grid = X.expand(num_points, num_points)
    y_grid = Y.expand(num_points, num_points)
    delta_x = x_grid - x_grid.transpose(0, 1)
    delta_y = y_grid - y_grid.transpose(0, 1)
    sq_dist = torch.pow(delta_x, 2) + torch.pow(delta_y, 2)
    # Zero distances (the diagonal) are set to 1 so log() does not yield NaN.
    sq_dist[sq_dist == 0] = 1

    # Kernel block K, with optional regularisation on the diagonal.
    kernel = torch.mul(sq_dist, torch.log(sq_dist))
    if self.reg_factor != 0:
        kernel += torch.eye(kernel.size(0), kernel.size(1)) * self.reg_factor

    # Affine block P = [1, x, y] and the 3x3 zero block.
    ones_col = torch.FloatTensor(num_points, 1).fill_(1)
    zero_block = torch.FloatTensor(3, 3).fill_(0)
    affine = torch.cat((ones_col, X, Y), 1)

    top_rows = torch.cat((kernel, affine), 1)
    bottom_rows = torch.cat((affine.transpose(0, 1), zero_block), 1)
    inverse = torch.inverse(torch.cat((top_rows, bottom_rows), 0))
    if self.use_cuda:
        inverse = inverse.cuda()
    return inverse
Example #2
Source File: grad_cam.py From grad-cam-pytorch with MIT License | 6 votes |
def generate(self, target_layer):
    """Build a per-sample min-max normalised class-activation map.

    Feature maps and gradients for ``target_layer`` are looked up through
    ``self._find``.  The gradients are spatially averaged into channel
    weights, the weighted feature maps are summed over channels, passed
    through ReLU, resized to ``self.image_shape`` and finally rescaled so
    each sample spans [0, 1].

    Returns:
        Tensor with the shape produced by the interpolation
        (batch, 1, *image_shape).
    """
    feature_maps = self._find(self.fmap_pool, target_layer)
    gradients = self._find(self.grad_pool, target_layer)

    # One weight per channel: global average of the gradients.
    channel_weights = F.adaptive_avg_pool2d(gradients, 1)
    cam = torch.mul(feature_maps, channel_weights).sum(dim=1, keepdim=True)
    cam = F.relu(cam)
    cam = F.interpolate(
        cam, self.image_shape, mode="bilinear", align_corners=False
    )

    # Flatten each sample so min/max can be taken over all of its pixels.
    batch, channels, height, width = cam.shape
    flat = cam.view(batch, -1)
    flat -= flat.min(dim=1, keepdim=True)[0]
    flat /= flat.max(dim=1, keepdim=True)[0]
    return flat.view(batch, channels, height, width)
Example #3
Source File: interaction.py From DeepCTR-Torch with Apache License 2.0 | 6 votes |
def forward(self, inputs):
    """Compute pairwise bilinear interactions between the fields of ``inputs``.

    ``inputs`` must be 3-dimensional; it is split into size-1 slices along
    dim 1 and every unordered pair of slices is combined as
    ``bilinear(v_i) * v_j``.  Which bilinear transform is applied depends on
    ``self.bilinear_type``:

    * ``"all"``: one shared callable ``self.bilinear``.
    * ``"each"``: ``self.bilinear[i]`` for the first field of the pair.
    * ``"interaction"``: one entry of ``self.bilinear`` per pair.

    Raises:
        ValueError: if ``inputs`` is not 3-dimensional.
        NotImplementedError: for any other ``bilinear_type``.
    """
    rank = len(inputs.shape)
    if rank != 3:
        raise ValueError(
            "Unexpected inputs dimensions %d, expect to be 3 dimensions" % (rank))

    fields = torch.split(inputs, 1, dim=1)
    mode = self.bilinear_type
    if mode == "all":
        products = [
            torch.mul(self.bilinear(left), right)
            for left, right in itertools.combinations(fields, 2)
        ]
    elif mode == "each":
        index_pairs = itertools.combinations(range(len(fields)), 2)
        products = [
            torch.mul(self.bilinear[first](fields[first]), fields[second])
            for first, second in index_pairs
        ]
    elif mode == "interaction":
        field_pairs = itertools.combinations(fields, 2)
        products = [
            torch.mul(layer(pair[0]), pair[1])
            for pair, layer in zip(field_pairs, self.bilinear)
        ]
    else:
        raise NotImplementedError
    return torch.cat(products, dim=1)
Example #4
Source File: gridgen.py From cascade-rcnn_Pytorch with MIT License | 6 votes |
def forward(self, input1):
    """Map per-location projection coefficients to normalised spherical angles.

    ``self.grid3d`` is replicated once per batch element and stored in
    ``self.batchgrid3d``.  The last axis of ``input1`` is read as three
    coefficient groups (``0:4``, ``4:8`` and ``8:``) that are dotted with the
    grid to obtain x, y and z.  These are converted to ``theta`` (polar angle
    rescaled by pi/2 and shifted by -1) and ``phi`` (azimuth divided by pi).

    The reductions keep the reduced axis (``keepdim=True``) so that ``theta``
    and ``phi`` stay 4-D; without it the final concatenation along dim 3
    fails on PyTorch versions whose ``sum`` drops the reduced dimension.

    Returns:
        Tensor of shape ``input1.shape[:3] + (2,)`` holding (theta, phi).
    """
    batch_size = input1.size(0)
    batch_grid = torch.zeros(torch.Size([batch_size]) + self.grid3d.size())
    # Broadcast-assign the shared grid into every batch slot.
    batch_grid[:] = self.grid3d
    self.batchgrid3d = Variable(batch_grid)

    x = torch.sum(torch.mul(self.batchgrid3d, input1[:, :, :, 0:4]), 3, keepdim=True)
    y = torch.sum(torch.mul(self.batchgrid3d, input1[:, :, :, 4:8]), 3, keepdim=True)
    z = torch.sum(torch.mul(self.batchgrid3d, input1[:, :, :, 8:]), 3, keepdim=True)

    # The small offset keeps the divisions below away from zero.
    r = torch.sqrt(x ** 2 + y ** 2 + z ** 2) + 1e-5
    theta = torch.acos(z / r) / (np.pi / 2) - 1

    # atan only covers (-pi/2, pi/2); add +/- pi for x < 0 depending on the
    # sign of y to recover the full azimuth range.
    x_negative = x.lt(0).type(torch.FloatTensor)
    y_sign = y.ge(0).type(torch.FloatTensor) - y.lt(0).type(torch.FloatTensor)
    phi = torch.atan(y / (x + 1e-5)) + np.pi * x_negative * y_sign
    phi = phi / np.pi

    return torch.cat([theta, phi], 3)
Example #5
Source File: additive_shared.py From PySyft with Apache License 2.0 | 6 votes |
def pow(self, power):
    """Raise ``self`` to a non-negative integer power using only multiplication.

    Uses exponentiation by squaring: for every set bit of ``power`` the
    current base is multiplied into the result, and the base is squared
    between bits.

    Args:
        power: non-negative integer exponent.

    Returns:
        ``self ** power``; the plain integer ``1`` when ``power`` is 0.

    Raises:
        ValueError: if ``power`` is negative (the loop below cannot compute
            an inverse, and would otherwise silently return 1).
    """
    if power < 0:
        raise ValueError("Negative integer powers are not allowed.")

    base = self
    result = 1
    while power > 0:
        # Odd power: fold the current base into the result.
        if power % 2 == 1:
            result = result * base
        power //= 2
        # Only square when another bit remains, so no multiplication is
        # spent on a value that is never used.
        if power > 0:
            base = base * base
    return result
Example #6
Source File: gauss.py From vmf_vae_nlp with MIT License | 6 votes |
def build_bow_rep(self, lat_code, n_sample):
    """Draw ``n_sample`` latent vectors from the estimated distribution.

    Parameters come from ``self.estimate_param`` (keys ``'mean'`` and
    ``'logvar'``); each draw is ``exp(logvar) * eps + mean`` with ``eps``
    supplied by ``self.sample_cell``.

    Returns:
        ``(tup, kld, vec)``.  For ``n_sample == 1`` ``vec`` is the single
        draw; otherwise the draws are concatenated along dim 0.
    """
    batch_sz = lat_code.size()[0]
    tup = self.estimate_param(latent_code=lat_code)
    mean, logvar = tup['mean'], tup['logvar']
    kld = self.compute_KLD(tup)

    def draw():
        # One reparameterised sample for the whole batch.
        noise = self.sample_cell(batch_size=batch_sz)
        return torch.mul(torch.exp(logvar), noise) + mean

    if n_sample == 1:
        return tup, kld, draw()

    samples = [draw() for _ in range(n_sample)]
    return tup, kld, torch.cat(samples, dim=0)
Example #7
Source File: additive_shared.py From PySyft with Apache License 2.0 | 6 votes |
def _private_mul(self, other, equation: str):
    """Multiply two additively shared tensors through the SPDZ helper.

    Args:
        self: an AdditiveSharingTensor
        other: another AdditiveSharingTensor holding the same number of
            shares as ``self``
        equation: name of the torch operation to apply, either ``"mul"``
            or ``"matmul"``

    Returns:
        Whatever ``spdz.spdz_mul`` returns for the two operands.

    Raises:
        AttributeError: if no crypto provider is configured on ``self``.
    """
    # Only element-wise and matrix multiplication are supported.
    assert equation in ("mul", "matmul")
    cmd = getattr(torch, equation)

    assert isinstance(other, AdditiveSharingTensor)
    assert len(self.child) == len(other.child)

    if self.crypto_provider is None:
        raise AttributeError("For multiplication a crypto_provider must be passed.")

    return spdz.spdz_mul(cmd, self, other, self.crypto_provider, self.field, self.dtype)
Example #8
Source File: functions.py From hed-pytorch with MIT License | 6 votes |
def weighted_cross_entropy_loss(prediction, label, output_mask=False):
    """Class-balanced cross-entropy between ``prediction`` and ``label``.

    Every element's loss is weighted by the frequency of the *opposite*
    group: non-zero ("positive") labels get ``num_negative / total`` and
    zero labels get ``num_positive / total``.

    Args:
        prediction: logits with the class axis at dim 1.
        label: integer class ids; a leading singleton dim 0 is squeezed
            away.  Ids ``>= prediction.shape[1]`` are treated as class 0.
            The caller's tensor is not modified.
        output_mask: when true, also return the boolean mask of positive
            labels.

    Returns:
        The summed weighted loss, or ``(loss, label != 0)`` when
        ``output_mask`` is set.
    """
    # reduction='none' replaces the deprecated reduce=False.
    criterion = torch.nn.CrossEntropyLoss(reduction='none')
    label = torch.squeeze(label.long(), dim=0)
    nch = prediction.shape[1]
    # Zero out-of-range ids without writing into the caller's tensor
    # (squeeze()/long() may return a view of it).
    label = label * (label < nch).long()
    cost = criterion(prediction, label)

    positive = label != 0
    mask = positive.float()
    num_positive = torch.sum(mask).float()
    num_negative = mask.numel() - num_positive
    total = num_positive + num_negative
    # Build the weights in a single expression: two successive masked
    # assignments would overwrite the positive weights whenever that weight
    # happens to be exactly 0.
    weights = mask * (num_negative / total) + (1 - mask) * (num_positive / total)

    cost = torch.mul(cost, weights)
    if output_mask:
        return torch.sum(cost), positive
    return torch.sum(cost)
Example #9
Source File: rnn.py From PySyft with Apache License 2.0 | 6 votes |
def forward(self, x, hc=None):
    """Run one LSTM-cell step.

    Args:
        x: input for this step.
        hc: optional ``(h, c)`` pair of previous hidden and cell state; when
            omitted both are created with ``self.init_hidden(x)``.

    Returns:
        ``(h_, c_)``: the new hidden state and the new cell state.
    """
    if hc is None:
        hc = (self.init_hidden(x), self.init_hidden(x))
    hidden, cell = hc

    # Project input and hidden state, then split each into the four gates.
    x_i, x_f, x_c, x_o = self.fc_xh(x).chunk(self.num_chunks, 1)
    h_i, h_f, h_c, h_o = self.fc_hh(hidden).chunk(self.num_chunks, 1)

    input_gate = torch.sigmoid(x_i + h_i)
    forget_gate = torch.sigmoid(x_f + h_f)
    candidate = torch.tanh(x_c + h_c)
    output_gate = torch.sigmoid(x_o + h_o)

    # Keep part of the old cell state and add the gated candidate.
    next_cell = torch.mul(forget_gate, cell) + torch.mul(input_gate, candidate)
    next_hidden = torch.mul(output_gate, torch.tanh(next_cell))
    return next_hidden, next_cell
Example #10
Source File: networks.py From viton-gan with MIT License | 6 votes |
def compute_L_inverse(self,X,Y):
    """Return the inverse of the block matrix L = [[K, P], [P^T, 0]].

    K is r^2 * log(r^2) over all pairs of control points and P is
    [1, x, y] (the layout of a thin-plate-spline system).  ``X`` and ``Y``
    are expanded to (N, N) and concatenated as columns, so they are expected
    to be (N, 1) tensors.  The result is moved to the GPU when
    ``self.use_cuda`` is truthy.
    """
    count = X.size()[0]  # number of points (along dim 0)

    # Kernel block K from pairwise squared distances.
    x_rep = X.expand(count, count)
    y_rep = Y.expand(count, count)
    dist_sq = (
        torch.pow(x_rep - x_rep.transpose(0, 1), 2)
        + torch.pow(y_rep - y_rep.transpose(0, 1), 2)
    )
    dist_sq[dist_sq == 0] = 1  # diagonal -> 1 so log() cannot produce NaN
    kernel = torch.mul(dist_sq, torch.log(dist_sq))

    # Remaining blocks of L.
    ones_col = torch.FloatTensor(count, 1).fill_(1)
    zeros_3x3 = torch.FloatTensor(3, 3).fill_(0)
    affine = torch.cat((ones_col, X, Y), 1)
    system = torch.cat(
        (
            torch.cat((kernel, affine), 1),
            torch.cat((affine.transpose(0, 1), zeros_3x3), 1),
        ),
        0,
    )

    result = torch.inverse(system)
    return result.cuda() if self.use_cuda else result
Example #11
Source File: nvdm.py From vmf_vae_nlp with MIT License | 5 votes |
def forward(self, x):
    """Encode ``x``, sample latent vectors and score the reconstruction.

    Returns:
        ``(recon_loss, kld, aux_loss, tup, vecs)`` where ``recon_loss`` is
        the negative sum over the vocabulary of ``x * log_softmax(logits)``,
        averaged over the ``self.n_sample`` draws; ``tup`` is the
        distribution's parameter dict extended with ``'avg_cos'`` and
        ``'avg_norm'``.
    """
    batch_sz = x.size()[0]

    # Encoder: linear -> dropout -> activation -> linear.
    hidden = self.enc_vec(x)
    hidden = self.dropout(hidden)
    hidden = self.active(hidden)
    latent_code = self.enc_vec_2(hidden)

    # vecs: n_samples, batch_sz, lat_dim
    tup, kld, vecs = self.dist.build_bow_rep(latent_code, self.n_sample)

    if 'redundant_norm' in tup:
        aux_loss = tup['redundant_norm'].view(batch_sz)
    else:
        aux_loss = GVar(torch.zeros(batch_sz))

    # Statistics stored alongside the distribution parameters.
    avg_cos = BowVAE.check_dispersion(vecs)
    avg_norm = torch.mean(tup['norm'])
    tup['avg_cos'] = avg_cos
    tup['avg_norm'] = avg_norm

    # Decoder over all samples at once.
    flat_latent = vecs.view(self.n_sample * batch_sz, self.n_lat)
    flat_latent = self.dec_act(self.dec_linear(flat_latent))
    log_prob = self.dropout(self.out(flat_latent))
    log_prob = torch.nn.functional.log_softmax(log_prob, dim=1)
    log_prob = log_prob.view(self.n_sample, batch_sz, self.vocab_size)

    # Weight the log-probabilities by the input and average over samples.
    target = x.unsqueeze(0).expand(self.n_sample, batch_sz, self.vocab_size)
    per_token = torch.mean(torch.mul(target, log_prob), dim=0)
    recon_loss = -torch.sum(per_token, dim=1, keepdim=False)

    return recon_loss, kld, aux_loss, tup, vecs
Example #12
Source File: layers.py From nice_pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def anticoupling_law(self, a, b):
    """Undo a multiplicative coupling: return ``a`` times the reciprocal of ``b``."""
    inverse_b = torch.reciprocal(b)
    return torch.mul(a, inverse_b)
Example #13
Source File: FM_PyTorch.py From Awesome-RecSystem-Models with MIT License | 5 votes |
def forward(self, feat_index, feat_value):
    """Factorization-machine forward pass.

    Shape comments use None for the batch size, F for the number of fields
    and K for the embedding size.

    Returns:
        ``bias + first-order term + second-order term`` with a trailing
        singleton dimension added.
    """
    values = torch.unsqueeze(feat_value, dim=2)                    # None * F * 1

    # Step 1: the linear (first-order) part.
    linear_weights = self.first_weights(feat_index)                # None * F * 1
    linear_terms = torch.mul(linear_weights, values)               # None * F * 1
    linear_terms = torch.squeeze(linear_terms, dim=2)              # None * F
    y_first_order = torch.sum(linear_terms, dim=1)                 # None

    # Step 2: the second-order interaction part.
    embeddings = self.feat_embeddings(feat_index)                  # None * F * K
    weighted_emb = torch.mul(embeddings, values)                   # None * F * K (broadcast)

    # Square of the sum over fields.
    square_of_sum = torch.pow(torch.sum(weighted_emb, 1), 2)       # None * K
    # Sum of the squares over fields.
    sum_of_squares = torch.sum(torch.pow(weighted_emb, 2), dim=1)  # None * K

    y_secd_order = 0.5 * torch.sub(square_of_sum, sum_of_squares)
    y_secd_order = torch.sum(y_secd_order, dim=1)

    total = self.bias + y_first_order + y_secd_order
    return torch.unsqueeze(total, dim=1)
Example #14
Source File: dist.py From vmf_vae_nlp with MIT License | 5 votes |
def compute_KLD(self, tup):
    """Compute the KL term from ``tup['mean']`` and ``tup['logvar']``.

    Evaluates, summed over dim 1 and scaled by -0.5:
    ``1 - mean^2 / k + 2 * logvar - exp(2 * logvar) / k - 2``
    with ``k = self.k``.
    """
    mean = tup['mean']
    logvar = tup['logvar']
    per_dim = (
        1
        - torch.mul(mean, mean) / self.k
        + 2 * logvar
        - torch.exp(2 * logvar) / self.k
        - 2
    )
    return -0.5 * torch.sum(per_dim, dim=1)
Example #15
Source File: dist.py From vmf_vae_nlp with MIT License | 5 votes |
def build_bow_rep(self, lat_code):
    """Sample one latent vector per batch element.

    Returns:
        ``(tup, kld, vec)``: the parameter dict from
        ``self.estimate_param``, the KL term from ``self.compute_KLD`` and
        the draw ``exp(logvar) * eps + mean`` with ``eps`` taken from
        ``self.sample_cell``.
    """
    batch_sz = lat_code.size()[0]
    tup = self.estimate_param(latent_code=lat_code)
    kld = self.compute_KLD(tup)
    noise = self.sample_cell(batch_size=batch_sz)
    scale = torch.exp(tup['logvar'])
    return tup, kld, torch.mul(scale, noise) + tup['mean']
Example #16
Source File: dist.py From vmf_vae_nlp with MIT License | 5 votes |
def build_bow_rep(self, lat_code, n_sample):
    """Draw ``n_sample`` latent vectors and return them as a list.

    Returns:
        ``(tup, kld, vecs)`` where ``vecs`` is a Python list holding one
        ``exp(logvar) * eps + mean`` draw per sample (empty when
        ``n_sample`` is 0).
    """
    batch_sz = lat_code.size()[0]
    tup = self.estimate_param(latent_code=lat_code)
    mean, logvar = tup['mean'], tup['logvar']
    kld = self.compute_KLD(tup)

    def draw():
        # Fresh noise for every sample.
        noise = self.sample_cell(batch_size=batch_sz)
        return torch.mul(torch.exp(logvar), noise) + mean

    return tup, kld, [draw() for _ in range(n_sample)]
Example #17
Source File: DeepFM_PyTorch.py From Awesome-RecSystem-Models with MIT License | 5 votes |
def forward(self, feat_index, feat_value, use_dropout=True):
    """DeepFM forward pass: first-order, second-order and deep parts.

    Shape comments use None for the batch size, F for the number of fields
    and K for the embedding size.

    Args:
        feat_index: indices passed to the weight and embedding lookups.
        feat_value: per-field values multiplied into the looked-up weights.
        use_dropout: apply the dropout layers when true.

    Returns:
        ``self.fc`` applied to the concatenation of the three parts.
    """
    values = torch.unsqueeze(feat_value, dim=2)                      # None * F * 1

    # Step 1: first-order (linear) part.
    linear_weights = self.first_weights(feat_index)                  # None * F * 1
    linear_terms = torch.mul(linear_weights, values)
    y_first_order = torch.sum(linear_terms, dim=2)                   # None * F
    if use_dropout:
        y_first_order = nn.Dropout(self.dropout_fm[0])(y_first_order)  # None * F

    # Step 2: second-order interaction part.
    embeddings = self.feat_embeddings(feat_index)                    # None * F * K
    weighted_emb = embeddings * values                               # None * F * K (broadcast)

    # Square of the sum over fields.
    square_of_sum = torch.pow(torch.sum(weighted_emb, 1), 2)         # None * K
    # Sum of the squares over fields.
    sum_of_squares = torch.sum(torch.pow(weighted_emb, 2), dim=1)    # None * K

    y_secd_order = 0.5 * torch.sub(square_of_sum, sum_of_squares)
    if use_dropout:
        y_secd_order = nn.Dropout(self.dropout_fm[1])(y_secd_order)

    # Step 3: deep part.
    y_deep = weighted_emb.reshape(-1, self.num_field * self.embedding_size)  # None * (F * K)
    if use_dropout:
        y_deep = nn.Dropout(self.dropout_deep[0])(y_deep)

    # Layers are registered as attributes with 1-based numeric suffixes.
    for layer_no in range(1, len(self.layer_sizes) + 1):
        suffix = str(layer_no)
        y_deep = getattr(self, 'linear_' + suffix)(y_deep)
        y_deep = getattr(self, 'batchNorm_' + suffix)(y_deep)
        y_deep = F.relu(y_deep)
        if use_dropout:
            y_deep = getattr(self, 'dropout_' + suffix)(y_deep)

    combined = torch.cat((y_first_order, y_secd_order, y_deep), dim=1)
    return self.fc(combined)
Example #18
Source File: lednet_1.py From LEDNet with MIT License | 5 votes |
def forward(self, x):
    """Fuse a three-level pyramid with a mid branch and a global branch.

    The pyramid (``down1`` -> ``down2`` -> ``down3``) is merged back
    bottom-up with bilinear upsampling to 1/4, 1/2 and full input
    resolution, multiplied by ``self.mid(x)`` and then added to the
    upsampled output of ``self.branch1(x)``.
    """
    height = x.size()[2]
    width = x.size()[3]

    def resize(tensor, size):
        # All resizing in this block uses the same bilinear settings.
        return interpolate(tensor, size=size, mode="bilinear", align_corners=True)

    global_branch = resize(self.branch1(x), (height, width))
    mid_branch = self.mid(x)

    # Downsampling path.
    level1 = self.down1(x)
    level2 = self.down2(level1)
    level3 = self.down3(level2)

    # Upsampling path with lateral connections.
    level3 = resize(level3, (height // 4, width // 4))
    level2 = self.conv2(level2)
    merged = resize(level2 + level3, (height // 2, width // 2))
    level1 = self.conv1(level1)
    merged = resize(merged + level1, (height, width))

    merged = torch.mul(merged, mid_branch)
    return merged + global_branch
Example #19
Source File: additive_shared.py From PySyft with Apache License 2.0 | 5 votes |
def __mul__(self, other, **kwargs):
    """Support ``self * other`` by delegating to ``self.mul``.

    Extra keyword arguments are forwarded unchanged.
    """
    product = self.mul(other, **kwargs)
    return product
Example #20
Source File: layers.py From nice_pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def coupling_law(self, a, b):
    """Multiplicative coupling: return the element-wise product of ``a`` and ``b``."""
    product = torch.mul(a, b)
    return product
Example #21
Source File: pgd.py From DeepRobust with MIT License | 5 votes |
def prox_l1(self, data, alpha):
    """Proximal operator for the l1 norm (soft thresholding).

    Shrinks every entry of ``data`` towards zero by ``alpha``; entries whose
    magnitude is at most ``alpha`` become zero.
    """
    shrunk_magnitude = torch.clamp(torch.abs(data) - alpha, min=0)
    return torch.mul(torch.sign(data), shrunk_magnitude)
Example #22
Source File: optimizers.py From ITDD with MIT License | 5 votes |
def _experimental_reshape(self, shape):
    """Fold a shape with more than two dimensions into a 2-D shape.

    Dimensions after the first two are split into a leading and a trailing
    group (the leading group takes the extra element when the count is odd).
    The leading group is folded into ``shape[1]`` and the trailing group
    into ``shape[0]``.  With exactly one extra dimension it is folded into
    ``shape[1]``.

    Returns:
        ``(new_shape, copy_of_original_shape)``.
    """
    trailing = shape[2:]
    if len(trailing) == 1:
        folded = (shape[0], shape[1] * shape[2])
    else:
        split_at = len(trailing) // 2 + len(trailing) % 2
        row_factor = functools.reduce(operator.mul, trailing[split_at:], 1)
        col_factor = functools.reduce(operator.mul, trailing[:split_at], 1)
        folded = (shape[0] * row_factor, shape[1] * col_factor)
    return folded, copy(shape)
Example #23
Source File: copy_generator.py From ITDD with MIT License | 5 votes |
def forward(self, hidden, attn, src_map):
    """Compute a distribution over the target vocabulary extended by copying.

    The output concatenates the generation probabilities, scaled by the
    probability of *not* copying, with the copy probabilities over the
    dynamic (source-derived) vocabulary.

    Args:
        hidden (`FloatTensor`): hidden outputs `[batch*tlen, input_size]`
        attn (`FloatTensor`): attention over source positions
            `[batch*tlen, src_len]`
        src_map (`FloatTensor`): indicator matrix mapping each source word
            to its index in the extended vocabulary
            `[src_len, batch, extra_words]`

    Returns:
        Tensor with one row per `batch*tlen` entry and
        `vocab + extra_words` columns.
    """
    # Shape checks.
    batch_by_tlen, _ = hidden.size()
    batch_by_tlen_, slen = attn.size()
    slen_, batch, cvocab = src_map.size()
    aeq(batch_by_tlen, batch_by_tlen_)
    aeq(slen, slen_)

    # Generation distribution; the padding token gets zero probability.
    logits = self.linear(hidden)
    logits[:, self.pad_idx] = -float('inf')
    gen_prob = torch.softmax(logits, 1)

    # Probability of copying, p(z=1), per row.
    p_copy = torch.sigmoid(self.linear_copy(hidden))

    # Generation part: p_word(w) * (1 - p(z)).
    out_prob = torch.mul(gen_prob, 1 - p_copy)

    # Copy part: attention scaled by p(z), projected onto the extended vocab.
    scaled_attn = torch.mul(attn, p_copy)
    attn_by_batch = scaled_attn.view(-1, batch, slen).transpose(0, 1)
    map_by_batch = src_map.transpose(0, 1)
    copy_prob = torch.bmm(attn_by_batch, map_by_batch).transpose(0, 1)
    copy_prob = copy_prob.contiguous().view(-1, cvocab)

    return torch.cat([out_prob, copy_prob], 1)
Example #24
Source File: losses.py From centerpose with MIT License | 5 votes |
def dfl_ssim(img1, img2, mask, window_size=11, val_range=1, gauss='original'):
    """Masked structural-dissimilarity loss between two image batches.

    Both images are multiplied by ``mask``, local statistics are gathered
    with a per-channel convolution (no padding), and the result is
    ``mean((1 - ssim) / 2)``.

    Args:
        img1, img2: 4-D image batches of identical shape.
        mask: multiplied element-wise into both images.
        window_size: requested window size; clipped to the image height
            and width.
        val_range: value range of the images.  It can differ from 255;
            other common ranges are 1 (sigmoid) and 2 (tanh).
        gauss: forwarded to ``create_window``.
    """
    padd = 0  # no padding (the alternative window_size // 2 is not used)
    (batch, channel, height, width) = img1.size()
    img1, img2 = torch.mul(img1, mask), torch.mul(img2, mask)

    real_size = min(window_size, height, width)
    window = create_window(real_size, gauss=gauss).to(img1.device)

    def local_sum(tensor):
        # Per-channel windowed filtering shared by all statistics below.
        return F.conv2d(tensor, window, padding=padd, groups=channel)

    # Stabilising constants.
    c1 = (0.01 * val_range) ** 2
    c2 = (0.03 * val_range) ** 2

    mu1 = local_sum(img1)
    mu2 = local_sum(img2)

    # Luminance term.
    num0 = mu1 * mu2 * 2.0
    den0 = mu1.pow(2) + mu2.pow(2)
    luminance = (num0 + c1) / (den0 + c1)

    # Contrast/structure term.
    num1 = local_sum(img1 * img2) * 2.0
    den1 = local_sum(img1 * img1 + img2 * img2)
    cs = (num1 - num0 + c2) / (den1 - den0 + c2)

    ssim_val = torch.mean(luminance * cs, dim=(-3, -2))
    return torch.mean((1.0 - ssim_val) / 2.0)
Example #25
Source File: model.py From LipReading with MIT License | 5 votes |
def forward(self, input):
    """Apply a weighted look-ahead over the sequence dimension.

    ``input`` is laid out as (T, N, H): sequence, batch, feature.  For every
    time step the current frame and the next ``self.context`` frames
    (zero-padded past the end) are combined with ``self.weight`` and summed
    over the context axis.

    Returns:
        Tensor with the same (T, N, H) layout as ``input``.
    """
    seq_len = input.size(0)

    # Pad the sequence dimension with `context` zero frames at the end.
    pad_frames = torch.zeros(self.context, *(input.size()[1:])).type_as(input.data)
    padded = torch.cat((input, Variable(pad_frames)), 0)

    # One window of width context + 1 per time step:
    # TxLxNxH - sequence, context, batch, feature.
    window_width = self.context + 1
    windows = torch.stack([padded[t:t + window_width] for t in range(seq_len)])

    # TxNxHxL - sequence, batch, feature, context.
    windows = windows.permute(0, 2, 3, 1)
    return torch.mul(windows, self.weight).sum(dim=3)
Example #26
Source File: stylegan2.py From StyleGAN2_PyTorch with MIT License | 5 votes |
def forward(self, x):
    """Normalise ``x`` by the root mean square over dim 1.

    ``self.epsilon`` is added to the mean of squares before the reciprocal
    square root is taken.
    """
    squared = torch.mul(x, x)  # same as x ** 2
    mean_square = torch.mean(squared, dim=1, keepdim=True)
    inv_rms = torch.rsqrt(mean_square + self.epsilon)
    return x * inv_rms
Example #27
Source File: loss.py From StyleGAN2_PyTorch with MIT License | 5 votes |
def G_logistic_ns_pathreg(x, D, opts, pl_decay=0.01, pl_weight=2.0):
    """Generator loss: softplus(-D(fake)) plus a path-length penalty.

    Args:
        x: pair ``(fake_images_out, fake_dlatents_out)``.
        D: callable scoring the fake images.
        opts: not read anywhere in this function.
        pl_decay: factor applied to the summed path lengths to form the
            reference value ``pl_mean``.
        pl_weight: multiplier applied to the squared deviation.

    Returns:
        ``loss + reg`` (combined by broadcasting).

    NOTE(review): the gradient of the images with respect to the latents is
    requested below, so the images are presumably expected to have been
    produced from those latents in the autograd graph - confirm with the
    caller.
    """
    fake_images_out, fake_dlatents_out = x
    fake_images_out = Variable(fake_images_out, requires_grad=True).to(fake_images_out.device)
    fake_scores_out = D(fake_images_out)
    # Logistic loss on the negated scores of the fake images.
    loss = F.softplus(-fake_scores_out)

    fake_dlatents_out = Variable(fake_dlatents_out, requires_grad=True).to(fake_dlatents_out.device)

    # Compute |J*y|.
    # Noise is scaled by 1 / sqrt(shape[2] * shape[3]) of the image tensor.
    pl_noise = torch.randn(fake_images_out.shape) / np.sqrt(fake_images_out.shape[2] * fake_images_out.shape[3])
    pl_noise = pl_noise.to(fake_images_out.device)
    # retain_graph=True keeps the graph alive after this gradient call.
    pl_grads = grad(torch.sum(fake_images_out * pl_noise), fake_dlatents_out, retain_graph=True)[0]
    # Squares are summed over dim 2 and then dim 1, so pl_grads must be 3-D.
    pl_lengths = torch.sqrt(torch.sum(torch.sum(torch.mul(pl_grads, pl_grads), dim=2), dim=1))
    # The reference length is pl_decay times the sum of this batch's lengths;
    # no running average is kept between calls.
    pl_mean = pl_decay * torch.sum(pl_lengths)

    # Calculate (|J*y|-a)^2.
    # Computes square of x element-wise
    # https://discuss.pytorch.org/t/computes-square-of-x-element-wise/9079
    pl_penalty = torch.mul(pl_lengths - pl_mean, pl_lengths - pl_mean)

    # Apply weight.
    # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
    # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
    #
    # gamma_pl = pl_weight / num_pixels / num_affine_layers
    # = 2 / (r^2) / (log2(r) * 2 - 2)
    # = 1 / (r^2 * (log2(r) - 1))
    # = ln(2) / (r^2 * (ln(r) - ln(2))
    #
    reg = pl_penalty * pl_weight

    # fixme: only support non-lazy mode
    return loss + reg
Example #28
Source File: loss.py From StyleGAN2_PyTorch with MIT License | 5 votes |
def D_logistic_r1(real_img, D, gamma=10.0):
    """Per-sample gradient penalty on real images.

    The squared gradient of the summed discriminator output with respect to
    the real images is summed over dims 1-3 and scaled by ``gamma / 2``.

    Reference: R1 and R2 regularizers from the paper
    "Which Training Methods for GANs do actually Converge?",
    Mescheder et al. 2018.

    Returns:
        One penalty value per batch element.
    """
    # The images must require grad so they can be differentiated against.
    inputs = Variable(real_img, requires_grad=True).to(real_img.device)
    total_logit = torch.sum(D(inputs))
    input_grads = grad(total_logit, inputs)[0]
    squared_norm = torch.sum(torch.mul(input_grads, input_grads), dim=[1, 2, 3])
    return squared_norm * (gamma * 0.5)
Example #29
Source File: self_attention.py From SemEval2019Task3 with MIT License | 5 votes |
def forward(self, inputs, input_lengths):
    """Attention-pool a batch of padded sequences into fixed-size vectors.

    # Arguments:
        inputs (torch.Tensor): padded input sequences, indexed as
            (batch, time, features)
        input_lengths (torch.LongTensor): true length of every sequence

    # Return:
        Tuple ``(representations, attentions)``: the attention-weighted sum
        over time and the normalised attention weights.  Both are always
        returned.
    """
    logits = inputs.matmul(self.attention_vector)
    # Subtracting the global maximum keeps exp() from overflowing.
    unnorm_ai = (logits - logits.max()).exp()

    # Compute a mask for the attention on the padded sequences
    # See e.g. https://discuss.pytorch.org/t/self-attention-on-words-and-masking/5671/5
    max_len = unnorm_ai.size(1)
    # Build the position index next to the lengths and move the finished
    # mask next to the scores, instead of hard-coding CPU tensors and
    # .cuda(); this also lets the layer run without a GPU.
    idxes = torch.arange(
        max_len, dtype=torch.long, device=input_lengths.device
    ).unsqueeze(0)
    mask_dtype = torch.half if self.is_half else torch.float
    mask = (idxes < input_lengths.unsqueeze(1)).to(
        device=unnorm_ai.device, dtype=mask_dtype
    )

    # Apply the mask and renormalise the attention scores (weights).
    masked_weights = unnorm_ai * mask
    att_sums = masked_weights.sum(dim=1, keepdim=True)  # sums per sequence
    attentions = masked_weights.div(att_sums)

    # Apply the attention weights.
    weighted = torch.mul(inputs, attentions.unsqueeze(-1).expand_as(inputs))

    # Final fixed-size representation of every sequence.
    representations = weighted.sum(dim=1)

    return representations, attentions
Example #30
Source File: model.py From treelstm.pytorch with MIT License | 5 votes |
def node_forward(self, inputs, child_c, child_h):
    """Compute the cell and hidden state of one tree node from its children.

    Args:
        inputs: input for this node.
        child_c: cell states of the children, stacked along dim 0.
        child_h: hidden states of the children, stacked along dim 0.

    Returns:
        ``(c, h)``: the node's cell state and hidden state.
    """
    # Input, output and update gates use the sum of the children's hidden states.
    summed_child_h = torch.sum(child_h, dim=0, keepdim=True)
    gates = self.ioux(inputs) + self.iouh(summed_child_h)
    in_gate, out_gate, update = torch.split(gates, gates.size(1) // 3, dim=1)
    in_gate = F.sigmoid(in_gate)
    out_gate = F.sigmoid(out_gate)
    update = F.tanh(update)

    # One forget gate per child; the input projection is repeated per child.
    per_child_input = self.fx(inputs).repeat(len(child_h), 1)
    forget = F.sigmoid(self.fh(child_h) + per_child_input)
    kept_child_c = torch.mul(forget, child_c)

    c = torch.mul(in_gate, update) + torch.sum(kept_child_c, dim=0, keepdim=True)
    h = torch.mul(out_gate, F.tanh(c))
    return c, h