Python torch.autograd.grad() Examples
The following are 30 code examples of torch.autograd.grad().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module torch.autograd, or try the search function.
Example #1
Source File: gradient_penalty.py From TreeGAN with MIT License | 7 votes |
def __call__(self, netD, real_data, fake_data):
    """Return the gradient penalty of ``netD`` on mixed real/fake points.

    Each real sample is blended with the matching fake sample using one
    random coefficient, ``netD`` is differentiated at the blended points, and
    the result is ``mean(((||grad||_2 - gamma) / gamma) ** 2) * lambdaGP``.

    Args:
        netD: Critic, called with the blended batch.
        real_data: Real batch; its first dimension sets the batch size.
        fake_data: Generated batch; only the first ``real_data.size(0)``
            entries are used.

    Returns:
        Scalar tensor built with ``create_graph=True`` so it can be
        back-propagated.
    """
    n = real_data.size(0)
    fake = fake_data[:n]

    # One coefficient per sample, broadcast over two trailing axes.
    # requires_grad=True puts the blended points on the autograd graph even
    # when neither input batch requires grad.
    mix = torch.rand(n, 1, 1, requires_grad=True).to(self.device)
    blended = real_data + mix * (fake - real_data)

    critic_out = netD(blended)
    seed = torch.ones(critic_out.size()).to(self.device)
    d_blended = grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample = d_blended.contiguous().view(n, -1)
    relative = (per_sample.norm(2, dim=1) - self.gamma) / self.gamma
    return (relative ** 2).mean() * self.lambdaGP
Example #2
Source File: gan_cifar10.py From wgan-gp with MIT License | 6 votes |
def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty for one batch.

    The critic is evaluated at random per-sample interpolations between
    ``real_data`` and ``fake_data``; the penalty is
    ``LAMBDA * mean((||d netD / d x||_2 - 1) ** 2)`` with the norm taken over
    all non-batch dimensions.

    Compared with the original snippet this version derives the batch size
    and sample shape from ``real_data`` (instead of the ``BATCH_SIZE`` global
    and a fixed 3x32x32 view, whose ``nelement() / BATCH_SIZE`` argument is a
    float under Python 3) and follows the device of ``real_data`` instead of
    the ``use_cuda`` / ``gpu`` globals.

    Args:
        netD: Critic, called with the interpolated batch.
        real_data: Real batch; first dimension is the batch size.
        fake_data: Generated batch, broadcast-compatible with ``real_data``
            and on the same device.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, shaped (B, 1, ..., 1) so that it
    # broadcasts over every non-batch dimension.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + (1 - alpha) * fake_data
    # Fresh leaf: the penalty differentiates the critic w.r.t. its input only.
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty


# For generating samples
Example #3
Source File: cyclegan.py From DepthNets with MIT License | 6 votes |
def compute_d_norms(self, A_real_, B_real_):
    """Return the input-gradient penalties of both discriminators.

    For each domain the batch is detached and made a fresh leaf, passed
    through its discriminator (``self.d_a`` / ``self.d_b``), and the
    penalty ``mean((||grad||_2 - 1) ** 2)`` is computed from the gradient of
    the output w.r.t. that batch.

    Returns:
        Tuple ``(gp_a, gp_b)`` of scalar tensors.
    """
    A_real = Variable(A_real_.data, requires_grad=True)
    B_real = Variable(B_real_.data, requires_grad=True)

    # Both forward passes run before any gradient is taken.
    scores = [(self.d_a(A_real), A_real), (self.d_b(B_real), B_real)]

    seeds = [torch.ones(out.size()) for out, _ in scores]
    if self.use_cuda:
        seeds = [seed.cuda() for seed in seeds]

    penalties = []
    for (out, leaf), seed in zip(scores, seeds):
        d_leaf = grad(
            outputs=out,
            inputs=leaf,
            grad_outputs=seed,
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
        )[0]
        flat = d_leaf.view(d_leaf.size()[0], -1)
        penalties.append(((flat.norm(2, 1) - 1) ** 2).mean())

    gp_a, gp_b = penalties
    return gp_a, gp_b
Example #4
Source File: sliced_sm.py From ncsn with GNU General Public License v3.0 | 6 votes |
def sliced_score_matching(energy_net, samples, n_particles=1):
    """Sliced score matching loss with unit-normalised projection vectors.

    ``samples`` is tiled ``n_particles`` times, a random direction is drawn
    per tiled sample and normalised along the last axis, and the loss is
    ``0.5 * (v . s)^2 + v . d(v . s)/dx`` where ``s`` is the gradient of
    ``-energy_net(x).sum()``.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar means: total, squared
        projection term, and second-order term.
    """
    tiled = samples.unsqueeze(0).expand(n_particles, *samples.shape)
    points = tiled.contiguous().view(-1, *samples.shape[1:])
    points.requires_grad_(True)

    directions = torch.randn_like(points)
    directions = directions / torch.norm(directions, dim=-1, keepdim=True)

    log_density = -energy_net(points).sum()
    score = autograd.grad(log_density, points, create_graph=True)[0]

    # Scalar whose gradient w.r.t. the points yields the second-order term.
    projected_total = torch.sum(score * directions)
    first_term = torch.sum(score * directions, dim=-1) ** 2 * 0.5
    hvp = autograd.grad(projected_total, points, create_graph=True)[0]
    second_term = torch.sum(directions * hvp, dim=-1)

    # Average over particles, keeping one value per original sample.
    first_term = first_term.view(n_particles, -1).mean(dim=0)
    second_term = second_term.view(n_particles, -1).mean(dim=0)
    total = first_term + second_term
    return total.mean(), first_term.mean(), second_term.mean()
Example #5
Source File: darcy.py From pde-surrogate with MIT License | 6 votes |
def conv_constitutive_constraint_nonlinear_exp(input, output, sobel_filter):
    """Mean squared residual of the nonlinear Darcy constitutive law.

    Nonlinear extension of Darcy's law: ``sigma = -exp(K * u) * grad(u)``.

    Args:
        input: K.
        output: Tensor whose channels 0, 1, 2 (axis 1) hold u, sigma1, sigma2.
        sobel_filter: Object providing ``grad_h`` and ``grad_v``, applied to
            the u channel.

    Returns:
        Mean over all elements of the summed squared residuals of the two
        flux components.
    """
    u = output[:, [0]]
    sigma1 = output[:, [1]]
    sigma2 = output[:, [2]]

    du_h = sobel_filter.grad_h(u)
    du_v = sobel_filter.grad_v(u)

    # The same coefficient multiplies both gradient components.
    neg_coeff = -torch.exp(input * u)
    residual_h = sigma1 - neg_coeff * du_h
    residual_v = sigma2 - neg_coeff * du_v
    return (residual_h ** 2 + residual_v ** 2).mean()
Example #6
Source File: sliced_sm.py From ncsn with GNU General Public License v3.0 | 6 votes |
def sliced_score_matching_vr(energy_net, samples, n_particles=1):
    """Sliced score matching loss using ``0.5 * ||s||^2`` as the first term.

    ``samples`` is tiled ``n_particles`` times and one (unnormalised)
    Gaussian direction is drawn per tiled sample. ``s`` is the gradient of
    ``-energy_net(x).sum()``; the second term is ``v . d(v . s)/dx``.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar means.
    """
    points = (
        samples.unsqueeze(0)
        .expand(n_particles, *samples.shape)
        .contiguous()
        .view(-1, *samples.shape[1:])
    )
    points.requires_grad_(True)
    directions = torch.randn_like(points)

    log_density = -energy_net(points).sum()
    score = autograd.grad(log_density, points, create_graph=True)[0]
    half_sq_norm = torch.sum(score * score, dim=-1) / 2.

    directional = torch.sum(score * directions)
    hvp = autograd.grad(directional, points, create_graph=True)[0]
    curvature = torch.sum(directions * hvp, dim=-1)

    # Average over particles, keeping one value per original sample.
    half_sq_norm = half_sq_norm.view(n_particles, -1).mean(dim=0)
    curvature = curvature.view(n_particles, -1).mean(dim=0)
    total = half_sq_norm + curvature
    return total.mean(), half_sq_norm.mean(), curvature.mean()
Example #7
Source File: sliced_sm.py From ncsn with GNU General Public License v3.0 | 6 votes |
def sliced_score_estimation_vr(score_net, samples, n_particles=1):
    """Sliced score matching loss for a network that outputs the score.

    Be careful if the shape of samples is not B x x_dim!

    ``samples`` is tiled ``n_particles`` times and one Gaussian direction is
    drawn per tiled sample. The first term is ``0.5 * ||score||^2``; the
    second is ``v . d(v . score)/dx``; both are flattened per sample.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar means.
    """
    tiled = samples.unsqueeze(0).expand(n_particles, *samples.shape)
    points = tiled.contiguous().view(-1, *samples.shape[1:])
    points.requires_grad_(True)
    n_points = points.shape[0]

    directions = torch.randn_like(points)

    score = score_net(points)
    directional = torch.sum(score * directions)
    hvp = autograd.grad(directional, points, create_graph=True)[0]

    flat_score = score.view(n_points, -1)
    half_sq_norm = torch.sum(flat_score * flat_score, dim=-1) / 2.
    curvature = torch.sum((directions * hvp).view(n_points, -1), dim=-1)

    # Average over particles, keeping one value per original sample.
    half_sq_norm = half_sq_norm.view(n_particles, -1).mean(dim=0)
    curvature = curvature.view(n_particles, -1).mean(dim=0)
    total = half_sq_norm + curvature
    return total.mean(), half_sq_norm.mean(), curvature.mean()
Example #8
Source File: utils.py From Text-to-Image-Synthesis with GNU General Public License v3.0 | 6 votes |
def compute_GP(netD, real_data, real_embed, fake_data, LAMBDA):
    """Return the WGAN-GP gradient penalty of a conditional critic.

    Changes relative to the original snippet:

    * The gradient is flattened per sample before taking the L2 norm. The
      original called ``norm(2, dim=1)`` on the un-flattened image gradient,
      which measures the norm across channels at every pixel rather than the
      per-sample gradient norm the penalty is defined on.
    * The sample shape is taken from ``real_data`` instead of a fixed
      3x64x64 view, and tensors follow ``real_data``'s device instead of an
      unconditional ``.cuda()``.

    Args:
        netD: Critic called as ``netD(x, real_embed)``; the first element of
            its return value is the score.
        real_data: Real batch; first dimension is the batch size.
        real_embed: Conditioning passed through to ``netD`` unchanged.
        fake_data: Generated batch, same shape and device as ``real_data``.
        LAMBDA: Penalty weight.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over non-batch dims.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates, _ = netD(interpolates, real_embed)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
Example #9
Source File: lsd.py From torchsupport with MIT License | 6 votes |
def energy_loss(self, data, score, critic):
    """Return the mean of the jacobian and critic terms; log three scalars.

    ``score`` and ``critic`` must both be differentiable w.r.t. ``data``.
    The jacobian term contracts the vector-Jacobian product of ``critic``
    with the vectors from ``self.noise_vectors(critic)``; the critic term
    contracts the gradient of ``score`` with ``critic``. Per-call means of
    the jacobian, critic and ``score ** 2`` terms are written to
    ``self.current_losses`` under "jacobian", "critic" and "penalty".
    """
    vectors = self.noise_vectors(critic)
    batch = score.size(0)

    (grad_score,) = ag.grad(
        score, data,
        grad_outputs=torch.ones_like(score),
        create_graph=True
    )
    (jacobian,) = ag.grad(
        critic, data,
        grad_outputs=vectors,
        create_graph=True
    )

    jacobian_term = (vectors * jacobian).view(batch, -1).sum(dim=-1)
    critic_term = (grad_score * critic).view(batch, -1).sum(dim=-1)
    # Logged only; not part of the returned value.
    penalty_term = (score ** 2).mean()

    logged = (
        ("jacobian", jacobian_term),
        ("critic", critic_term),
        ("penalty", penalty_term),
    )
    for key, term in logged:
        self.current_losses[key] = float(term.mean())

    return (jacobian_term + critic_term).mean()
Example #10
Source File: samplers.py From torchsupport with MIT License | 6 votes |
def integrate(self, score, data, *args):
    """Iteratively move ``data`` against the gradient of ``score``.

    Each iteration evaluates ``score`` on a noise-perturbed copy of
    ``data``, takes the gradient w.r.t. ``data``, optionally clips it,
    takes a step of size ``self.rate``, optionally clamps, and detaches.
    Iteration stops once the step budget is exhausted and, if
    ``self.target`` is set, the batch-mean energy is at or below it.

    NOTE(review): the source was flattened onto one line, so block
    boundaries are reconstructed; in particular ``self.step += 1`` is
    assumed to run once per call, after the loop — confirm against the
    original file.
    """
    # A non-positive call counter gets a 10x longer budget.
    if self.step > 0:
        budget = self.steps
    else:
        budget = 10 * self.steps

    iteration = 0
    while True:
        make_differentiable(data)
        make_differentiable(args)

        perturbed = data + self.noise * torch.randn_like(data)
        energy = score(perturbed, *args)
        if isinstance(energy, (list, tuple)):
            energy, *_ = energy

        gradient = ag.grad(energy, data, torch.ones_like(energy))[0]
        if self.max_norm:
            gradient = clip_grad_by_norm(gradient, self.max_norm)

        data = data - self.rate * gradient
        if self.clamp is not None:
            data = data.clamp(*self.clamp)
        data = data.detach()

        finished = iteration >= budget
        if finished and self.target is not None:
            finished = bool((energy.mean(dim=0) <= self.target).all())

        iteration += 1
        # Periodic in-place re-randomisation of the state.
        if (iteration + 1) % 500 == 0:
            data.random_()

        if finished:
            break

    self.step += 1
    return data
Example #11
Source File: samplers.py From torchsupport with MIT License | 6 votes |
def integrate(self, score, data, *args):
    """Run ``self.steps`` gradient-guided updates of a one-hot state.

    Works on a clone of ``data``. Each step differentiates ``score`` w.r.t.
    ``data.tensor``, turns the (optionally clipped) gradient into a
    distribution over dim 1, mixes it with the current state and a constant
    noise floor, and re-discretises with ``hard_one_hot``.

    NOTE(review): the source was flattened onto one line; the assignment to
    ``data.tensor`` and the ``detach`` are assumed to be inside the loop —
    confirm against the original file.
    """
    data = data.clone()
    # The initial evaluation is kept for parity with the original even
    # though its result is unused.
    _initial_energy, *_ = score(data, *args)

    for _step in range(self.steps):
        make_differentiable(data)
        make_differentiable(args)

        energy = score(data, *args)
        if isinstance(energy, (list, tuple)):
            energy, *_ = energy

        gradient = ag.grad(energy, data.tensor, torch.ones_like(energy))[0]
        if self.max_norm:
            gradient = clip_grad_by_norm(gradient, self.max_norm)

        # attempt at gradient based local update of discrete variables:
        grad_prob = (-500 * gradient).softmax(dim=1)
        keep = 1 - self.noise - self.rate
        new_prob = self.noise + self.rate * grad_prob + keep * data.tensor
        data.tensor = hard_one_hot(new_prob.log())
        data = data.detach()

    return data
Example #12
Source File: gan_mnist.py From wgan-gp with MIT License | 6 votes |
def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty for one batch.

    The critic is differentiated at random per-sample interpolations
    between ``real_data`` and ``fake_data``; the result is
    ``LAMBDA * mean((||grad||_2 - 1) ** 2)`` with a per-sample norm.

    Compared with the original snippet the batch size comes from
    ``real_data`` (the ``BATCH_SIZE`` global fails on a shorter final
    batch), inputs of any rank are supported, and the device follows
    ``real_data`` instead of the ``use_cuda`` / ``gpu`` globals.

    Args:
        netD: Critic, called with the interpolated batch.
        real_data: Real batch; first dimension is the batch size.
        fake_data: Generated batch, same shape and device as ``real_data``.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over non-batch dims.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    # No-op for the flat (B, D) inputs this function was written for.
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty


# ==================Definition End======================
Example #13
Source File: gan_language.py From wgan-gp with MIT License | 6 votes |
def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty for one batch of sequences.

    The critic is differentiated at random per-sample interpolations
    between ``real_data`` and ``fake_data``; the result is
    ``LAMBDA * mean((||grad||_2 - 1) ** 2)``.

    Changes relative to the original snippet:

    * The gradient is flattened per sample before the L2 norm. The original
      applied ``norm(2, dim=1)`` to the rank-3 gradient, which yields one
      norm per position of the last axis instead of one per sample.
    * Batch size comes from ``real_data`` rather than the ``BATCH_SIZE``
      global, and the device follows ``real_data`` rather than the
      ``use_cuda`` / ``gpu`` globals.

    Args:
        netD: Critic, called with the interpolated batch.
        real_data: Real batch; first dimension is the batch size.
        fake_data: Generated batch, same shape and device as ``real_data``.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over non-batch dims.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates)

    # TODO: Make ConvBackward differentiable
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
Example #14
Source File: wgan_gp_loss.py From pggan-pytorch with MIT License | 6 votes |
def calc_gradient_penalty(D, real_data, fake_data, iwass_lambda, iwass_target):
    """Return the per-sample gradient penalty of ``D`` on mixed data.

    Real and fake batches are mixed row-wise with uniform random factors,
    ``D`` is differentiated at the mixed points, and the result is
    ``(||grad||_2 - iwass_target) ** 2 * iwass_lambda / iwass_target ** 2``
    for each sample (no reduction over the batch).

    The module-level ``mixing_factors`` and ``grad_outputs`` CUDA buffers
    are reused across calls and re-allocated when the batch size changes.

    NOTE(review): the source was flattened onto one line; ``uniform_()``
    and ``fill_(1.)`` are assumed to run on every call, outside the
    re-allocation checks — confirm against the original file.
    """
    global mixing_factors, grad_outputs

    n = real_data.size(0)
    if mixing_factors is None or n != mixing_factors.size(0):
        mixing_factors = torch.cuda.FloatTensor(n, 1)
    mixing_factors.uniform_()

    real_part = mul_rowwise(real_data, 1 - mixing_factors)
    fake_part = mul_rowwise(fake_data, mixing_factors)
    mixed_data = Variable(real_part + fake_part, requires_grad=True)

    mixed_scores = D(mixed_data)
    if grad_outputs is None or mixed_scores.size(0) != grad_outputs.size(0):
        grad_outputs = torch.cuda.FloatTensor(mixed_scores.size())
    grad_outputs.fill_(1.)

    d_mixed = grad(
        outputs=mixed_scores,
        inputs=mixed_data,
        grad_outputs=grad_outputs,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    d_mixed = d_mixed.view(d_mixed.size(0), -1)

    deviation = d_mixed.norm(2, dim=1) - iwass_target
    return (deviation ** 2) * iwass_lambda / (iwass_target ** 2)
Example #15
Source File: gan_toy.py From wgan-gp with MIT License | 6 votes |
def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty for one batch.

    The critic is differentiated at random per-sample interpolations
    between ``real_data`` and ``fake_data``; the result is
    ``LAMBDA * mean((||grad||_2 - 1) ** 2)`` with a per-sample norm.

    Compared with the original snippet the batch size comes from
    ``real_data`` (the ``BATCH_SIZE`` global fails on a shorter final
    batch), inputs of any rank are supported, and the device follows
    ``real_data`` instead of the ``use_cuda`` global.

    Args:
        netD: Critic, called with the interpolated batch.
        real_data: Real batch; first dimension is the batch size.
        fake_data: Generated batch, same shape and device as ``real_data``.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    n_samples = real_data.size(0)

    # (B, 1, ..., 1) coefficients broadcast over every non-batch dimension.
    broadcast_shape = [n_samples] + [1] * (real_data.dim() - 1)
    alpha = torch.rand(n_samples, 1).to(real_data.device).view(*broadcast_shape)

    interpolates = (alpha * real_data + (1 - alpha) * fake_data).detach()
    interpolates.requires_grad_(True)

    disc_interpolates = netD(interpolates)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    # No-op for the flat (B, D) inputs this function was written for.
    gradients = gradients.reshape(n_samples, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty


# ==================Definition End======================
Example #16
Source File: utils.py From tfm-franroldan-wav2pix with GNU General Public License v3.0 | 6 votes |
def compute_GP(netD, real_data, real_embed, fake_data, LAMBDA, project=False):
    """Return the WGAN-GP gradient penalty of a conditional critic.

    Changes relative to the original snippet:

    * The gradient is flattened per sample before taking the L2 norm; the
      original ``norm(2, dim=1)`` on an un-flattened gradient does not give
      the per-sample norm for inputs of rank > 2.
    * The mixing coefficients broadcast over inputs of any rank (the
      original ``rand(B, 1).expand(real_data.size())`` only works for
      rank-2 inputs), and tensors follow ``real_data``'s device instead of
      an unconditional ``.cuda()``.

    Args:
        netD: Critic called as ``netD(x, real_embed, project=project)``; the
            first element of its return value is the score.
        real_data: Real batch; first dimension is the batch size.
        real_embed: Conditioning passed through to ``netD`` unchanged.
        fake_data: Generated batch, same shape and device as ``real_data``.
        LAMBDA: Penalty weight.
        project: Forwarded to ``netD``.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    # TODO: Should be improved!!!! Maybe using:
    # https://github.com/EmilienDupont/wgan-gp/blob/master/training.py
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over non-batch dims.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates, _ = netD(interpolates, real_embed, project=project)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty
Example #17
Source File: stargan.py From PyTorch-GAN with MIT License | 6 votes |
def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    ``D`` returns a tuple whose first element is the score. The result is
    ``mean((||grad||_2 - 1) ** 2)`` over per-sample gradient norms at
    random interpolations of the two batches.
    """
    n = real_samples.size(0)

    # Random weight term for interpolation between real and fake samples
    # (one coefficient per sample, broadcast over three trailing axes).
    alpha = Tensor(np.random.random((n, 1, 1, 1)))
    blended = alpha * real_samples + ((1 - alpha) * fake_samples)
    interpolates = blended.requires_grad_(True)

    d_interpolates, _ = D(interpolates)
    ones = Variable(Tensor(np.ones(d_interpolates.shape)), requires_grad=False)

    # Gradient of the score w.r.t. the interpolated samples.
    (gradients,) = autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=ones,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )
    per_sample_norm = gradients.view(gradients.size(0), -1).norm(2, dim=1)
    return ((per_sample_norm - 1) ** 2).mean()
Example #18
Source File: wgan_gp.py From PyTorch-GAN with MIT License | 6 votes |
def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    The result is ``mean((||grad||_2 - 1) ** 2)`` over per-sample gradient
    norms of ``D`` at random interpolations of the two batches.

    The ``grad_outputs`` seed is now shaped like the critic output. The
    original hard-coded a ``(batch, 1)`` tensor, which only works when ``D``
    returns exactly that shape.

    Args:
        D: Critic, called with the interpolated batch.
        real_samples: Real batch; first dimension is the batch size.
        fake_samples: Generated batch, same shape as ``real_samples``.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    # Random weight term for interpolation between real and fake samples
    alpha = Tensor(np.random.random((real_samples.size(0), 1, 1, 1)))
    # Get random interpolation between real and fake samples
    interpolates = (alpha * real_samples + ((1 - alpha) * fake_samples)).requires_grad_(True)
    d_interpolates = D(interpolates)
    # Seed matching the critic output in shape, dtype and device.
    fake = torch.ones_like(d_interpolates)
    # Get gradient w.r.t. interpolates
    gradients = autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=fake,
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.view(gradients.size(0), -1)
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gradient_penalty


# ----------
#  Training
# ----------
Example #19
Source File: dualgan.py From PyTorch-GAN with MIT License | 6 votes |
def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    The result is ``mean((||grad||_2 - 1) ** 2)`` over per-sample gradient
    norms of ``D`` at random interpolations of the two batches.
    """
    batch = real_samples.size(0)

    # Random weight term for interpolation between real and fake samples
    mix = FloatTensor(np.random.random((batch, 1, 1, 1)))

    # Random interpolation between real and fake samples
    interpolates = mix * real_samples + ((1 - mix) * fake_samples)
    interpolates = interpolates.requires_grad_(True)

    validity = D(interpolates)
    seed = Variable(FloatTensor(np.ones(validity.shape)), requires_grad=False)

    # Gradient w.r.t. interpolates
    d_interp = autograd.grad(
        outputs=validity,
        inputs=interpolates,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    d_interp = d_interp.view(d_interp.size(0), -1)

    deviation = d_interp.norm(2, dim=1) - 1
    return (deviation ** 2).mean()
Example #20
Source File: models.py From GINN with Apache License 2.0 | 6 votes |
def gradient_penalty(net, real_data, fake_data, device):
    """Return ``mean((||grad||_2 - 1) ** 2)`` of ``net`` on mixed inputs.

    Real and fake rows are blended with one uniform coefficient per row
    (drawn as ``(B, 1)`` and expanded to ``real_data``'s size), moved to
    ``device``, and ``net`` is differentiated w.r.t. the blended batch.

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    n_rows = real_data.shape[0]
    mix = torch.rand(n_rows, 1).expand(real_data.size()).to(device)

    blended = (mix * real_data + ((1 - mix) * fake_data)).to(device)
    points = autograd.Variable(blended, requires_grad=True)

    scores = net(points)
    seed = torch.ones(scores.size()).to(device)
    (d_points,) = autograd.grad(
        outputs=scores,
        inputs=points,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )

    flat = d_points.view(d_points.size(0), -1)
    return ((flat.norm(2, dim=1) - 1) ** 2).mean()
Example #21
Source File: models.py From GINN with Apache License 2.0 | 6 votes |
def hard_gradient_penalty(net, real_data, fake_data, device):
    """Return ``mean((||grad||_2 - 1) ** 2)`` of ``net`` on hard-mixed inputs.

    Instead of a convex blend, every element is taken from either the real
    or the fake batch according to an element-wise random binary mask
    (uniform draw thresholded at 0.5).

    Returns:
        Scalar penalty tensor (built with ``create_graph=True``).
    """
    take_real = torch.FloatTensor(real_data.shape).to(device).uniform_() > 0.5
    take_fake = ~take_real
    real_weight = take_real.float()
    fake_weight = take_fake.float()

    mixed = (real_weight * real_data + fake_weight * fake_data).to(device)
    points = autograd.Variable(mixed, requires_grad=True)

    scores = net(points)
    (d_points,) = autograd.grad(
        outputs=scores,
        inputs=points,
        grad_outputs=torch.ones(scores.size()).to(device),
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )

    norms = d_points.view(d_points.size(0), -1).norm(2, dim=1)
    return (norms - 1).pow(2).mean()
Example #22
Source File: utils.py From pytorch-arda with MIT License | 6 votes |
def calc_gradient_penalty(D, real_data, fake_data):
    """Calculate gradient penalty for WGAN-GP.

    Blends real and fake rows with one uniform coefficient per row
    (``params.batch_size`` rows), differentiates ``D`` w.r.t. the blend and
    returns ``params.penalty_lambda * mean((norm(grad, dim=1) - 1) ** 2)``.
    """
    mix = torch.rand(params.batch_size, 1).expand(real_data.size())
    mix = make_cuda(mix)

    blended = mix * real_data + ((1 - mix) * fake_data)
    interpolates = make_variable(blended)
    interpolates.requires_grad = True

    disc_interpolates = D(interpolates)
    seed = make_cuda(torch.ones(disc_interpolates.size()))
    (gradients,) = grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )

    unit_deviation = (gradients.norm(2, dim=1) - 1) ** 2
    return params.penalty_lambda * unit_deviation.mean()
Example #23
Source File: base.py From madminer with MIT License | 6 votes |
def log_likelihood_and_score(self, theta, x, **kwargs):
    """Calculates u(x), log p(x), and the score t(x) with a Gaussian base density.

    A single-row ``theta`` is expanded to one row per row of ``x``. The
    score is the gradient of the log likelihood w.r.t. ``theta``; gradient
    tracking is switched on for ``theta`` if it was off.

    Returns:
        Tuple ``(u, log_likelihood, score)``.
    """
    n_events = x.shape[0]
    if theta.shape[0] == 1:
        theta = theta.expand(n_events, -1)

    # The score is a derivative w.r.t. theta, so theta must be tracked.
    if not theta.requires_grad:
        theta.requires_grad = True

    u, log_likelihood = self.log_likelihood(theta, x, **kwargs)

    seed = torch.ones_like(log_likelihood.data)
    (score,) = grad(
        log_likelihood,
        theta,
        grad_outputs=seed,
        only_inputs=True,
        create_graph=True,
    )
    return u, log_likelihood, score
Example #24
Source File: score.py From madminer with MIT License | 6 votes |
def forward(self, x, return_grad_x=False):
    """Apply ``self.layers`` in order, with the activation between layers.

    Args:
        x: Input tensor. When ``return_grad_x`` is set and ``x`` does not
            track gradients yet, tracking is switched on in place.
        return_grad_x: Also return the gradient of the output w.r.t. ``x``.

    Returns:
        ``t_hat``, or ``(t_hat, x_gradient)`` when ``return_grad_x`` is set.
    """
    # Track gradient wrt x
    if return_grad_x and not x.requires_grad:
        x.requires_grad = True

    # Forward pass: the activation precedes every layer except the first.
    t_hat = x
    for index, layer in enumerate(self.layers):
        layer_input = t_hat if index == 0 else self.activation(t_hat)
        t_hat = layer(layer_input)

    if not return_grad_x:
        return t_hat

    # Calculate gradient
    (x_gradient,) = grad(
        t_hat,
        x,
        grad_outputs=torch.ones_like(t_hat.data),
        only_inputs=True,
        create_graph=True,
    )
    return t_hat, x_gradient
Example #25
Source File: functional.py From torchgan with MIT License | 6 votes |
def wasserstein_gradient_penalty(interpolate, d_interpolate, reduction="mean"):
    """Return ``(||grad||_2 - 1) ** 2`` passed through ``reduce``.

    ``grad`` is the gradient of ``d_interpolate`` w.r.t. ``interpolate``.

    NOTE(review): ``norm(2)`` is taken over the whole gradient tensor, batch
    dimension included, so the value handed to ``reduce`` is a single
    scalar — confirm this is intended rather than a per-sample norm.
    """
    seed = torch.ones_like(d_interpolate)
    (gradients,) = autograd.grad(
        outputs=d_interpolate,
        inputs=interpolate,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )
    deviation = gradients.norm(2) - 1
    return reduce(deviation ** 2, reduction)


# Dragan Penalty
Example #26
Source File: functional.py From torchgan with MIT License | 6 votes |
def dragan_gradient_penalty(interpolate, d_interpolate, k=1.0, reduction="mean"):
    """Return ``(||grad||_2 - k) ** 2`` passed through ``reduce``.

    ``grad`` is the gradient of ``d_interpolate`` w.r.t. ``interpolate``.

    NOTE(review): ``norm(2)`` is taken over the whole gradient tensor, batch
    dimension included. With ``allow_unused=True`` the gradient is ``None``
    when ``interpolate`` did not contribute to ``d_interpolate``, in which
    case the ``norm`` call fails — confirm callers never hit that case.
    """
    seed = torch.ones_like(d_interpolate)
    (gradients,) = autograd.grad(
        outputs=d_interpolate,
        inputs=interpolate,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
        allow_unused=True,
    )
    deviation = gradients.norm(2) - k
    return reduce(deviation ** 2, reduction)


# Auxiliary Classifier Loss
Example #27
Source File: nnBuildUnits.py From medSynthesisV1 with MIT License | 6 votes |
def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the (unweighted) WGAN-GP gradient penalty for one batch.

    Changes relative to the original snippet:

    * Mixing coefficients are drawn with ``torch.rand`` (uniform on
      [0, 1)). The original used ``torch.randn``, whose unbounded and
      possibly negative values evaluate the critic outside the segment
      between the real and fake samples.
    * The gradient is flattened per sample before the L2 norm; the original
      ``norm(2, dim=1)`` on an image-shaped gradient measured the norm
      across channels only.
    * Tensors follow ``real_data``'s device instead of an unconditional
      ``.cuda()``, and inputs of any rank are accepted.

    Args:
        netD: Critic, called with the interpolated batch.
        real_data: Real batch; first dimension is the batch size.
        fake_data: Generated batch, same shape and device as ``real_data``.

    Returns:
        Scalar tensor ``mean((||grad||_2 - 1) ** 2)`` (built with
        ``create_graph=True``).
    """
    batch_size = real_data.shape[0]

    # One uniform mixing coefficient per sample, broadcast over the rest.
    alpha = torch.rand(batch_size, 1).to(real_data.device)
    alpha = alpha.view(batch_size, *([1] * (real_data.dim() - 1)))

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates)
    gradients = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(disc_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gradient_penalty
Example #28
Source File: training.py From Self-Supervised-Gans-Pytorch with MIT License | 5 votes |
def _gradient_penalty(self, real_data, generated_data):
    """Return ``gp_weight * mean((||grad||_2 - 1) ** 2)`` on mixed images.

    Real and generated batches are blended with one uniform coefficient per
    sample; ``self.D`` (whose second return value is used) is differentiated
    w.r.t. the blend. The summed per-sample gradient norm is appended to
    ``self.losses['gradient_norm']``.
    """
    batch_size = real_data.size()[0]

    # Calculate interpolation
    mix = torch.rand(batch_size, 1, 1, 1).expand_as(real_data)
    if self.use_cuda:
        mix = mix.cuda()
    blended = mix * real_data.data + (1 - mix) * generated_data.data
    interpolated = Variable(blended, requires_grad=True)
    if self.use_cuda:
        interpolated = interpolated.cuda()

    # Calculate probability of interpolated examples
    _, prob_interpolated, _, _ = self.D(interpolated)

    # Calculate gradients of probabilities with respect to examples
    if self.use_cuda:
        seed = torch.ones(prob_interpolated.size()).cuda()
    else:
        seed = torch.ones(prob_interpolated.size())
    gradients = torch_grad(
        outputs=prob_interpolated,
        inputs=interpolated,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
    )[0]

    # Gradients have shape (batch_size, num_channels, img_width, img_height),
    # so flatten to easily take norm per example in batch
    gradients = gradients.view(batch_size, -1)
    self.losses['gradient_norm'].append(gradients.norm(2, dim=1).sum().data)

    # Derivatives of the gradient close to 0 can cause problems because of
    # the square root, so manually calculate norm and add epsilon
    squared = torch.sum(gradients ** 2, dim=1)
    gradients_norm = torch.sqrt(squared + 1e-12)

    # Return gradient penalty
    return self.gp_weight * ((gradients_norm - 1) ** 2).mean()
Example #29
Source File: ops.py From MLDG with MIT License | 5 votes |
def linear(inputs, weight, bias, meta_step_size=0.001, meta_loss=None, stop_gradient=False):
    """``F.linear`` with an optional one-step gradient update of the parameters.

    Without ``meta_loss`` this is a plain ``F.linear``. With it, ``weight``
    (and ``bias``, if given) are replaced by
    ``param - meta_step_size * d(meta_loss)/d(param)`` before the call.

    Args:
        inputs: Input tensor.
        weight: Weight tensor; must be part of ``meta_loss``'s graph when
            ``meta_loss`` is given.
        bias: Bias tensor or ``None``.
        meta_step_size: Step size of the update.
        meta_loss: Scalar loss to differentiate, or ``None``.
        stop_gradient: If true, the gradients are detached so the update
            direction is treated as a constant.
    """
    if meta_loss is None:
        return F.linear(inputs, weight, bias)

    def update_direction(param):
        # create_graph=True is used in both modes, which also keeps the
        # graph of meta_loss alive for further grad calls.
        g = autograd.grad(meta_loss, param, create_graph=True)[0]
        if stop_gradient:
            g = Variable(g.data, requires_grad=False)
        return g

    grad_weight = update_direction(weight)
    if bias is None:
        bias_adapt = bias
    else:
        bias_adapt = bias - update_direction(bias) * meta_step_size

    return F.linear(inputs, weight - grad_weight * meta_step_size, bias_adapt)
Example #30
Source File: ops.py From MLDG with MIT License | 5 votes |
def conv2d(inputs, weight, bias, meta_step_size=0.001, stride=1, padding=0, dilation=1, groups=1, meta_loss=None,
           stop_gradient=False):
    """``F.conv2d`` with an optional one-step gradient update of the parameters.

    Without ``meta_loss`` this is a plain ``F.conv2d``. With it, ``weight``
    (and ``bias``, if given) are replaced by
    ``param - meta_step_size * d(meta_loss)/d(param)`` before the call.

    Args:
        inputs: Input tensor.
        weight: Kernel tensor; must be part of ``meta_loss``'s graph when
            ``meta_loss`` is given.
        bias: Bias tensor or ``None``.
        meta_step_size: Step size of the update.
        stride, padding, dilation, groups: Forwarded to ``F.conv2d``.
        meta_loss: Scalar loss to differentiate, or ``None``.
        stop_gradient: If true, the gradients are detached so the update
            direction is treated as a constant.
    """
    conv_args = (stride, padding, dilation, groups)
    if meta_loss is None:
        return F.conv2d(inputs, weight, bias, *conv_args)

    def update_direction(param):
        # create_graph=True is used in both modes, which also keeps the
        # graph of meta_loss alive for further grad calls.
        g = autograd.grad(meta_loss, param, create_graph=True)[0]
        if stop_gradient:
            g = Variable(g.data, requires_grad=False)
        return g

    grad_weight = update_direction(weight)
    if bias is None:
        bias_adapt = bias
    else:
        bias_adapt = bias - update_direction(bias) * meta_step_size

    return F.conv2d(inputs, weight - grad_weight * meta_step_size, bias_adapt, *conv_args)