Python torch.autograd.grad() Examples

The following are 30 code examples of torch.autograd.grad(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.autograd, or try the search function.

Example #1

Source File: gradient_penalty.py From TreeGAN with MIT License

7 votes

def __call__(self, netD, real_data, fake_data):
    """Compute a gradient penalty for ``netD`` on random real/fake mixes.

    Args:
        netD: Callable scoring a batch of mixed samples.
        real_data: Batch of real samples; its first dimension sets the
            batch size.
        fake_data: Batch of generated samples; only the first
            ``real_data.size(0)`` entries are used.

    Returns:
        Scalar tensor ``lambdaGP * mean(((||g|| - gamma) / gamma) ** 2)``
        where ``g`` is the per-sample gradient of ``netD`` with respect to
        the mixed samples, flattened to one row per sample.
    """
    n = real_data.size(0)
    fake_trimmed = fake_data[:n]

    # The mixing weights are created with requires_grad=True, which is what
    # makes the mixed samples part of the autograd graph.
    mix = torch.rand(n, 1, 1, requires_grad=True).to(self.device)
    mixed = real_data + mix * (fake_trimmed - real_data)

    scores = netD(mixed)
    seed = torch.ones(scores.size()).to(self.device)
    raw_grad = grad(
        outputs=scores,
        inputs=mixed,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    # One row per sample so the 2-norm covers the whole sample.
    flat = raw_grad.contiguous().view(n, -1)
    relative_error = (flat.norm(2, dim=1) - self.gamma) / self.gamma
    return (relative_error ** 2).mean() * self.lambdaGP

Example #2

Source File: gan_cifar10.py From wgan-gp with MIT License

6 votes

def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty of ``netD`` on mixed samples.

    Reads the module-level names ``BATCH_SIZE``, ``LAMBDA``, ``use_cuda``
    and ``gpu``. The mixing coefficients are laid out as
    ``(BATCH_SIZE, 3, 32, 32)``.

    Args:
        netD: Critic applied to the interpolated batch.
        real_data: Batch of real samples with ``BATCH_SIZE * 3 * 32 * 32``
            elements.
        fake_data: Batch of generated samples.

    Returns:
        Scalar tensor ``LAMBDA * mean((||g||_2 - 1) ** 2)`` where ``g`` is
        the flattened per-sample gradient of ``netD`` at the interpolates.
    """
    # One coefficient per sample, repeated over every element of the sample.
    # Floor division keeps the size an int: on Python 3 true division yields
    # a float, which Tensor.expand rejects.
    alpha = torch.rand(BATCH_SIZE, 1)
    alpha = alpha.expand(BATCH_SIZE, real_data.nelement() // BATCH_SIZE).contiguous().view(BATCH_SIZE, 3, 32, 32)
    alpha = alpha.cuda(gpu) if use_cuda else alpha

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)

    if use_cuda:
        interpolates = interpolates.cuda(gpu)
    interpolates = autograd.Variable(interpolates, requires_grad=True)

    disc_interpolates = netD(interpolates)

    # Seed of ones: differentiates the sum of the critic outputs.
    ones = torch.ones(disc_interpolates.size())
    if use_cuda:
        ones = ones.cuda(gpu)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=ones,
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    # Flatten so the norm is taken over each whole sample.
    gradients = gradients.view(gradients.size(0), -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty

# For generating samples

Example #3

Source File: cyclegan.py From DepthNets with MIT License

6 votes

def compute_d_norms(self, A_real_, B_real_):
    """Return unit-gradient-norm penalties for both discriminators.

    Args:
        A_real_: Batch fed to ``self.d_a``; only its ``.data`` is used.
        B_real_: Batch fed to ``self.d_b``; only its ``.data`` is used.

    Returns:
        Tuple ``(gp_a, gp_b)``; each is ``mean((||g||_2 - 1) ** 2)`` with
        ``g`` the per-sample flattened gradient of the discriminator output
        with respect to its input.
    """
    # Fresh inputs that track gradients, cut off from any upstream graph.
    a_input = Variable(A_real_.data, requires_grad=True)
    b_input = Variable(B_real_.data, requires_grad=True)
    a_scores = self.d_a(a_input)
    b_scores = self.d_b(b_input)

    penalties = []
    for scores, inputs in ((a_scores, a_input), (b_scores, b_input)):
        seed = torch.ones(scores.size())
        if self.use_cuda:
            seed = seed.cuda()
        input_grad = grad(
            outputs=scores,
            inputs=inputs,
            grad_outputs=seed,
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
        )[0]
        per_sample_norm = input_grad.view(input_grad.size()[0], -1).norm(2, 1)
        penalties.append(((per_sample_norm - 1) ** 2).mean())

    gp_a, gp_b = penalties
    return gp_a, gp_b

Example #4

Source File: sliced_sm.py From ncsn with GNU General Public License v3.0

6 votes

def sliced_score_matching(energy_net, samples, n_particles=1):
    """Sliced score matching loss using unit-norm random directions.

    Args:
        energy_net: Callable returning an energy for each row of its input;
            the log-density is taken as its negation.
        samples: Batch of samples; repeated ``n_particles`` times.
        n_particles: Number of random directions drawn per sample.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar tensors, where ``loss1`` is
        half the squared projection of the score on the directions,
        ``loss2`` is the direction-Hessian-direction term, and ``loss`` is
        their sum, each averaged over particles and samples.
    """
    tiled = samples.unsqueeze(0).expand(n_particles, *samples.shape)
    dup_samples = tiled.contiguous().view(-1, *samples.shape[1:])
    dup_samples.requires_grad_(True)

    # Random directions, normalised along the last dimension.
    directions = torch.randn_like(dup_samples)
    directions = directions / torch.norm(directions, dim=-1, keepdim=True)

    log_density = -energy_net(dup_samples).sum()
    score = autograd.grad(log_density, dup_samples, create_graph=True)[0]

    # Projection of the score on each direction, one value per row.
    projected = torch.sum(score * directions, dim=-1)
    loss1 = projected ** 2 * 0.5

    # Differentiating the summed projection gives the Hessian-vector product.
    hvp = autograd.grad(projected.sum(), dup_samples, create_graph=True)[0]
    loss2 = torch.sum(directions * hvp, dim=-1)

    loss1 = loss1.view(n_particles, -1).mean(dim=0)
    loss2 = loss2.view(n_particles, -1).mean(dim=0)
    total = loss1 + loss2
    return total.mean(), loss1.mean(), loss2.mean()

Example #5

Source File: darcy.py From pde-surrogate with MIT License

6 votes

def conv_constitutive_constraint_nonlinear_exp(input, output, sobel_filter):
    """Mean squared residual of a nonlinear extension of Darcy's law

        sigma = - exp(K * u) grad(u)

    Args:
        input: K
        output: u, sigma1, sigma2 stacked along dimension 1
        sobel_filter: object providing ``grad_h`` and ``grad_v``, applied
            to the ``u`` channel

    Returns:
        Scalar tensor: mean of the summed squared differences between the
        sigma channels of ``output`` and the flux computed from ``u``.
    """
    # Indexing with a list keeps the channel dimension.
    u = output[:, [0]]
    du_h = sobel_filter.grad_h(u)
    du_v = sobel_filter.grad_v(u)

    neg_conductivity = -torch.exp(input * u)
    residual_h = output[:, [1]] - neg_conductivity * du_h
    residual_v = output[:, [2]] - neg_conductivity * du_v

    return (residual_h ** 2 + residual_v ** 2).mean()

Example #6

Source File: sliced_sm.py From ncsn with GNU General Public License v3.0

6 votes

def sliced_score_matching_vr(energy_net, samples, n_particles=1):
    """Sliced score matching loss with the squared score norm as first term.

    Args:
        energy_net: Callable returning an energy for each row of its input;
            the log-density is taken as its negation.
        samples: Batch of samples; repeated ``n_particles`` times.
        n_particles: Number of random directions drawn per sample.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar tensors, where ``loss1`` is
        half the squared score norm over the last dimension, ``loss2`` the
        direction-Hessian-direction term, and ``loss`` their sum.
    """
    tiled = samples.unsqueeze(0).expand(n_particles, *samples.shape)
    dup_samples = tiled.contiguous().view(-1, *samples.shape[1:])
    dup_samples.requires_grad_(True)

    # Standard normal directions (not normalised in this variant).
    directions = torch.randn_like(dup_samples)

    log_density = -energy_net(dup_samples).sum()
    score = autograd.grad(log_density, dup_samples, create_graph=True)[0]
    loss1 = (score * score).sum(dim=-1) / 2.

    # Gradient of the score-direction inner product is the Hessian-vector product.
    inner = (score * directions).sum()
    hvp = autograd.grad(inner, dup_samples, create_graph=True)[0]
    loss2 = (directions * hvp).sum(dim=-1)

    loss1 = loss1.view(n_particles, -1).mean(dim=0)
    loss2 = loss2.view(n_particles, -1).mean(dim=0)

    total = loss1 + loss2
    return total.mean(), loss1.mean(), loss2.mean()

Example #7

Source File: sliced_sm.py From ncsn with GNU General Public License v3.0

6 votes

def sliced_score_estimation_vr(score_net, samples, n_particles=1):
    """Sliced score estimation loss for a network that outputs the score.

    Be careful if the shape of samples is not B x x_dim!!!!

    Args:
        score_net: Callable mapping the duplicated samples to a score of
            the same shape.
        samples: Batch of samples; repeated ``n_particles`` times.
        n_particles: Number of random directions drawn per sample.

    Returns:
        Tuple ``(loss, loss1, loss2)`` of scalar tensors, where ``loss1`` is
        half the squared score norm per row, ``loss2`` the
        direction-Jacobian-direction term, and ``loss`` their sum.
    """
    tiled = samples.unsqueeze(0).expand(n_particles, *samples.shape)
    dup_samples = tiled.contiguous().view(-1, *samples.shape[1:])
    dup_samples.requires_grad_(True)
    n_rows = dup_samples.shape[0]

    directions = torch.randn_like(dup_samples)

    score = score_net(dup_samples)
    # Gradient of the score-direction inner product with respect to the input.
    inner = (score * directions).sum()
    jvp = autograd.grad(inner, dup_samples, create_graph=True)[0]

    # Flatten everything but the row dimension before reducing.
    flat_score = score.view(n_rows, -1)
    loss1 = (flat_score * flat_score).sum(dim=-1) / 2.
    loss2 = (directions * jvp).view(n_rows, -1).sum(dim=-1)

    loss1 = loss1.view(n_particles, -1).mean(dim=0)
    loss2 = loss2.view(n_particles, -1).mean(dim=0)

    total = loss1 + loss2
    return total.mean(), loss1.mean(), loss2.mean()

Example #8

Source File: utils.py From Text-to-Image-Synthesis with GNU General Public License v3.0

6 votes

def compute_GP(netD, real_data, real_embed, fake_data, LAMBDA):
    """Return the WGAN-GP gradient penalty for a conditional critic.

    Works for any sample shape and on whatever device ``real_data`` lives
    on, instead of assuming 3 x 64 x 64 CUDA images.

    Args:
        netD: Critic called as ``netD(interpolates, real_embed)``; the first
            element of its return value is used as the critic output.
        real_data: Batch of real samples, batch dimension first.
        real_embed: Conditioning passed through to ``netD`` unchanged.
        fake_data: Batch of generated samples, same shape as ``real_data``.
        LAMBDA: Weight applied to the penalty.

    Returns:
        Scalar tensor ``LAMBDA * mean((||g||_2 - 1) ** 2)`` where ``g`` is
        the gradient of the critic output with respect to each interpolated
        sample, flattened per sample.
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over all other dimensions.
    alpha_shape = [batch_size] + [1] * (real_data.dim() - 1)
    alpha = torch.rand(alpha_shape, device=real_data.device)

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    # Detach from any upstream graph and make it a leaf that tracks gradients.
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates, _ = netD(interpolates, real_embed)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]

    # Flatten first: the penalty constrains the norm of each sample's whole
    # gradient, not the norm along the channel dimension alone.
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA

    return gradient_penalty

Example #9

Source File: lsd.py From torchsupport with MIT License

6 votes

def energy_loss(self, data, score, critic):
    """Combine a Jacobian term and a score-critic term into one loss.

    Also records the mean Jacobian term, the mean critic term and the mean
    squared score in ``self.current_losses``; the squared-score penalty is
    only recorded, not added to the returned value.

    Args:
        data: Tensor that ``score`` and ``critic`` were computed from.
        score: Score values; its first dimension is the batch size.
        critic: Critic output, multiplied elementwise with the score gradient.

    Returns:
        Scalar tensor: batch mean of ``jacobian_term + critic_term``.
    """
    directions = self.noise_vectors(critic)
    batch = score.size(0)

    score_grad = ag.grad(
        score, data, grad_outputs=torch.ones_like(score), create_graph=True
    )[0]
    # Vector-Jacobian product of the critic with the noise directions.
    critic_vjp = ag.grad(
        critic, data, grad_outputs=directions, create_graph=True
    )[0]

    jacobian_term = (directions * critic_vjp).view(batch, -1).sum(dim=-1)
    critic_term = (score_grad * critic).view(batch, -1).sum(dim=-1)
    penalty_term = (score ** 2).mean()

    logged = (
        ("jacobian", jacobian_term),
        ("critic", critic_term),
        ("penalty", penalty_term),
    )
    for key, value in logged:
        self.current_losses[key] = float(value.mean())

    return (jacobian_term + critic_term).mean()

Example #10

Source File: samplers.py From torchsupport with MIT License

6 votes

def integrate(self, score, data, *args):
    """Repeatedly step ``data`` against the gradient of ``score``.

    Each iteration evaluates ``score`` at a noise-perturbed copy of the
    current data, takes the gradient with respect to the data, and moves
    the data by ``-self.rate * gradient``. ``self.step`` is incremented
    once per call.

    Args:
        score: Callable returning an energy, or a list/tuple whose first
            element is the energy.
        data: Starting tensor.
        *args: Extra arguments forwarded to ``score``.

    Returns:
        The updated, detached data tensor.
    """
    done = False
    count = 0
    # The first call (self.step == 0) uses ten times as many steps as later calls.
    step_count = self.steps if self.step > 0 else 10 * self.steps
    while not done:
      # presumably enables gradient tracking on data/args in place — helper
      # is defined elsewhere, TODO confirm
      make_differentiable(data)
      make_differentiable(args)
      # The energy is evaluated at the data plus scaled Gaussian noise.
      energy = score(data + self.noise * torch.randn_like(data), *args)
      if isinstance(energy, (list, tuple)):
        # Keep only the first returned element as the energy.
        energy, *_ = energy
      # Seed of ones: gradient of the summed energy with respect to data.
      gradient = ag.grad(energy, data, torch.ones_like(energy))[0]
      if self.max_norm:
        gradient = clip_grad_by_norm(gradient, self.max_norm)
      data = data - self.rate * gradient
      if self.clamp is not None:
        data = data.clamp(*self.clamp)
      # Drop the graph so the next iteration starts from a fresh tensor.
      data = data.detach()
      # Compared before count is incremented.
      done = count >= step_count
      if self.target is not None:
        # With a target set, also require the batch-mean energy to be at or
        # below it before stopping.
        done = done and bool((energy.mean(dim=0) <= self.target).all())
      count += 1
      if (count + 1) % 500 == 0:
        # Overwrites data in place with random values.
        data.random_()
    self.step += 1
    return data

Example #11

Source File: samplers.py From torchsupport with MIT License

6 votes

def integrate(self, score, data, *args):
    """Run ``self.steps`` gradient-guided updates on ``data.tensor``.

    Args:
        score: Callable returning an energy, or a list/tuple whose first
            element is the energy.
        data: Object exposing ``clone()``, ``detach()`` and a ``tensor``
            attribute that is read and reassigned here.
        *args: Extra arguments forwarded to ``score``.

    Returns:
        The updated, detached data object.
    """
    # Work on a copy so the caller's object is not reassigned.
    data = data.clone()
    # NOTE(review): current_energy is not used anywhere in this block.
    current_energy, *_ = score(data, *args)
    for idx in range(self.steps):
      # presumably enables gradient tracking on data/args in place — helper
      # is defined elsewhere, TODO confirm
      make_differentiable(data)
      make_differentiable(args)

      energy = score(data, *args)
      if isinstance(energy, (list, tuple)):
        # Keep only the first returned element as the energy.
        energy, *_ = energy

      # Seed of ones: gradient of the summed energy with respect to data.tensor.
      gradient = ag.grad(energy, data.tensor, torch.ones_like(energy))[0]
      if self.max_norm:
        gradient = clip_grad_by_norm(gradient, self.max_norm)

      # attempt at gradient based local update of discrete variables:
      # softmax over dim 1 of the negated gradient, sharpened by a factor of 500
      grad_prob = (-500 * gradient).softmax(dim=1)
      # Mix of a constant noise floor, the gradient-derived distribution and
      # the current tensor.
      new_prob = self.noise + self.rate * grad_prob + (1 - self.noise - self.rate) * data.tensor
      # presumably converts log-probabilities to one-hot values — helper is
      # defined elsewhere, TODO confirm
      new_val = hard_one_hot(new_prob.log())
      data.tensor = new_val

      data = data.detach()

    return data

Example #12

Source File: gan_mnist.py From wgan-gp with MIT License

6 votes

def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty of ``netD`` on mixed samples.

    Reads the module-level names ``BATCH_SIZE``, ``LAMBDA``, ``use_cuda``
    and ``gpu``.

    Args:
        netD: Critic applied to the interpolated batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.

    Returns:
        Scalar tensor ``LAMBDA * mean((norm(g, dim=1) - 1) ** 2)`` where
        ``g`` is the gradient of ``netD`` at the interpolates.
    """
    # One coefficient per sample, expanded to the shape of the data.
    mix = torch.rand(BATCH_SIZE, 1).expand(real_data.size())
    if use_cuda:
        mix = mix.cuda(gpu)

    blended = mix * real_data + ((1 - mix) * fake_data)
    if use_cuda:
        blended = blended.cuda(gpu)
    blended = autograd.Variable(blended, requires_grad=True)

    critic_out = netD(blended)

    # Seed of ones: differentiates the sum of the critic outputs.
    seed = torch.ones(critic_out.size())
    if use_cuda:
        seed = seed.cuda(gpu)

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    slope_error = input_grad.norm(2, dim=1) - 1
    return (slope_error ** 2).mean() * LAMBDA

# ==================Definition End======================

Example #13

Source File: gan_language.py From wgan-gp with MIT License

6 votes

def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty of ``netD`` on mixed samples.

    Reads the module-level names ``BATCH_SIZE``, ``LAMBDA``, ``use_cuda``
    and ``gpu``. The mixing coefficients have shape ``(BATCH_SIZE, 1, 1)``,
    so ``real_data`` is expected to be three-dimensional.

    Args:
        netD: Critic applied to the interpolated batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.

    Returns:
        Scalar tensor ``LAMBDA * mean((||g||_2 - 1) ** 2)`` where ``g`` is
        the gradient of ``netD`` at each interpolated sample, flattened per
        sample.
    """
    alpha = torch.rand(BATCH_SIZE, 1, 1)
    alpha = alpha.expand(real_data.size())
    alpha = alpha.cuda(gpu) if use_cuda else alpha

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)

    if use_cuda:
        interpolates = interpolates.cuda(gpu)
    interpolates = autograd.Variable(interpolates, requires_grad=True)

    disc_interpolates = netD(interpolates)

    # Seed of ones: differentiates the sum of the critic outputs.
    ones = torch.ones(disc_interpolates.size())
    if use_cuda:
        ones = ones.cuda(gpu)

    # TODO: Make ConvBackward differentiable
    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=ones,
                              create_graph=True, retain_graph=True, only_inputs=True)[0]
    # Flatten first: on 3-D data norm(dim=1) alone reduces only one axis and
    # would not give the norm of each sample's whole gradient.
    gradients = gradients.reshape(gradients.size(0), -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA
    return gradient_penalty

Example #14

Source File: wgan_gp_loss.py From pggan-pytorch with MIT License

6 votes

def calc_gradient_penalty(D, real_data, fake_data, iwass_lambda, iwass_target):
    """Return per-sample gradient penalties for critic ``D``.

    Reuses the module-level buffers ``mixing_factors`` and ``grad_outputs``
    between calls, reallocating them (as CUDA float tensors) when the batch
    size changes.

    Args:
        D: Critic applied to the mixed batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.
        iwass_lambda: Weight applied to the penalty.
        iwass_target: Target gradient norm.

    Returns:
        Tensor with one value per sample (no reduction is applied):
        ``(||g||_2 - iwass_target) ** 2 * iwass_lambda / iwass_target ** 2``.
    """
    global mixing_factors, grad_outputs

    n = real_data.size(0)
    if mixing_factors is None or n != mixing_factors.size(0):
        mixing_factors = torch.cuda.FloatTensor(n, 1)
    # Fresh uniform coefficients on every call, written into the cached buffer.
    mixing_factors.uniform_()

    real_part = mul_rowwise(real_data, 1 - mixing_factors)
    fake_part = mul_rowwise(fake_data, mixing_factors)
    mixed_data = Variable(real_part + fake_part, requires_grad=True)
    mixed_scores = D(mixed_data)

    if grad_outputs is None or mixed_scores.size(0) != grad_outputs.size(0):
        # Cached seed of ones matching the critic output.
        grad_outputs = torch.cuda.FloatTensor(mixed_scores.size())
        grad_outputs.fill_(1.)

    slopes = grad(
        outputs=mixed_scores,
        inputs=mixed_data,
        grad_outputs=grad_outputs,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    slope_norms = slopes.view(slopes.size(0), -1).norm(2, dim=1)

    return ((slope_norms - iwass_target) ** 2) * iwass_lambda / (iwass_target ** 2)

Example #15

Source File: gan_toy.py From wgan-gp with MIT License

6 votes

def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty of ``netD`` on mixed samples.

    Reads the module-level names ``BATCH_SIZE``, ``LAMBDA`` and ``use_cuda``.

    Args:
        netD: Critic applied to the interpolated batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.

    Returns:
        Scalar tensor ``LAMBDA * mean((norm(g, dim=1) - 1) ** 2)`` where
        ``g`` is the gradient of ``netD`` at the interpolates.
    """
    # One coefficient per sample, expanded to the shape of the data.
    mix = torch.rand(BATCH_SIZE, 1).expand(real_data.size())
    if use_cuda:
        mix = mix.cuda()

    blended = mix * real_data + ((1 - mix) * fake_data)
    if use_cuda:
        blended = blended.cuda()
    blended = autograd.Variable(blended, requires_grad=True)

    critic_out = netD(blended)

    # Seed of ones: differentiates the sum of the critic outputs.
    seed = torch.ones(critic_out.size())
    if use_cuda:
        seed = seed.cuda()

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    slope_error = input_grad.norm(2, dim=1) - 1
    return (slope_error ** 2).mean() * LAMBDA

# ==================Definition End======================

Example #16

Source File: utils.py From tfm-franroldan-wav2pix with GNU General Public License v3.0

6 votes

def compute_GP(netD, real_data, real_embed, fake_data, LAMBDA, project=False):
    """Return the WGAN-GP gradient penalty for a conditional critic.

    Works for any sample rank and on whatever device ``real_data`` lives
    on. For two-dimensional data the mixing matches a ``(B, 1)``
    coefficient expanded to the data shape.

    Args:
        netD: Critic called as ``netD(interpolates, real_embed,
            project=project)``; the first element of its return value is
            used as the critic output.
        real_data: Batch of real samples, batch dimension first.
        real_embed: Conditioning passed through to ``netD`` unchanged.
        fake_data: Batch of generated samples, same shape as ``real_data``.
        LAMBDA: Weight applied to the penalty.
        project: Forwarded to ``netD``.

    Returns:
        Scalar tensor ``LAMBDA * mean((||g||_2 - 1) ** 2)`` where ``g`` is
        the gradient of the critic output with respect to each interpolated
        sample, flattened per sample.
    """
    batch_size = real_data.size(0)

    # One mixing coefficient per sample, broadcast over all other dimensions.
    # Expanding a (B, 1) tensor to a 4-D size would align it with the
    # trailing dimensions instead of the batch dimension.
    alpha_shape = [batch_size] + [1] * (real_data.dim() - 1)
    alpha = torch.rand(alpha_shape, device=real_data.device)

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    # Detach from any upstream graph and make it a leaf that tracks gradients.
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates, _ = netD(interpolates, real_embed, project=project)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]

    # Flatten so the norm covers each sample's whole gradient.
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * LAMBDA

    return gradient_penalty

Example #17

Source File: stargan.py From PyTorch-GAN with MIT License

6 votes

def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    ``D`` returns a pair; its first element is the critic output.
    Returns the scalar ``mean((||g||_2 - 1) ** 2)`` with ``g`` the flattened
    per-sample gradient at the interpolated samples.
    """
    n = real_samples.size(0)
    # One random mixing weight per sample, broadcast over the other three dims.
    mix = Tensor(np.random.random((n, 1, 1, 1)))
    blended = mix * real_samples + ((1 - mix) * fake_samples)
    blended = blended.requires_grad_(True)

    critic_out, _ = D(blended)
    # Seed of ones matching the critic output.
    seed = Variable(Tensor(np.ones(critic_out.shape)), requires_grad=False)

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample_norm = input_grad.view(input_grad.size(0), -1).norm(2, dim=1)
    return ((per_sample_norm - 1) ** 2).mean()

Example #18

Source File: wgan_gp.py From PyTorch-GAN with MIT License

6 votes

def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    Returns the scalar ``mean((||g||_2 - 1) ** 2)`` with ``g`` the flattened
    per-sample gradient of ``D`` at the interpolated samples. The seed for
    the gradient has shape ``(batch, 1)``.
    """
    n = real_samples.size(0)
    # One random mixing weight per sample, broadcast over the other three dims.
    mix = Tensor(np.random.random((n, 1, 1, 1)))
    blended = mix * real_samples + ((1 - mix) * fake_samples)
    blended = blended.requires_grad_(True)

    critic_out = D(blended)
    # Seed of ones, one entry per sample.
    seed = Variable(Tensor(real_samples.shape[0], 1).fill_(1.0), requires_grad=False)

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample_norm = input_grad.view(input_grad.size(0), -1).norm(2, dim=1)
    return ((per_sample_norm - 1) ** 2).mean()


# ----------
#  Training
# ----------

Example #19

Source File: dualgan.py From PyTorch-GAN with MIT License

6 votes

def compute_gradient_penalty(D, real_samples, fake_samples):
    """Calculates the gradient penalty loss for WGAN GP.

    Returns the scalar ``mean((||g||_2 - 1) ** 2)`` with ``g`` the flattened
    per-sample gradient of ``D`` at the interpolated samples.
    """
    n = real_samples.size(0)
    # One random mixing weight per sample, broadcast over the other three dims.
    mix = FloatTensor(np.random.random((n, 1, 1, 1)))
    blended = mix * real_samples + ((1 - mix) * fake_samples)
    blended = blended.requires_grad_(True)

    validity = D(blended)
    # Seed of ones matching the critic output.
    seed = Variable(FloatTensor(np.ones(validity.shape)), requires_grad=False)

    input_grad = autograd.grad(
        outputs=validity,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample_norm = input_grad.view(input_grad.size(0), -1).norm(2, dim=1)
    return ((per_sample_norm - 1) ** 2).mean()

Example #20

Source File: models.py From GINN with Apache License 2.0

6 votes

def gradient_penalty(net, real_data, fake_data, device):
    """Return ``mean((||g||_2 - 1) ** 2)`` for ``net`` on mixed samples.

    Args:
        net: Critic applied to the interpolated batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.
        device: Device the mixing weights, interpolates and gradient seed
            are moved to.

    Returns:
        Scalar tensor; ``g`` is the flattened per-sample gradient of ``net``
        at the interpolates.
    """
    # One coefficient per sample, expanded to the shape of the data.
    mix = torch.rand(real_data.shape[0], 1).expand(real_data.size()).to(device)

    blended = (mix * real_data + ((1 - mix) * fake_data)).to(device)
    blended = autograd.Variable(blended, requires_grad=True)

    critic_out = net(blended)
    seed = torch.ones(critic_out.size()).to(device)

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample_norm = input_grad.view(input_grad.size(0), -1).norm(2, dim=1)
    return ((per_sample_norm - 1) ** 2).mean()

Example #21

Source File: models.py From GINN with Apache License 2.0

6 votes

def hard_gradient_penalty(net, real_data, fake_data, device):
    """Gradient penalty where each element is taken from real or fake data.

    Instead of blending, a random boolean mask picks every element from
    either ``real_data`` or ``fake_data``.

    Args:
        net: Critic applied to the mixed batch.
        real_data: Batch of real samples.
        fake_data: Batch of generated samples.
        device: Device the mask, mixed samples and gradient seed are moved to.

    Returns:
        Scalar tensor ``mean((||g||_2 - 1) ** 2)``; ``g`` is the flattened
        per-sample gradient of ``net`` at the mixed samples.
    """
    # Elementwise choice: True keeps the real value, False the fake one.
    keep_real = torch.FloatTensor(real_data.shape).to(device).uniform_() > 0.5
    real_weight = keep_real.float()
    fake_weight = (~keep_real).float()

    mixed = (real_weight * real_data + fake_weight * fake_data).to(device)
    mixed = autograd.Variable(mixed, requires_grad=True)

    critic_out = net(mixed)
    seed = torch.ones(critic_out.size()).to(device)

    input_grad = autograd.grad(
        outputs=critic_out,
        inputs=mixed,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    per_sample_norm = input_grad.view(input_grad.size(0), -1).norm(2, dim=1)
    return (per_sample_norm - 1).pow(2).mean()

Example #22

Source File: utils.py From pytorch-arda with MIT License

6 votes

def calc_gradient_penalty(D, real_data, fake_data):
    """Calculate gradient penalty for WGAN-GP.

    Reads ``params.batch_size`` and ``params.penalty_lambda`` and uses the
    helpers ``make_cuda`` and ``make_variable`` defined elsewhere.
    Returns ``penalty_lambda * mean((norm(g, dim=1) - 1) ** 2)`` with ``g``
    the gradient of ``D`` at the interpolated samples.
    """
    # One coefficient per sample, expanded to the shape of the data.
    mix = torch.rand(params.batch_size, 1)
    mix = make_cuda(mix.expand(real_data.size()))

    blended = make_variable(mix * real_data + ((1 - mix) * fake_data))
    blended.requires_grad = True

    critic_out = D(blended)
    seed = make_cuda(torch.ones(critic_out.size()))

    input_grad = grad(
        outputs=critic_out,
        inputs=blended,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    slope_error = input_grad.norm(2, dim=1) - 1
    return params.penalty_lambda * (slope_error ** 2).mean()

Example #23

Source File: base.py From madminer with MIT License

6 votes

def log_likelihood_and_score(self, theta, x, **kwargs):
    """ Calculates u(x), log p(x), and the score t(x) with a Gaussian base density

    A single-row ``theta`` is expanded to one row per row of ``x``. Gradient
    tracking is switched on for ``theta`` if it was off. The score is the
    gradient of the log likelihood with respect to ``theta``.

    Returns:
        Tuple ``(u, log_likelihood, score)``.
    """
    if theta.shape[0] == 1:
        theta = theta.expand(x.shape[0], -1)

    # The score needs gradients with respect to theta.
    if not theta.requires_grad:
        theta.requires_grad = True

    u, log_likelihood = self.log_likelihood(theta, x, **kwargs)

    # Seed of ones: every sample's log likelihood is differentiated.
    seed = torch.ones_like(log_likelihood.data)
    score = grad(
        log_likelihood,
        theta,
        grad_outputs=seed,
        only_inputs=True,
        create_graph=True,
    )[0]

    return u, log_likelihood, score

Example #24

Source File: score.py From madminer with MIT License

6 votes

def forward(self, x, return_grad_x=False):
    """Apply the layers in order, with the activation between layers.

    Args:
        x: Input tensor. When ``return_grad_x`` is set and ``x`` does not
            track gradients yet, tracking is switched on in place.
        return_grad_x: Also return the gradient of the output with respect
            to ``x``.

    Returns:
        The network output, or ``(output, x_gradient)`` when
        ``return_grad_x`` is set.
    """
    # Track gradient wrt x
    if return_grad_x and not x.requires_grad:
        x.requires_grad = True

    # Forward pass: the activation is applied before every layer but the first
    hidden = x
    for index, layer in enumerate(self.layers):
        if index > 0:
            hidden = self.activation(hidden)
        hidden = layer(hidden)

    if not return_grad_x:
        return hidden

    # Calculate gradient, seeded with ones over all outputs
    seed = torch.ones_like(hidden.data)
    x_gradient = grad(hidden, x, grad_outputs=seed, only_inputs=True, create_graph=True)[0]
    return hidden, x_gradient

Example #25

Source File: functional.py From torchgan with MIT License

6 votes

def wasserstein_gradient_penalty(interpolate, d_interpolate, reduction="mean"):
    """Squared deviation of the gradient norm from 1.

    Args:
        interpolate: Input the critic output was computed from.
        d_interpolate: Critic output at ``interpolate``.
        reduction: Passed to ``reduce`` together with the penalty.

    Returns:
        ``reduce((||g||_2 - 1) ** 2, reduction)`` where the norm is taken
        over the whole gradient tensor (no ``dim`` is given).
    """
    seed = torch.ones_like(d_interpolate)
    input_grad = autograd.grad(
        outputs=d_interpolate,
        inputs=interpolate,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]

    slope_error = input_grad.norm(2) - 1
    return reduce(slope_error ** 2, reduction)


# Dragan Penalty

Example #26

Source File: functional.py From torchgan with MIT License

6 votes

def dragan_gradient_penalty(interpolate, d_interpolate, k=1.0, reduction="mean"):
    """Squared deviation of the gradient norm from ``k``.

    Args:
        interpolate: Input the discriminator output was computed from.
        d_interpolate: Discriminator output at ``interpolate``.
        k: Target gradient norm.
        reduction: Passed to ``reduce`` together with the penalty.

    Returns:
        ``reduce((||g||_2 - k) ** 2, reduction)`` where the norm is taken
        over the whole gradient tensor (no ``dim`` is given).
    """
    seed = torch.ones_like(d_interpolate)
    input_grad = autograd.grad(
        outputs=d_interpolate,
        inputs=interpolate,
        grad_outputs=seed,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
        allow_unused=True,
    )[0]

    slope_error = input_grad.norm(2) - k
    return reduce(slope_error ** 2, reduction)


# Auxiliary Classifier Loss

Example #27

Source File: nnBuildUnits.py From medSynthesisV1 with MIT License

6 votes

def calc_gradient_penalty(netD, real_data, fake_data):
    """Return the WGAN-GP gradient penalty of ``netD`` on mixed samples.

    Runs on whatever device ``real_data`` lives on.

    Args:
        netD: Critic applied to the interpolated batch.
        real_data: Batch of real samples, batch dimension first.
        fake_data: Batch of generated samples, same shape as ``real_data``.

    Returns:
        Scalar tensor ``mean((||g||_2 - 1) ** 2)`` where ``g`` is the
        gradient of ``netD`` at each interpolated sample, flattened per
        sample.
    """
    batch_size = real_data.shape[0]

    # Mixing weights are uniform in [0, 1) so every interpolate lies between
    # its real and fake sample; normally distributed weights would fall
    # outside that range.
    alpha_shape = [batch_size] + [1] * (real_data.dim() - 1)
    alpha = torch.rand(alpha_shape, device=real_data.device)

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    # Detach from any upstream graph and make it a leaf that tracks gradients.
    interpolates = interpolates.detach().requires_grad_(True)

    disc_interpolates = netD(interpolates)

    gradients = autograd.grad(outputs=disc_interpolates, inputs=interpolates,
                              grad_outputs=torch.ones_like(disc_interpolates),
                              create_graph=True, retain_graph=True, only_inputs=True)[0]

    # Flatten so the norm covers each sample's whole gradient.
    gradients = gradients.reshape(batch_size, -1)

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gradient_penalty

Example #28

Source File: training.py From Self-Supervised-Gans-Pytorch with MIT License

5 votes

def _gradient_penalty(self, real_data, generated_data):
    """Weighted gradient penalty on mixes of real and generated data.

    Appends the summed per-sample gradient norm of the batch to
    ``self.losses['gradient_norm']``.

    Args:
        real_data: Batch of real samples (four-dimensional).
        generated_data: Batch of generated samples.

    Returns:
        Scalar tensor ``gp_weight * mean((||g|| - 1) ** 2)``, with the norm
        computed per sample using a small epsilon inside the square root.
    """
    batch_size = real_data.size()[0]

    # Calculate interpolation: one weight per sample, expanded over the sample
    mix = torch.rand(batch_size, 1, 1, 1).expand_as(real_data)
    if self.use_cuda:
        mix = mix.cuda()
    blended = mix * real_data.data + (1 - mix) * generated_data.data
    blended = Variable(blended, requires_grad=True)
    if self.use_cuda:
        blended = blended.cuda()

    # Second element of the discriminator's return value is the probability
    _, prob, _, _ = self.D(blended)

    # Seed of ones for the gradient of the probabilities wrt the examples
    seed = torch.ones(prob.size())
    if self.use_cuda:
        seed = seed.cuda()
    input_grad = torch_grad(outputs=prob, inputs=blended, grad_outputs=seed,
                            create_graph=True, retain_graph=True)[0]

    # Gradients have shape (batch_size, num_channels, img_width, img_height),
    # so flatten to easily take norm per example in batch
    flat = input_grad.view(batch_size, -1)
    self.losses['gradient_norm'].append(flat.norm(2, dim=1).sum().data)

    # Derivatives of the gradient close to 0 can cause problems because of
    # the square root, so manually calculate norm and add epsilon
    safe_norm = torch.sqrt(torch.sum(flat ** 2, dim=1) + 1e-12)

    return self.gp_weight * ((safe_norm - 1) ** 2).mean()

Example #29

Source File: ops.py From MLDG with MIT License

5 votes

def linear(inputs, weight, bias, meta_step_size=0.001, meta_loss=None, stop_gradient=False):
    """Linear layer, optionally using parameters after one gradient step.

    Without ``meta_loss`` this is ``F.linear(inputs, weight, bias)``. With
    it, each parameter ``p`` is replaced by
    ``p - meta_step_size * d(meta_loss)/dp`` before the layer is applied.

    Args:
        inputs: Input tensor.
        weight: Weight tensor.
        bias: Bias tensor or ``None``; ``None`` is passed through unchanged.
        meta_step_size: Step size of the parameter update.
        meta_loss: Loss whose gradients define the update, or ``None``.
        stop_gradient: When true, the gradients are used as constants
            (their data is re-wrapped without gradient tracking).

    Returns:
        Output of ``F.linear`` with the selected parameters.
    """
    if meta_loss is None:
        return F.linear(inputs, weight, bias)

    def stepped(param):
        # One gradient step on a single parameter.
        step = autograd.grad(meta_loss, param, create_graph=True)[0]
        if stop_gradient:
            step = Variable(step.data, requires_grad=False)
        return param - step * meta_step_size

    adapted_weight = stepped(weight)
    adapted_bias = stepped(bias) if bias is not None else bias
    return F.linear(inputs, adapted_weight, adapted_bias)

Example #30

Source File: ops.py From MLDG with MIT License

5 votes

def conv2d(inputs, weight, bias, meta_step_size=0.001, stride=1, padding=0, dilation=1, groups=1, meta_loss=None,
           stop_gradient=False):
    """2-D convolution, optionally using parameters after one gradient step.

    Without ``meta_loss`` this is a plain ``F.conv2d`` call. With it, each
    parameter ``p`` is replaced by ``p - meta_step_size * d(meta_loss)/dp``
    before the convolution is applied.

    Args:
        inputs: Input tensor.
        weight: Kernel tensor.
        bias: Bias tensor or ``None``; ``None`` is passed through unchanged.
        meta_step_size: Step size of the parameter update.
        stride, padding, dilation, groups: Forwarded to ``F.conv2d``.
        meta_loss: Loss whose gradients define the update, or ``None``.
        stop_gradient: When true, the gradients are used as constants
            (their data is re-wrapped without gradient tracking).

    Returns:
        Output of ``F.conv2d`` with the selected parameters.
    """
    if meta_loss is None:
        return F.conv2d(inputs, weight, bias, stride, padding, dilation, groups)

    def stepped(param):
        # One gradient step on a single parameter.
        step = autograd.grad(meta_loss, param, create_graph=True)[0]
        if stop_gradient:
            step = Variable(step.data, requires_grad=False)
        return param - step * meta_step_size

    adapted_weight = stepped(weight)
    adapted_bias = stepped(bias) if bias is not None else bias
    return F.conv2d(inputs, adapted_weight, adapted_bias, stride, padding, dilation, groups)