Python torch.symeig() Examples

The following are 22 code examples of torch.symeig(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the torch module, or try the search function.
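Note that torch.symeig() returns the eigenvalues of a symmetric matrix in ascending order, along with the eigenvectors when eigenvectors=True. The function was later deprecated in favor of torch.linalg.eigh and eventually removed, so a minimal migration sketch (assuming a PyTorch version that still has both APIs) looks like:

import torch

A = torch.randn(4, 4)
A = A + A.t()  # symeig expects a symmetric input

# Legacy API: ascending eigenvalues; eigenvectors only when requested.
w, v = torch.symeig(A, eigenvectors=True)

# Modern replacement (same ascending order; eigenvector signs may differ):
w2, v2 = torch.linalg.eigh(A)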
Example #1
Source File: autograd_hacks.py    From autograd-hacks with The Unlicense
def symsqrt(a, cond=None, return_rank=False, dtype=torch.float32):
    """Symmetric square root of a positive semi-definite matrix.
    See https://github.com/pytorch/pytorch/issues/25481"""

    s, u = torch.symeig(a, eigenvectors=True)
    cond_dict = {torch.float32: 1e3 * 1.1920929e-07, torch.float64: 1E6 * 2.220446049250313e-16}

    if cond in [None, -1]:
        cond = cond_dict[dtype]

    above_cutoff = (abs(s) > cond * torch.max(abs(s)))

    psigma_diag = torch.sqrt(s[above_cutoff])
    u = u[:, above_cutoff]

    B = u @ torch.diag(psigma_diag) @ u.t()
    if return_rank:
        return B, len(psigma_diag)
    else:
        return B 
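A quick sanity check of symsqrt (a usage sketch; the tolerance is illustrative) is that the returned factor squares back to the input:

import torch

a = torch.randn(5, 5)
a = a @ a.t()  # make the input positive semi-definite
B, rank = symsqrt(a, return_rank=True)
assert torch.allclose(B @ B, a, atol=1e-4)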
Example #2
Source File: derivatives.py    From tntorch with GNU Lesser General Public License v3.0
def active_subspace(t):
    """
    Compute the main variational directions of a tensor.

    Reference: P. Constantine et al. `"Discovering an Active Subspace in a Single-Diode Solar Cell Model" (2017) <https://arxiv.org/pdf/1406.7607.pdf>`_

    See also P. Constantine's `data set repository <https://github.com/paulcon/as-data-sets/blob/master/README.md>`_.

    :param t: input tensor
    :return: (eigvals, eigvecs): an array and a matrix, encoding the eigenpairs in descending order
    """

    grad = tn.gradient(t, dim='all')

    M = torch.zeros(t.dim(), t.dim())
    for i in range(t.dim()):
        for j in range(i, t.dim()):
            M[i, j] = tn.dot(grad[i], grad[j]) / t.size
            M[j, i] = M[i, j]

    w, v = torch.symeig(M, eigenvectors=True)
    idx = range(t.dim()-1, -1, -1)
    w = w[idx]
    v = v[:, idx]
    return w, v 
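Because torch.symeig returns eigenvalues in ascending order, the reversed range above reorders them to descending. An equivalent sketch using torch.flip (for any symmetric M):

import torch

M = torch.randn(6, 6)
M = M + M.t()

w, v = torch.symeig(M, eigenvectors=True)
w = torch.flip(w, dims=[0])  # eigenvalues, descending
v = torch.flip(v, dims=[1])  # matching eigenvector columns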
Example #3
Source File: test_power.py    From tensorgrad with Apache License 2.0
def test_forward():
    torch.manual_seed(42)
    N = 100
    tol = 1E-8
    dtype = torch.float64
    A = torch.randn(N, N, dtype=dtype)
    A = A+A.t()

    w, v = torch.symeig(A, eigenvectors=True)
    idx = torch.argmax(w.abs())

    v_exact = v[:, idx]
    v_exact = v_exact[0].sign() * v_exact  # fix the eigenvector's sign ambiguity
    
    x0 = torch.rand(N, dtype=dtype)
    x0 = x0/x0.norm()
    x = FixedPoint.apply(A, x0, tol)

    assert(torch.allclose(v_exact, x, rtol=tol, atol=tol)) 
Example #4
Source File: lanczos.py    From gpytorch with MIT License
def lanczos_tridiag_to_diag(t_mat):
    """
    Given a num_init_vecs x num_batch x k x k tridiagonal matrix t_mat,
    returns a num_init_vecs x num_batch x k set of eigenvalues
    and a num_init_vecs x num_batch x k x k set of eigenvectors.

    TODO: perform the eigenvalue computations in batch mode.
    """
    orig_device = t_mat.device
    if t_mat.size(-1) < 32:
        retr = torch.symeig(t_mat.cpu(), eigenvectors=True)
    else:
        retr = torch.symeig(t_mat, eigenvectors=True)

    evals, evecs = retr
    mask = evals.ge(0)
    # Zero out eigenvector columns whose eigenvalues are negative and set
    # those eigenvalues to 1, keeping downstream computations numerically safe.
    evecs = evecs * mask.type_as(evecs).unsqueeze(-2)
    evals = evals.masked_fill_(~mask, 1)

    return evals.to(orig_device), evecs.to(orig_device) 
Example #5
Source File: gdc.py    From pytorch_geometric with MIT License
def __expm__(self, matrix, symmetric):
        r"""Calculates matrix exponential.

        Args:
            matrix (Tensor): Matrix to take exponential of.
            symmetric (bool): Specifies whether the matrix is symmetric.

        :rtype: (:class:`Tensor`)
        """
        if symmetric:
            e, V = torch.symeig(matrix, eigenvectors=True)
            diff_mat = V @ torch.diag(e.exp()) @ V.t()
        else:
            diff_mat_np = expm(matrix.cpu().numpy())
            diff_mat = torch.Tensor(diff_mat_np).to(matrix.device)
        return diff_mat 
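For symmetric input, the eigendecomposition route computes exp(A) = V diag(exp(e)) V^T. A small check against torch.matrix_exp (available in recent PyTorch versions), done in float64 for a tight tolerance:

import torch

S = torch.randn(4, 4, dtype=torch.float64)
S = S + S.t()

e, V = torch.symeig(S, eigenvectors=True)
expm_eig = V @ torch.diag(e.exp()) @ V.t()
assert torch.allclose(expm_eig, torch.matrix_exp(S), atol=1e-10)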
Example #6
Source File: kfac_full_pruner.py    From EigenDamage-Pytorch with MIT License
def _update_inv(self):
        assert self.steps > 0, 'At least one step before update inverse!'
        eps = 1e-15
        for idx, m in enumerate(self.modules):
            # m_aa, m_gg = normalize_factors(self.m_aa[m], self.m_gg[m])
            m_aa, m_gg = self.m_aa[m], self.m_gg[m]
            self.d_a[m], self.Q_a[m] = torch.symeig(m_aa / self.steps, eigenvectors=True)
            self.d_g[m], self.Q_g[m] = torch.symeig(m_gg / self.steps, eigenvectors=True)
            self.d_a[m].mul_((self.d_a[m] > eps).float())
            self.d_g[m].mul_((self.d_g[m] > eps).float())

        self._inversed = True
        self.iter += 1 
Example #7
Source File: kfac_eigen_pruner.py    From EigenDamage-Pytorch with MIT License
def _update_inv(self):
        assert self.steps > 0, 'At least one step before update inverse!'
        eps = 1e-10
        for idx, m in enumerate(self.modules):
            m_aa, m_gg = self.m_aa[m], self.m_gg[m]
            self.d_a[m], Q_a = torch.symeig(m_aa, eigenvectors=True)
            self.d_g[m], Q_g = torch.symeig(m_gg, eigenvectors=True)

            self.d_a[m].mul_((self.d_a[m] > eps).float())
            self.d_g[m].mul_((self.d_g[m] > eps).float())

            # == write summary ==
            name = m.__class__.__name__
            eigs = (self.d_g[m].view(-1, 1) @ self.d_a[m].view(1, -1)).view(-1).cpu().data.numpy()
            self.writer.add_histogram('eigen/%s_%d' % (name, idx), eigs, self.iter)

            if self.Q_a.get(m, None) is None:
                # print('(%d)Q_a %s is None.' % (idx, m))
                self.Q_a[m] = [Q_a]  # absorb the eigen basis
            else:
                # self.Q_a[m] = [Q_a, self.Q_a[m]]
                prev_Q_a, prev_Q_g = get_rotation_layer_weights(self.model, m)
                prev_Q_a = prev_Q_a.view(prev_Q_a.size(0), prev_Q_a.size(1)).transpose(1, 0)
                prev_Q_g = prev_Q_g.view(prev_Q_g.size(0), prev_Q_g.size(1))
                self.Q_a[m] = [Q_a, prev_Q_a]

            if self.Q_g.get(m, None) is None:
                self.Q_g[m] = [Q_g]
            else:
                self.Q_g[m] = [Q_g, prev_Q_g]
        self._inversed = True
        self.iter += 1 
Example #8
Source File: test_added_diag_lazy_tensor.py    From gpytorch with MIT License
def test_precond_solve(self):
        seed = 4
        torch.random.manual_seed(seed)

        tensor = torch.randn(1000, 800)
        diag = torch.abs(torch.randn(1000))

        standard_lt = AddedDiagLazyTensor(RootLazyTensor(tensor), DiagLazyTensor(diag))
        evals, evecs = torch.symeig(standard_lt.evaluate(), eigenvectors=True)

        # this preconditioner is a simple example of near deflation
        def nonstandard_preconditioner(self):
            top_100_evecs = evecs[:, :100]
            top_100_evals = evals[:100] + 0.2 * torch.randn(100)

            precond_lt = RootLazyTensor(top_100_evecs @ torch.diag(top_100_evals ** 0.5))
            logdet = top_100_evals.log().sum()

            def precond_closure(rhs):
                rhs2 = top_100_evecs.t() @ rhs
                return top_100_evecs @ torch.diag(1.0 / top_100_evals) @ rhs2

            return precond_closure, precond_lt, logdet

        overrode_lt = AddedDiagLazyTensor(
            RootLazyTensor(tensor), DiagLazyTensor(diag), preconditioner_override=nonstandard_preconditioner
        )

        # compute a solve - mostly to make sure that we can actually perform the solve
        rhs = torch.randn(1000, 1)
        standard_solve = standard_lt.inv_matmul(rhs)
        overrode_solve = overrode_lt.inv_matmul(rhs)

        # gut checking that our preconditioner is not breaking anything
        self.assertEqual(standard_solve.shape, overrode_solve.shape)
        self.assertLess(torch.norm(standard_solve - overrode_solve) / standard_solve.norm(), 1.0) 
Example #9
Source File: util.py    From CrypTen with MIT License
def pca(data, components):
    """
    Finds the `components` top principal components of the data.
    """
    assert components > 0 and components < data.size(1), "incorrect # of PCA dimensions"
    # We switch to numpy here as torch.symeig gave strange results.
    dtype = data.dtype
    data = data.numpy()
    data -= np.mean(data, axis=0, keepdims=True)
    cov = np.cov(data.T)
    L, V = nla.eigh(cov)
    return torch.tensor(V[:, -components:], dtype=dtype) 
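A hypothetical usage sketch (the function relies on numpy as np and numpy.linalg as nla being imported in the source file); note that eigh returns eigenvalues in ascending order, so the last column of the result is the top component:

import torch

data = torch.randn(100, 5)
V = pca(data, components=2)  # (5, 2) basis; last column is the top component
projected = data @ V         # `data` was centered in place via the shared numpy buffer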
Example #10
Source File: eigh.py    From tensorgrad with Apache License 2.0
def forward(self, A):
        w, v = torch.symeig(A, eigenvectors=True)

        self.save_for_backward(w, v)
        return w, v 
Example #11
Source File: eigenmt.py    From tensorqtl with BSD 3-Clause "New" or "Revised" License
def compute_tests(genotypes_t, var_thresh=0.99, variant_window=200):
    """determine effective number of independent variants (M_eff)"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # break into windows
    windows = torch.split(genotypes_t, variant_window)

    if len(windows)>1:
        shrunk_cov_t, shrinkage_t = lw_shrink(torch.transpose(torch.stack(windows[:-1]), 1, 2))

        n_samples, n_features = windows[0].T.shape
        # indices of diagonals
        ix = torch.LongTensor(np.array([np.arange(0, n_features**2, n_features+1)+i*n_features**2 for i in range(shrunk_cov_t.shape[0])])).to(device)
        shrunk_precision_t = torch.zeros(shrunk_cov_t.shape).to(device)
        shrunk_precision_t.view(-1)[ix] = shrunk_cov_t.view(-1)[ix].pow(-0.5)
        shrunk_cor_t = torch.matmul(torch.matmul(shrunk_precision_t, shrunk_cov_t), shrunk_precision_t)
        eigenvalues_t, _ = torch.symeig(shrunk_cor_t, eigenvectors=False)

    # last window
    shrunk_cov0_t, shrinkage0_t = lw_shrink(windows[-1].t())
    shrunk_precision0_t = torch.diag(torch.diag(shrunk_cov0_t).pow(-0.5))
    shrunk_cor0_t = torch.mm(torch.mm(shrunk_precision0_t, shrunk_cov0_t), shrunk_precision0_t)
    eigenvalues0_t, _ = torch.symeig(shrunk_cor0_t, eigenvectors=False)

    if len(windows)>1:
        eigenvalues = list(eigenvalues_t.cpu().numpy())
        eigenvalues.append(eigenvalues0_t.cpu().numpy())
    else:
        eigenvalues = [eigenvalues0_t.cpu().numpy()]

    m_eff = 0
    for ev,m in zip(eigenvalues, [i.shape[0] for i in windows]):
        ev[ev < 0] = 0
        m_eff += find_num_eigs(ev, m, var_thresh=var_thresh)

    return m_eff 
Example #12
Source File: qmc.py    From botorch with MIT License
def __init__(
        self,
        mean: Tensor,
        cov: Tensor,
        seed: Optional[int] = None,
        inv_transform: bool = False,
    ) -> None:
        r"""Engine for qMC sampling from a multivariate Normal `N(\mu, \Sigma)`.

        Args:
            mean: The mean vector.
            cov: The covariance matrix.
            seed: The seed with which to seed the random number generator of the
                underlying SobolEngine.
            inv_transform: If True, use inverse transform instead of Box-Muller.
        """
        # validate inputs
        if not cov.shape[0] == cov.shape[1]:
            raise ValueError("Covariance matrix is not square.")
        if not mean.shape[0] == cov.shape[0]:
            raise ValueError("Dimension mismatch between mean and covariance.")
        if not torch.allclose(cov, cov.transpose(-1, -2)):
            raise ValueError("Covariance matrix is not symmetric.")
        self._mean = mean
        self._normal_engine = NormalQMCEngine(
            d=mean.shape[0], seed=seed, inv_transform=inv_transform
        )
        # compute Cholesky decomp; if it fails, do the eigendecomposition
        try:
            self._corr_matrix = torch.cholesky(cov).transpose(-1, -2)
        except RuntimeError:
            eigval, eigvec = torch.symeig(cov, eigenvectors=True)
            if not torch.all(eigval >= -1e-8):
                raise ValueError("Covariance matrix not PSD.")
            eigval_root = eigval.clamp_min(0.0).sqrt()
            self._corr_matrix = (eigvec * eigval_root).transpose(-1, -2) 
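When the Cholesky factorization fails, the fallback builds a factor R satisfying R^T @ R == cov from the eigendecomposition, mirroring the transposed Cholesky factor in the try branch. A sketch of that identity for a PSD matrix:

import torch

cov = torch.randn(4, 4, dtype=torch.float64)
cov = cov @ cov.t()  # a PSD covariance

eigval, eigvec = torch.symeig(cov, eigenvectors=True)
R = (eigvec * eigval.clamp_min(0.0).sqrt()).transpose(-1, -2)
assert torch.allclose(R.t() @ R, cov, atol=1e-10)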
Example #13
Source File: ekfac.py    From EKFAC-pytorch with MIT License
def _compute_kfe(self, group, state):
        """Computes the covariances."""
        mod = group['mod']
        x = self.state[group['mod']]['x']
        gy = self.state[group['mod']]['gy']
        # Computation of xxt
        if group['layer_type'] == 'Conv2d':
            if not self.sua:
                x = F.conv2d(x, group['gathering_filter'],
                             stride=mod.stride, padding=mod.padding,
                             groups=mod.in_channels)
            x = x.data.permute(1, 0, 2, 3).contiguous().view(x.shape[1], -1)
        else:
            x = x.data.t()
        if mod.bias is not None:
            ones = torch.ones_like(x[:1])
            x = torch.cat([x, ones], dim=0)
        xxt = torch.mm(x, x.t()) / float(x.shape[1])
        Ex, state['kfe_x'] = torch.symeig(xxt, eigenvectors=True)
        # Computation of ggt
        if group['layer_type'] == 'Conv2d':
            gy = gy.data.permute(1, 0, 2, 3)
            state['num_locations'] = gy.shape[2] * gy.shape[3]
            gy = gy.contiguous().view(gy.shape[0], -1)
        else:
            gy = gy.data.t()
            state['num_locations'] = 1
        ggt = torch.mm(gy, gy.t()) / float(gy.shape[1])
        Eg, state['kfe_gy'] = torch.symeig(ggt, eigenvectors=True)
        state['m2'] = Eg.unsqueeze(1) * Ex.unsqueeze(0) * state['num_locations']
        if group['layer_type'] == 'Conv2d' and self.sua:
            ws = group['params'][0].grad.data.size()
            state['m2'] = state['m2'].view(Eg.size(0), Ex.size(0), 1, 1).expand(-1, -1, ws[2], ws[3]) 
Example #14
Source File: kfac.py    From midlevel-reps with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
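The eigenbasis arithmetic in step() applies the damped Kronecker-factored inverse (G ⊗ A + λI)^{-1} to the flattened gradient without ever materializing the Kronecker product. A small numerical sketch of that identity (the names G_mat, A_mat, lam, grad are illustrative; torch.kron needs a reasonably recent PyTorch):

import torch

torch.manual_seed(0)
g_dim, a_dim, lam = 3, 4, 0.1
G_mat = torch.randn(g_dim, g_dim, dtype=torch.float64)
G_mat = G_mat @ G_mat.t()
A_mat = torch.randn(a_dim, a_dim, dtype=torch.float64)
A_mat = A_mat @ A_mat.t()
grad = torch.randn(g_dim, a_dim, dtype=torch.float64)

d_g, Q_g = torch.symeig(G_mat, eigenvectors=True)
d_a, Q_a = torch.symeig(A_mat, eigenvectors=True)

# Eigenbasis route, as in step():
v1 = Q_g.t() @ grad @ Q_a
v2 = v1 / (d_g.unsqueeze(1) * d_a.unsqueeze(0) + lam)
v = Q_g @ v2 @ Q_a.t()

# Direct route: apply (G ⊗ A + lam*I)^-1 to the row-major vec of grad.
K = torch.kron(G_mat, A_mat) + lam * torch.eye(g_dim * a_dim, dtype=torch.float64)
v_direct = (torch.inverse(K) @ grad.reshape(-1)).reshape(g_dim, a_dim)
assert torch.allclose(v, v_direct, atol=1e-8)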
Example #15
Source File: kfac.py    From Deep-Reinforcement-Learning-Hands-On with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            if p not in updates:
#                print("Not found in updates: %s" % p)
                continue
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            if p not in updates:
#                print("Not found in updates: %s" % p)
                continue
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
Example #16
Source File: kfac.py    From carla-rl with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
Example #17
Source File: PCACell.py    From EchoTorch with GNU General Public License v3.0
def finalize(self):
        """
        Finalize training with LU factorization or Pseudo-inverse
        """
        # Reshape average
        xTx, avg, tlen = self._fix(self.xTx, self.xTx_avg, self.tlen)

        # Reshape
        self.avg = avg.unsqueeze(0)

        # We need more observations than variables
        if self.tlen < self.input_dim:
            raise Exception(u"The number of observations ({}) is larger than  the number of input variables ({})".format(self.tlen, self.input_dim))
        # end if

        # Total variance
        total_var = torch.diag(xTx).sum()

        # Compute and sort eigenvalues
        d, v = torch.symeig(xTx, eigenvectors=True)

        # Check for negative eigenvalues
        if float(d.min()) < 0:
            # raise Exception(u"Got negative eigenvalues ({}). You may either set output_dim to be smaller".format(d))
            pass
        # end if

        # Indexes
        indexes = range(d.size(0)-1, -1, -1)

        # Sort by descending order
        d = torch.take(d, Variable(torch.LongTensor(indexes)))
        v = v[:, indexes]

        # Explained covariance
        self.explained_variance = torch.sum(d) / total_var

        # Store eigenvalues
        self.d = d[:self.output_dim]

        # Store eigenvectors
        self.v = v[:, :self.output_dim]

        # Total variance
        self.total_variance = total_var

        # Stop training
        self.train(False)
    # end finalize

Example #18
Source File: GDPP_loss.py    From pytorch_GAN_zoo with BSD 3-Clause "New" or "Revised" License
def GDPPLoss(phiFake, phiReal, backward=True):
    r"""
    Implementation of the GDPP loss. Can be used with any kind of GAN
    architecture.

    Args:

        phiFake (tensor) : last feature layer of the discriminator on fake data
        phiReal (tensor) : last feature layer of the discriminator on real data
        backward (bool)  : should we perform the backward operation?

    Returns:

        The loss value. The backward operation is performed within this operator
    """
    def compute_diversity(phi):
        phi = F.normalize(phi, p=2, dim=1)
        SB = torch.mm(phi, phi.t())
        eigVals, eigVecs = torch.symeig(SB, eigenvectors=True)
        return eigVals, eigVecs

    def normalize_min_max(eigVals):
        minV, maxV = torch.min(eigVals), torch.max(eigVals)
        if abs(minV - maxV) < 1e-10:
            return eigVals
        return (eigVals - minV) / (maxV - minV)

    fakeEigVals, fakeEigVecs = compute_diversity(phiFake)
    realEigVals, realEigVecs = compute_diversity(phiReal)

    # Scaling factor to make the two losses operate in comparable ranges.
    magnitudeLoss = 0.0001 * F.mse_loss(target=realEigVals, input=fakeEigVals)
    structureLoss = -torch.sum(torch.mul(fakeEigVecs, realEigVecs), 0)
    normalizedRealEigVals = normalize_min_max(realEigVals)
    weightedStructureLoss = torch.sum(
        torch.mul(normalizedRealEigVals, structureLoss))
    gdppLoss = magnitudeLoss + weightedStructureLoss

    if backward:
        gdppLoss.backward(retain_graph=True)

    return gdppLoss.item() 
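A hypothetical usage sketch: the loss compares the eigenstructure of fake and real feature matrices, and the fake features must carry gradients because the backward pass runs inside the call (the snippet assumes torch.nn.functional is imported as F, as in the source file):

import torch
import torch.nn.functional as F

phiReal = torch.randn(16, 32)
phiFake = torch.randn(16, 32, requires_grad=True)
loss_value = GDPPLoss(phiFake, phiReal, backward=True)  # populates phiFake.grad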
Example #19
Source File: kfac.py    From gym-miniworld with Apache License 2.0
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
Example #20
Source File: kfac.py    From pytorch-a2c-ppo-acktr-gail with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
Example #21
Source File: kfac.py    From dal with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1 
Example #22
Source File: kfac.py    From pytorch-pommerman-rl with MIT License
def step(self):
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        updates = {}
        for i, m in enumerate(self.modules):
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            la = self.damping + self.weight_decay

            if self.steps % self.Tf == 0:
                # My asynchronous implementation exists, I will add it later.
                # Experimenting with different ways to do this in PyTorch.
                self.d_a[m], self.Q_a[m] = torch.symeig(
                    self.m_aa[m], eigenvectors=True)
                self.d_g[m], self.Q_g[m] = torch.symeig(
                    self.m_gg[m], eigenvectors=True)

                self.d_a[m].mul_((self.d_a[m] > 1e-6).float())
                self.d_g[m].mul_((self.d_g[m] > 1e-6).float())

            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1