import numpy as np
import torch

from pytorchrl.distributions.base import Distribution
from pytorchrl.misc.tensor_utils import constant


class DiagonalGaussian(Distribution):
    """
    Not a single distribution but rather a batch of independent
    diagonal-covariance Gaussians: each row of means/log_stds
    parameterizes one distribution.
    """

    def __init__(self, means, log_stds):
        """
        Parameters
        ----------
        means (Variable): Means, of shape (..., dim).
        log_stds (Variable): Log standard deviations, of shape (..., dim).
        """
        self.means = means
        self.log_stds = log_stds
        # dim is the dimension of the action space
        self.dim = self.means.size()[-1]

    @classmethod
    def from_dict(cls, means, log_stds):
        """
        Parameters
        ----------
        means (Variable):
        log_stds (Variable):
        """
        return cls(means=means, log_stds=log_stds)

    def entropy(self):
        """
        The entropy of a univariate Gaussian is given by

            1/2 * log(2 * pi * e * sigma^2)
            = log(sqrt(2 * pi * e) * sigma)
            = log(sigma) + log(sqrt(2 * pi * e))

        The dimensions of a diagonal Gaussian are independent, so the
        total entropy is the sum of the per-dimension entropies.
        """
        return np.sum(
            self.log_stds.data.numpy() + np.log(np.sqrt(2 * np.pi * np.e)),
            axis=-1)

    def log_likelihood(self, a):
        """
        Compute the log likelihood of a.

        Parameters
        ----------
        a (Variable): Actions, of shape (..., dim).

        Returns
        -------
        logli (Variable): Log likelihood of each action, of shape (...,).
        """
        # First cast into float tensor
        a = a.type(torch.FloatTensor)
        # Standardize the sample: zs ~ N(0, I)
        zs = (a - self.means) / self.log_stds.exp()
        # Per dimension, log N(a_i; mu_i, sigma_i)
        #     = -log(sigma_i) - z_i^2 / 2 - log(2 * pi) / 2.
        # The -log(sigma_i) term comes from the 1/sigma_i normalization
        # factor of each univariate density. Summing over axis -1 (the
        # last axis, i.e. the action dimension) combines the
        # per-dimension terms into one log likelihood per sample.
        logli = - self.log_stds.sum(-1) - \
            constant(0.5) * zs.pow(2).sum(-1) - \
            constant(0.5) * constant(float(self.dim)) * \
            constant(float(np.log(2 * np.pi)))
        return logli

    def kl_div(self, other):
        """
        Given the distribution parameters of two diagonal multivariate
        Gaussians, compute their KL divergence (vectorized).

        https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Kullback.E2.80.93Leibler_divergence_for_multivariate_normal_distributions

        In general, for two n-dimensional distributions, we have

            D_KL(N1||N2) = 1/2 * (tr(Σ_2^{-1}Σ_1)
                + (μ_2 - μ_1)^T Σ_2^{-1} (μ_2 - μ_1)
                - n + ln(det(Σ_2) / det(Σ_1)))

        Here, Σ_1 and Σ_2 are diagonal, so this equation can be
        simplified. In terms of the parameters of this method:

        The determinant of a diagonal matrix is the product of its
        diagonal, thus

            ln(det(Σ_2) / det(Σ_1))
                = sum(2 * (log_stds_2 - log_stds_1), axis=-1)

        The inverse of a diagonal matrix is the diagonal matrix of the
        inverted diagonal elements, thus

            (μ_2 - μ_1)^T Σ_2^{-1} (μ_2 - μ_1)
                = sum((means_1 - means_2)^2 / vars_2, axis=-1)

        The trace is the sum of the diagonal elements, thus

            tr(Σ_2^{-1}Σ_1) = sum(vars_1 / vars_2, axis=-1)

        where

            vars_1 = exp(2 * log_stds_1)
            vars_2 = exp(2 * log_stds_2)

        Since tr(Σ_2^{-1}Σ_1) - n = sum((vars_1 - vars_2) / vars_2, axis=-1),
        combining all terms gives

            D_KL(N1||N2)
            = sum(1/2 * ((vars_1 - vars_2) / vars_2
                         + (means_1 - means_2)^2 / vars_2
                         + 2 * (log_stds_2 - log_stds_1)), axis=-1)
            = sum(((means_1 - means_2)^2 + vars_1 - vars_2) / (2 * vars_2)
                  + (log_stds_2 - log_stds_1), axis=-1)

        Parameters
        ----------
        other (DiagonalGaussian): The second distribution, N2.

        Returns
        -------
        kl_div (Variable): KL divergence of each pair of distributions.
        """
        # Constants are wrapped in Variables (via constant) so they can
        # participate in arithmetic with other Variables.
        variance = (constant(2.0) * self.log_stds).exp()
        other_variance = (constant(2.0) * other.log_stds).exp()
        numerator = (self.means - other.means).pow(2) + \
            variance - other_variance
        # The small epsilon guards against division by zero when
        # other_variance underflows to zero.
        denominator = constant(2.0) * other_variance + constant(1e-8)
        # Summing over axis -1 (the action dimension) reduces the
        # per-dimension terms to one KL value per distribution pair.
        kl_div = (numerator / denominator +
                  other.log_stds - self.log_stds).sum(-1)
        return kl_div
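

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the original module).
# It assumes the legacy torch.autograd.Variable API that this file targets
# and sanity-checks the three methods against the closed forms for a
# standard normal, where
#     entropy              = dim * log(sqrt(2 * pi * e))
#     log_likelihood(mean) = -dim / 2 * log(2 * pi)
#     KL(N || N)           = 0
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from torch.autograd import Variable

    dim = 3
    # sigma = exp(0) = 1, so this is a batch of one standard normal
    standard = DiagonalGaussian(
        means=Variable(torch.zeros(1, dim)),
        log_stds=Variable(torch.zeros(1, dim)))

    # Entropy of a 3-D standard normal: 3 * log(sqrt(2 * pi * e)) ~ 4.2568
    print(standard.entropy())

    # Log density at the mean: -3/2 * log(2 * pi) ~ -2.7568
    print(standard.log_likelihood(Variable(torch.zeros(1, dim))))

    # KL divergence of a distribution with itself: ~ 0 (up to the
    # epsilon added to the denominator in kl_div)
    print(standard.kl_div(standard))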