Python torch.distributions Examples

The following are 30 code examples that use the torch.distributions module. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
Example #1
Source File: decoder_helpers.py    From texar-pytorch with Apache License 2.0 6 votes vote down vote up
def sample(self, time: int, outputs: torch.Tensor) -> torch.Tensor:
        r"""Returns ``sample_id`` with the same shape as :attr:`outputs`
        (typically ``[batch_size, vocab_size]``). If
        :attr:`straight_through` is `False`, this contains the Gumbel softmax
        distributions over vocabulary with temperature :attr:`tau`. If
        :attr:`straight_through` is `True`, this contains one-hot vectors of
        the greedy samples.

        Args:
            time: Current step index; not used by this method.
            outputs: Scores whose last dimension is normalized by the softmax.
                Any number of leading dimensions is accepted.

        Returns:
            A tensor shaped like :attr:`outputs`.
        """
        gumbel_samples = self._gumbel.sample(outputs.size()).to(
            device=outputs.device, dtype=outputs.dtype)
        sample_ids = torch.softmax(
            (outputs + gumbel_samples) / self._tau, dim=-1)
        if self._straight_through:
            # keepdim=True keeps the index tensor aligned with the last
            # (softmax) axis whatever the number of leading dimensions is.
            argmax_ids = torch.argmax(sample_ids, dim=-1, keepdim=True)
            sample_ids_hard = torch.zeros_like(sample_ids).scatter_(
                dim=-1, index=argmax_ids, value=1.0)  # one-hot vectors
            # The forward value is the one-hot vector; only the soft sample
            # stays attached to the autograd graph.
            sample_ids = (sample_ids_hard - sample_ids).detach() + sample_ids
        return sample_ids
Example #2
Source File: tanh_normal.py    From garage with MIT License 6 votes vote down vote up
def rsample_with_pre_tanh_value(self, sample_shape=torch.Size()):
        """Draw a reparameterized sample and also expose its pre-tanh value.

        The sample is drawn with ``rsample`` from ``self._normal`` and then
        squashed with ``tanh``.

        Args:
            sample_shape (list): shape of the return.

        Note:
            Gradients pass through this operation.

        Returns:
            torch.Tensor: Samples from ``self._normal``, prior to being
                transformed with `tanh`.
            torch.Tensor: The same samples with `tanh` applied.

        """
        pre_tanh = self._normal.rsample(sample_shape)
        squashed = torch.tanh(pre_tanh)
        return pre_tanh, squashed
Example #3
Source File: VAEAC.py    From vaeac with MIT License 6 votes vote down vote up
def generate_reconstructions_params(self, batch, mask, K=1):
        """
        Produce generative-distribution parameters for K reconstructions
        of every object in the batch.

        The second distribution returned by ``make_latent_distributions``
        is sampled K times with ``rsample``; each latent is passed through
        ``generative_network`` and the K outputs are concatenated along a
        new axis 1, i. e. if the batch shape is [n x D1 x D2], then the
        result shape is [n x K x D1 x D2].

        It is better to call this inside torch.no_grad in order to save
        memory.
        """
        _, prior = self.make_latent_distributions(batch, mask)
        per_sample_params = [
            self.generative_network(prior.rsample()).unsqueeze(1)
            for _ in range(K)
        ]
        return torch.cat(per_sample_params, 1)
Example #4
Source File: prob_utils.py    From vaeac with MIT License 6 votes vote down vote up
def normal_parse_params(params, min_sigma=0):
    """
    Take a Tensor (e. g. neural network output) and return
    torch.distributions.Normal distribution.
    This Normal distribution is component-wise independent,
    and its dimensionality depends on the input shape.
    First half of channels (axis 1) is mean of the distribution,
    the softplus of the second half is std (sigma), so there is
    no restrictions on the values of the input tensor.

    min_sigma is the minimal value of sigma. I. e. if the above
    softplus is less than min_sigma, then sigma is clipped
    from below with value min_sigma. This regularization
    is required for the numerical stability and may be considered
    as a neural network architecture choice without any change
    to the probabilistic model.

    Raises:
        ValueError: if the number of channels (``params.shape[1]``) is odd,
            because the tensor then cannot be split into two equal halves.
    """
    channels = params.shape[1]
    if channels % 2 != 0:
        # With an odd count the two halves have different sizes and would
        # either fail or be silently broadcast against each other.
        raise ValueError(
            'params must have an even number of channels, got %d' % channels)
    half = channels // 2
    mu = params[:, :half]
    sigma = softplus(params[:, half:]).clamp(min=min_sigma)
    return Normal(mu, sigma)
Example #5
Source File: distributions.py    From probflow with MIT License 6 votes vote down vote up
def __call__(self):
        """Get the distribution object from the backend

        With the ``'pytorch'`` backend this raises ``NotImplementedError``.
        Otherwise a TensorFlow Probability ``MixtureSameFamily`` is built
        from ``self.distributions`` and a categorical over either the
        logits (when ``self.logits`` is not None) or the probs.  If
        ``self.distributions`` is a ``BaseDistribution`` it is first
        replaced, on ``self``, by the object returned from calling it.
        """
        if get_backend() == 'pytorch':
            # The import is not used yet: nothing is built for this backend.
            import torch.distributions as tod
            raise NotImplementedError

        import tensorflow as tf
        from tensorflow_probability import distributions as tfd

        # Convert to tensorflow distributions if probflow distributions
        if isinstance(self.distributions, BaseDistribution):
            self.distributions = self.distributions()
        components = self.distributions

        # Broadcast whichever of logits/probs is in use to the batch shape
        batch_shape = components.batch_shape
        if self.logits is not None:
            mixing = tfd.Categorical(
                logits=tf.broadcast_to(self['logits'], batch_shape),
                probs=None)
        else:
            mixing = tfd.Categorical(
                logits=None,
                probs=tf.broadcast_to(self['probs'], batch_shape))

        # Return TFP distribution object
        return tfd.MixtureSameFamily(mixing, components)
Example #6
Source File: actor.py    From ReAgent with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _log_prob(self, r, scale_log):
        """
        Log probability of a normal sample, written the same way as
        torch.distributions.normal.Normal, which is:

        ```
        -((value - loc) ** 2) / (2 * var) - log_scale - math.log(math.sqrt(2 * math.pi))
        ```

        In the context of this class, `value = loc + r * scale`, so the
        quadratic term only depends on `r` and the expression reduces to
        the one returned below; `self.const` is subtracted as the constant
        term.

        The Normal class is not used directly because it currently cannot
        be exported through ONNX.
        """
        quadratic_term = -(r ** 2) / 2
        return quadratic_term - scale_log - self.const
Example #7
Source File: autozivae.py    From scVI with MIT License 6 votes vote down vote up
def get_alphas_betas(
        self, as_numpy: bool = True
    ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
        """Return the sigmoid of the four alpha/beta logits in a dictionary.

        The keys are ``"alpha_posterior"``, ``"beta_posterior"``,
        ``"alpha_prior"`` and ``"beta_prior"``; each value is the sigmoid of
        the attribute named ``<key>_logit``.

        :param as_numpy: when True, every value is moved to the CPU and
            converted to a numpy array (detaching it first if it requires
            grad); otherwise the tensors are returned as they are.
        """
        names = ("alpha_posterior", "beta_posterior", "alpha_prior", "beta_prior")
        outputs = {
            name: torch.sigmoid(getattr(self, name + "_logit")) for name in names
        }
        if not as_numpy:
            return outputs

        converted = {}
        for name, tensor in outputs.items():
            if tensor.requires_grad:
                tensor = tensor.detach()
            converted[name] = tensor.cpu().numpy()
        return converted
Example #8
Source File: pl.py    From neuralsort with MIT License 6 votes vote down vote up
def rsample(self, sample_shape, log_score=True):
        """
        Draw relaxed samples by perturbing the scores with Gumbel noise
        and passing them through ``self.relaxed_sort``.

        sample_shape: sequence whose first entry is the number of samples
            from the PL distribution.
        log_score: if True ``self.scores`` is used as is, otherwise its
            logarithm is taken before the noise is added.

        Returns the output of ``relaxed_sort`` viewed as
        [n_samples, -1, n, n] with all size-one dimensions squeezed out.
        """
        with torch.enable_grad():  # torch.distributions turns off autograd
            n_samples = sample_shape[0]
            # Sample the noise where the scores live instead of assuming
            # a CUDA device, so the method also works on CPU.
            device = self.scores.device

            def sample_gumbel(samples_shape, eps=1e-20):
                U = torch.zeros(samples_shape, device=device).uniform_()
                return -torch.log(-torch.log(U + eps) + eps)

            if log_score:
                log_scores = self.scores
            else:
                log_scores = torch.log(self.scores)
            log_s_perturb = log_scores.unsqueeze(
                0) + sample_gumbel([n_samples, 1, self.n, 1])
            log_s_perturb = log_s_perturb.view(-1, self.n, 1)
            P_hat = self.relaxed_sort(log_s_perturb)
            P_hat = P_hat.view(n_samples, -1, self.n, self.n)

            return P_hat.squeeze()
Example #9
Source File: VAEAC.py    From vaeac with MIT License 6 votes vote down vote up
def generate_samples_params(self, batch, mask, K=1):
        """
        Produce generative-distribution parameters for K samples
        of every object in the batch.

        The second distribution returned by ``make_latent_distributions``
        is sampled K times with ``rsample``; each latent is passed through
        ``generative_network`` and the K outputs are concatenated along a
        new axis 1, i. e. if the batch shape is [n x D1 x D2], then the
        result shape is [n x K x D1 x D2].

        It is better to call this inside torch.no_grad in order to save
        memory.
        """
        _, prior = self.make_latent_distributions(batch, mask)
        per_sample_params = [
            self.generative_network(prior.rsample()).unsqueeze(1)
            for _ in range(K)
        ]
        return torch.cat(per_sample_params, 1)
Example #10
Source File: module.py    From pyfilter with MIT License 6 votes vote down vote up
def apply(self, f: Callable[[torch.Tensor], torch.Tensor]):
        """
        Applies function f to the data of all tensors returned by
        ``self.tensors()``.

        Tensors without a ``_base`` are handled first: their data, and the
        data of their gradient when one is present, are replaced by the
        result of f.  Tensors with a ``_base`` are handled afterwards.

        :param f: The callable
        :return: Self
        """

        # First pass: tensors that have no base tensor
        for tensor in self.tensors():
            if tensor._base is not None:
                continue

            tensor.data = f(tensor.data)

            grad = tensor._grad
            if grad is not None:
                grad.data = f(grad.data)

        # Second pass: tensors that do have a base tensor
        for tensor in self.tensors():
            base = tensor._base
            if base is None:
                continue

            # TODO: Not too sure about this one, happens for some distributions
            if base.dim() > 0:
                # re-point at the base's current data, reshaped
                tensor.data = base.data.view(tensor.data.shape)
            else:
                tensor.data = f(tensor.data)

        return self
Example #11
Source File: module.py    From pyfilter with MIT License 6 votes vote down vote up
def tensors(self) -> Tuple[torch.Tensor, ...]:
        """
        Collects the tensors reachable from this object.

        The result is assembled, in order, from: objects of type
        ``torch.Tensor`` found by ``_find_obj_helper``, the tensors of every
        ``TensorContainerBase`` (plus those of the distributions attached to
        its trainable ``Parameter`` entries), the tensors of every
        ``Distribution`` found, and finally the tensors of each sub-module.
        """

        collected = []

        # ===== Find all tensor types ====== #
        collected.extend(self._find_obj_helper(torch.Tensor).values())

        # ===== Tensor containers ===== #
        for container in self._find_obj_helper(TensorContainerBase).values():
            collected.extend(container.tensors)
            for candidate in container.tensors:
                if isinstance(candidate, Parameter) and candidate.trainable:
                    collected.extend(_iterate_distribution(candidate.distr))

        # ===== Pytorch distributions ===== #
        for dist in self._find_obj_helper(Distribution).values():
            collected.extend(_iterate_distribution(dist))

        # ===== Modules ===== #
        for module in self.modules().values():
            collected.extend(module.tensors())

        return tuple(collected)
Example #12
Source File: module.py    From pyfilter with MIT License 6 votes vote down vote up
def _iterate_distribution(d: Distribution) -> Tuple[torch.Tensor, ...]:
    """
    Helper method for iterating over distributions and gathering what
    ``_find_types`` reports for ``torch.Tensor`` on them.

    For a ``TransformedDistribution`` the base distribution is visited
    recursively and the entries found on each of its transforms are
    appended.  For any other distribution its own entries are taken and
    every nested ``Distribution`` found on it is visited recursively.

    :param d: The distribution
    :return: Tuple with the collected entries
    """

    if isinstance(d, TransformedDistribution):
        res = _iterate_distribution(d.base_dist)
        for t in d.transforms:
            res += tuple(_find_types(t, torch.Tensor).values())
        return res

    res = tuple(_find_types(d, torch.Tensor).values())
    for sd in _find_types(d, Distribution).values():
        res += _iterate_distribution(sd)

    return res
Example #13
Source File: models.py    From RecNN with Apache License 2.0 6 votes vote down vote up
def __init__(self, state_dim, action_dim, latent_dim):
        """Create the encoder and decoder layers.

        The encoder takes ``state_dim + action_dim`` inputs and ends in two
        heads of size ``latent_dim`` (``mean`` and ``log_std``); the decoder
        takes ``state_dim + latent_dim`` inputs and ends in ``action_dim``
        outputs.  All hidden layers are 750 units wide.
        """
        super(bcqGenerator, self).__init__()
        hidden_dim = 750

        # encoder
        self.e1 = nn.Linear(state_dim + action_dim, hidden_dim)
        self.e2 = nn.Linear(hidden_dim, hidden_dim)

        self.mean = nn.Linear(hidden_dim, latent_dim)
        self.log_std = nn.Linear(hidden_dim, latent_dim)

        # decoder
        self.d1 = nn.Linear(state_dim + latent_dim, hidden_dim)
        self.d2 = nn.Linear(hidden_dim, hidden_dim)
        self.d3 = nn.Linear(hidden_dim, action_dim)

        self.latent_dim = latent_dim
        # standard normal (loc 0, scale 1)
        self.normal = torch.distributions.Normal(0, 1)
Example #14
Source File: modules.py    From recurrent-visual-attention with MIT License 6 votes vote down vote up
def forward(self, h_t):
        """Sample a value around a mean computed from ``h_t``.

        Returns a tuple ``(log_pi, l_t)``: ``l_t`` is the detached sample,
        clamped to [-1, 1]; ``log_pi`` is the log-probability of the
        unclamped sample summed over dimension 1.
        """
        # compute mean (h_t is detached, so no gradient reaches it from here)
        hidden = F.relu(self.fc(h_t.detach()))
        mu = torch.tanh(self.fc_lt(hidden))

        # draw the sample and cut it off from the graph
        sampler = torch.distributions.Normal(mu, self.std)
        l_t = sampler.rsample().detach()

        # we assume both dimensions are independent
        # 1. pdf of the joint is the product of the pdfs
        # 2. log of the product is the sum of the logs
        log_pi = Normal(mu, self.std).log_prob(l_t).sum(dim=1)

        # bound between [-1, 1]
        l_t = torch.clamp(l_t, -1, 1)

        return log_pi, l_t
Example #15
Source File: test_multivariate_normal.py    From gpytorch with MIT License 6 votes vote down vote up
def test_kl_divergence(self, cuda=False):
        """Compare ``kl_divergence`` between diagonal ``MultivariateNormal``
        distributions with hand-computed values, for float and double."""
        device = torch.device("cuda" if cuda else "cpu")
        kl = torch.distributions.kl.kl_divergence
        for dtype in (torch.float, torch.double):
            mean0 = torch.randn(4, device=device, dtype=dtype)
            mean1 = mean0 + 1
            var0 = torch.randn(4, device=device, dtype=dtype).abs_()
            var1 = var0 * math.exp(2)

            dist_a = MultivariateNormal(mean0, DiagLazyTensor(var0))
            dist_b = MultivariateNormal(mean1, DiagLazyTensor(var0))
            dist_c = MultivariateNormal(mean0, DiagLazyTensor(var1))

            # a distribution against itself: expected value is zero,
            # checked with an absolute tolerance
            res = kl(dist_a, dist_a)
            expected = 0.0
            self.assertLess((res - expected).abs().item(), 1e-2)

            # same diagonal, every mean shifted by one: half the sum of
            # the reciprocal diagonal entries (relative tolerance)
            res = kl(dist_b, dist_a)
            expected = var0.reciprocal().sum().div(2.0)
            self.assertLess(((res - expected) / res).abs().item(), 1e-2)

            # same mean, diagonal scaled by exp(2) in all 4 dimensions
            res = kl(dist_a, dist_c)
            expected = 0.5 * (8 - 4 + 4 * math.exp(-2))
            self.assertLess(((res - expected) / res).abs().item(), 1e-2)
Example #16
Source File: continuous.py    From rising with MIT License 5 votes vote down vote up
def __init__(self, mu: Union[float, torch.Tensor],
                 sigma: Union[float, torch.Tensor]):
        """
        Wrap a ``torch.distributions.Normal`` and hand it to the parent
        initializer.

        Args:
            mu : the distribution's mean
            sigma : the distribution's standard deviation
        """
        normal = torch.distributions.Normal(loc=mu, scale=sigma)
        super().__init__(normal)
Example #17
Source File: continuous.py    From rising with MIT License 5 votes vote down vote up
def __init__(self, low: Union[float, torch.Tensor],
                 high: Union[float, torch.Tensor]):
        """
        Wrap a ``torch.distributions.Uniform`` and hand it to the parent
        initializer.

        Args:
            low : the lower range (inclusive)
            high : the higher range (exclusive)
        """
        uniform = torch.distributions.Uniform(low=low, high=high)
        super().__init__(uniform)
Example #18
Source File: decoder_helpers.py    From texar-pytorch with Apache License 2.0 5 votes vote down vote up
def sample(self, time: int, outputs: torch.Tensor) -> torch.Tensor:
        r"""Returns ``sample_id``: the softmax over the last dimension of
        :attr:`outputs` divided by the temperature :attr:`tau`, i.e. softmax
        distributions over vocabulary. Shape = ``[batch_size, vocab_size]``.
        """
        del time  # the step index is not needed
        tempered = outputs / self._tau
        return torch.softmax(tempered, dim=-1)
Example #19
Source File: distributions.py    From probflow with MIT License 5 votes vote down vote up
def __call__(self):
        """Get the distribution object from the backend

        Builds a deterministic distribution at ``self['loc']``: the class
        returned by ``get_TorchDeterministic`` for the ``'pytorch'``
        backend, ``tfd.Deterministic`` otherwise.
        """
        if get_backend() != 'pytorch':
            from tensorflow_probability import distributions as tfd
            return tfd.Deterministic(self['loc'])

        deterministic_cls = get_TorchDeterministic()
        return deterministic_cls(self['loc'])
Example #20
Source File: distributions.py    From probflow with MIT License 5 votes vote down vote up
def __call__(self):
        """Get the distribution object from the backend

        With the ``'pytorch'`` backend this raises ``NotImplementedError``.
        Otherwise a ``tfd.HiddenMarkovModel`` is returned whose initial and
        transition distributions are categoricals built from
        ``self['initial']`` and ``self['transition']``.
        """
        if get_backend() == 'pytorch':
            # The import is not used yet: nothing is built for this backend.
            import torch.distributions as tod
            raise NotImplementedError

        from tensorflow_probability import distributions as tfd

        initial = tfd.Categorical(self['initial'])
        transition = tfd.Categorical(self['transition'])
        return tfd.HiddenMarkovModel(
            initial_distribution=initial,
            transition_distribution=transition,
            observation_distribution=self['observation'],
            num_steps=self['steps'])
Example #21
Source File: distributions.py    From probflow with MIT License 5 votes vote down vote up
def __call__(self):
        """Get the distribution object from the backend

        Returns a Cauchy distribution parameterized by ``self['loc']`` and
        ``self['scale']``, from torch for the ``'pytorch'`` backend and from
        TensorFlow Probability otherwise.
        """
        if get_backend() != 'pytorch':
            from tensorflow_probability import distributions as tfd
            return tfd.Cauchy(self['loc'], self['scale'])

        import torch.distributions as tod
        return tod.cauchy.Cauchy(self['loc'], self['scale'])
Example #22
Source File: distributions.py    From WaveRNN-Pytorch with MIT License 5 votes vote down vote up
def sample_from_discretized_mix_logistic(y, log_scale_min=hp.log_scale_min):
    """
    Sample from discretized mixture of logistic distributions

    The channel axis of ``y`` is read as three equal groups: mixture
    logits, means and log scales.

    Args:
        y (Tensor): B x C x T
        log_scale_min (float): Log scale minimum value

    Returns:
        Tensor: sample in range of [-1, 1].
    """
    n_channels = y.size(1)
    assert n_channels % 3 == 0
    nr_mix = n_channels // 3

    # B x T x C
    y = y.transpose(1, 2)
    logit_probs = y[:, :, :nr_mix]
    mean_params = y[:, :, nr_mix:2 * nr_mix]
    log_scale_params = y[:, :, 2 * nr_mix:3 * nr_mix]

    # sample mixture indicator from softmax:
    # perturb the logits with -log(-log(u)) noise and take the arg max
    noise = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
    perturbed = logit_probs.data - torch.log(- torch.log(noise))
    argmax = perturbed.max(dim=-1)[1]

    # (B, T) -> (B, T, nr_mix)
    one_hot = to_one_hot(argmax, nr_mix)
    # select logistic parameters of the chosen mixture component
    means = torch.sum(mean_params * one_hot, dim=-1)
    log_scales = torch.sum(log_scale_params * one_hot, dim=-1)
    log_scales = torch.clamp(log_scales, min=log_scale_min)
    # sample from logistic & clip to interval
    # we don't actually round to the nearest 8bit value when sampling
    u = means.data.new(means.size()).uniform_(1e-5, 1.0 - 1e-5)
    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1. - u))

    return torch.clamp(x, min=-1., max=1.)


# add gaussian from clarinet implementation:https://raw.githubusercontent.com/ksw0306/ClariNet/master/loss.py 
Example #23
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def kl_divergence(self, other):
        """Return ``torch.distributions.kl.kl_divergence(self, other)``."""
        return torch.distributions.kl.kl_divergence(self, other)
Example #24
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def dbg_print(self):
        """Call ``dbg_print`` on every entry of ``self.distributions``, in order."""
        for component in self.distributions:
            component.dbg_print()


# noinspection PyAbstractClass 
Example #25
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def kl_divergence(self, other):
        """Sum the KL divergences of the paired component distributions.

        Each entry of ``self.distributions`` is compared with the entry at
        the same position in ``other.distributions``; the results are put
        side by side along a new dimension 1 and summed over it.
        """
        pairs = zip(self.distributions, other.distributions)
        per_component = [
            mine.kl_divergence(theirs).unsqueeze(dim=1) for mine, theirs in pairs
        ]
        return torch.cat(per_component, dim=1).sum(dim=1)
Example #26
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def log_prob(self, actions):
        """Split ``actions`` per distribution and return their log-probs.

        ``actions`` is cut along dimension 1 into as many chunks as there
        are distributions, dimension 1 of each chunk is squeezed, and the
        resulting list is handed to ``_calc_log_probs``.
        """
        num_parts = len(self.distributions)
        # split into batches of actions from individual distributions
        per_distribution = [
            part.squeeze(dim=1)
            for part in torch.chunk(actions, num_parts, dim=1)
        ]
        return self._calc_log_probs(per_distribution)
Example #27
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def sample(self):
        """Sample every distribution and return the flattened actions.

        The samples are collected in a list, in the order of
        ``self.distributions``, and passed to ``_flatten_actions``.
        """
        samples = []
        for component in self.distributions:
            samples.append(component.sample())
        return self._flatten_actions(samples)
Example #28
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def sample_actions_log_probs(self):
        """Sample every distribution; return the actions and their log-probs.

        Returns a pair: the output of ``_flatten_actions`` and the output of
        ``_calc_log_probs``, both computed from the same list of samples.
        """
        samples = []
        for component in self.distributions:
            samples.append(component.sample())
        flattened = self._flatten_actions(samples)
        return flattened, self._calc_log_probs(samples)
Example #29
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def _calc_log_probs(self, list_of_action_batches):
        """Return the summed log-probs of the given per-distribution actions.

        The i-th entry of ``list_of_action_batches`` is scored by the i-th
        entry of ``self.distributions``.
        """
        # calculate batched log probs for every distribution, each with a
        # new dimension 1 so they can be placed side by side
        columns = [
            component.log_prob(batch).unsqueeze(dim=1)
            for component, batch in zip(self.distributions, list_of_action_batches)
        ]

        # concatenate and calculate sum of individual log-probs
        # this is valid under the assumption that action distributions are independent
        return torch.cat(columns, dim=1).sum(dim=1)
Example #30
Source File: action_distributions.py    From sample-factory with MIT License 5 votes vote down vote up
def __init__(self, action_space, logits_flat):
        """Build one action distribution per sub-space of ``action_space``.

        ``logits_flat`` is split along dimension 1 into pieces whose sizes
        come from ``calc_num_logits`` for each sub-space; every piece is
        turned into a distribution with ``get_action_distribution``.
        """
        spaces = action_space.spaces
        self.logit_lengths = [calc_num_logits(space) for space in spaces]
        self.split_logits = torch.split(logits_flat, self.logit_lengths, dim=1)
        assert len(self.split_logits) == len(spaces)

        self.distributions = [
            get_action_distribution(space, logits)
            for space, logits in zip(spaces, self.split_logits)
        ]