Python torch.nn.functional.softplus() Examples

The following are 30 code examples of torch.nn.functional.softplus(), collected from open-source projects. The source file, project, and license for each example are listed above its code. You may also want to check out the other available functions and classes of the torch.nn.functional module.
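As a quick refresher before the examples: softplus(x) = (1/beta) * log(1 + exp(beta * x)) is a smooth, always-positive approximation of ReLU. The minimal sketch below is not taken from any of the projects listed here; it only shows the call and its default beta=1, threshold=20 arguments (above the threshold the implementation reverts to the identity for numerical stability).

import torch
import torch.nn.functional as F

x = torch.linspace(-3., 3., 7)
y = F.softplus(x)                      # defaults: beta=1, threshold=20
y_manual = torch.log1p(torch.exp(x))   # log(1 + exp(x))
print(torch.allclose(y, y_manual))     # True

# a larger beta sharpens the transition towards ReLU
y_sharp = F.softplus(x, beta=10.)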
Example #1
Source File: loss.py    From torch-toolbox with BSD 3-Clause "New" or "Revised" License
def forward(self, x, target):
        similarity_matrix = x @ x.T  # need grad here
        label_matrix = target.unsqueeze(1) == target.unsqueeze(0)
        negative_matrix = label_matrix.logical_not()
        positive_matrix = label_matrix.fill_diagonal_(False)

        sp = torch.where(positive_matrix, similarity_matrix,
                         torch.zeros_like(similarity_matrix))
        sn = torch.where(negative_matrix, similarity_matrix,
                         torch.zeros_like(similarity_matrix))

        ap = torch.clamp_min(1 + self.m - sp.detach(), min=0.)
        an = torch.clamp_min(sn.detach() + self.m, min=0.)

        logit_p = -self.gamma * ap * (sp - self.dp)
        logit_n = self.gamma * an * (sn - self.dn)

        logit_p = torch.where(positive_matrix, logit_p,
                              torch.zeros_like(logit_p))
        logit_n = torch.where(negative_matrix, logit_n,
                              torch.zeros_like(logit_n))

        loss = F.softplus(torch.logsumexp(logit_p, dim=1) +
                          torch.logsumexp(logit_n, dim=1)).mean()
        return loss 
Example #2
Source File: DDPAE.py    From DDPAE-video-prediction with MIT License
def sample_content(self, content, sample):
    '''
    Pass into content_lstm to get a final content.
    '''
    content = content.view(-1, self.n_frames_input, self.total_components, self.content_latent_size)
    contents = []
    for i in range(self.total_components):
      z = content[:, :, i, :]
      z = self.content_lstm(z).unsqueeze(1) # batch_size x 1 x (content_latent_size * 2)
      contents.append(z)
    content = torch.cat(contents, dim=1).view(-1, self.content_latent_size * 2)

    # Get mu and sigma, and sample.
    content_mu = content[:, :self.content_latent_size]
    content_sigma = F.softplus(content[:, self.content_latent_size:])
    content = self.pyro_sample('content', dist.Normal, content_mu, content_sigma, sample)
    return content 
Example #3
Source File: grid_attention_layer.py    From Attention-Gated-Networks with MIT License
def _concatenation_debug(self, x, g):
        input_size = x.size()
        batch_size = input_size[0]
        assert batch_size == g.size(0)

        # theta => (b, c, t, h, w) -> (b, i_c, t, h, w) -> (b, i_c, thw)
        # phi   => (b, g_d) -> (b, i_c)
        theta_x = self.theta(x)
        theta_x_size = theta_x.size()

        # g (b, c, t', h', w') -> phi_g (b, i_c, t', h', w')
        #  softplus(theta_x + phi_g + bias) -> f = (b, i_c, thw) -> (b, i_c, t/s1, h/s2, w/s3)
        phi_g = F.upsample(self.phi(g), size=theta_x_size[2:], mode=self.upsample_mode)
        f = F.softplus(theta_x + phi_g)

        #  psi^T * f -> (b, psi_i_c, t/s1, h/s2, w/s3)
        sigm_psi_f = F.sigmoid(self.psi(f))

        # upsample the attentions and multiply
        sigm_psi_f = F.upsample(sigm_psi_f, size=input_size[2:], mode=self.upsample_mode)
        y = sigm_psi_f.expand_as(x) * x
        W_y = self.W(y)

        return W_y, sigm_psi_f 
Example #4
Source File: uncertainty_estimation.py    From PyTorch-BayesianCNN with MIT License
def get_uncertainty_per_image(model, input_image, T=15, normalized=False):
    input_image = input_image.unsqueeze(0)
    input_images = input_image.repeat(T, 1, 1, 1)

    net_out, _ = model(input_images)
    pred = torch.mean(net_out, dim=0).cpu().detach().numpy()
    if normalized:
        prediction = F.softplus(net_out)
        p_hat = prediction / torch.sum(prediction, dim=1).unsqueeze(1)
    else:
        p_hat = F.softmax(net_out, dim=1)
    p_hat = p_hat.detach().cpu().numpy()
    p_bar = np.mean(p_hat, axis=0)

    temp = p_hat - np.expand_dims(p_bar, 0)
    epistemic = np.dot(temp.T, temp) / T
    epistemic = np.diag(epistemic)

    aleatoric = np.diag(p_bar) - (np.dot(p_hat.T, p_hat) / T)
    aleatoric = np.diag(aleatoric)

    return pred, epistemic, aleatoric 
Example #5
Source File: nconv.py    From nconv with GNU General Public License v3.0
def _pos(self, p):
        pos_fn = self.pos_fn.lower()
        if pos_fn == 'softmax':
            p_sz = p.size()
            p = p.view(p_sz[0],p_sz[1], -1)
            p = F.softmax(p, -1)
            return p.view(p_sz)
        elif pos_fn == 'exp':
            return torch.exp(p)
        elif pos_fn == 'softplus':
            return F.softplus(p, beta=10)
        elif pos_fn == 'sigmoid':
            return F.sigmoid(p)
        else:
            print('Undefined positive function!')
            return 
Example #6
Source File: flows.py    From torchkit with MIT License
def forward(self, inputs):
        if len(inputs) == 2:
            input, logdet = inputs
        elif len(inputs) == 3:
            input, logdet, context = inputs
        else:
            raise(Exception('inputs length not correct'))
        
        output = F.sigmoid(input)
        logdet += sum_from_one(- F.softplus(input) - F.softplus(-input))
        
        
        if len(inputs) == 2:
            return output, logdet
        elif len(inputs) == 3:
            return output, logdet, context
        else:
            raise(Exception('inputs length not correct')) 
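The log-determinant above uses the identity log sigmoid'(x) = -softplus(x) - softplus(-x), which follows from sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) and log sigmoid(x) = -softplus(-x). A quick numerical check (a sketch, not part of torchkit):

import torch
import torch.nn.functional as F

x = torch.randn(5)
s = torch.sigmoid(x)
lhs = torch.log(s * (1 - s))
rhs = -F.softplus(x) - F.softplus(-x)
print(torch.allclose(lhs, rhs))  # True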
Example #7
Source File: flows.py    From torchkit with MIT License
def forward(self, inputs):
        x, logdet, context = inputs
        out, _ = self.mdl((x, context))
        if isinstance(self.mdl, iaf_modules.cMADE):
            mean = out[:,:,0]
            lstd = out[:,:,1]
            
        std = self.realify(lstd)
        
        if self.realify == nn_.softplus:
            x_ = mean + std * x
        elif self.realify == nn_.sigmoid:
            x_ = (-std+1.0) * mean + std * x
        elif self.realify == nn_.sigmoid2:
            x_ = (-std+2.0) * mean + std * x
        logdet_ = sum_from_one(torch.log(std)) + logdet
        return x_, logdet_, context 
Example #8
Source File: flows.py    From torchkit with MIT License
def reset_parameters(self):
        if isinstance(self.mean, nn_.ResLinear):
            self.mean.dot_01.scale.data.uniform_(-0.001, 0.001)
            self.mean.dot_h1.scale.data.uniform_(-0.001, 0.001)
            self.mean.dot_01.bias.data.uniform_(-0.001, 0.001)
            self.mean.dot_h1.bias.data.uniform_(-0.001, 0.001)
            self.lstd.dot_01.scale.data.uniform_(-0.001, 0.001)
            self.lstd.dot_h1.scale.data.uniform_(-0.001, 0.001)
            if self.realify == nn_.softplus:
                inv = np.log(np.exp(1-nn_.delta)-1) * 0.5
                self.lstd.dot_01.bias.data.uniform_(inv-0.001, inv+0.001)
                self.lstd.dot_h1.bias.data.uniform_(inv-0.001, inv+0.001)
            else:
                self.lstd.dot_01.bias.data.uniform_(-0.001, 0.001)
                self.lstd.dot_h1.bias.data.uniform_(-0.001, 0.001)
        elif isinstance(self.mean, nn.Linear):
            self.mean.weight.data.uniform_(-0.001, 0.001)
            self.mean.bias.data.uniform_(-0.001, 0.001)
            self.lstd.weight.data.uniform_(-0.001, 0.001)
            if self.realify == nn_.softplus:
                inv = np.log(np.exp(1-nn_.delta)-1) * 0.5
                self.lstd.bias.data.uniform_(inv-0.001, inv+0.001)
            else:
                self.lstd.bias.data.uniform_(-0.001, 0.001) 
Example #9
Source File: flows.py    From torchkit with MIT License
def __init__(self, dim, context_dim, 
                 oper=nn_.ResLinear, realify=nn_.softplus):
        super(LinearFlow, self).__init__()
        self.realify = realify
        
        self.dim = dim
        self.context_dim = context_dim

        
        if type(dim) is int:
            dim_ = dim
        else:
            dim_ = np.prod(dim)
        
        self.mean = oper(context_dim, dim_)
        self.lstd = oper(context_dim, dim_)
        
        self.reset_parameters() 
Example #10
Source File: test_nn_activations.py    From numpy-ml with GNU General Public License v3.0
def test_softplus_grad(N=50):
    from numpy_ml.neural_nets.activations import SoftPlus

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = torch_gradient_generator(F.softplus)

    i = 0
    while i < N:
        n_ex = np.random.randint(1, 100)
        n_dims = np.random.randint(1, 100)
        z = random_tensor((n_ex, n_dims), standardize=True)
        assert_almost_equal(mine.grad(z), gold(z))
        print("PASSED")
        i += 1 
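The gold gradient in this test is simply the sigmoid, since d/dx softplus(x) = exp(x) / (1 + exp(x)) = sigmoid(x). A minimal autograd check of that fact (independent of numpy-ml):

import torch
import torch.nn.functional as F

z = torch.randn(4, requires_grad=True)
F.softplus(z).sum().backward()
print(torch.allclose(z.grad, torch.sigmoid(z)))  # True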
Example #11
Source File: test_nn.py    From numpy-ml with GNU General Public License v3.0
def test_softplus_grad(N=15):
    from numpy_ml.neural_nets.activations import SoftPlus

    np.random.seed(12345)

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = torch_gradient_generator(F.softplus)

    i = 0
    while i < N:
        n_ex = np.random.randint(1, 100)
        n_dims = np.random.randint(1, 100)
        z = random_tensor((n_ex, n_dims), standardize=True)
        assert_almost_equal(mine.grad(z), gold(z))
        print("PASSED")
        i += 1


#######################################################################
#                          Layers                                     #
####################################################################### 
Example #12
Source File: test_nn.py    From numpy-ml with GNU General Public License v3.0
def test_softplus_activation(N=15):
    from numpy_ml.neural_nets.activations import SoftPlus

    np.random.seed(12345)

    N = np.inf if N is None else N

    mine = SoftPlus()
    gold = lambda z: F.softplus(torch.FloatTensor(z)).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)
        assert_almost_equal(mine.fn(z), gold(z))
        print("PASSED")
        i += 1


#######################################################################
#                      Activation Gradients                           #
####################################################################### 
Example #13
Source File: real_nvp.py    From real-nvp with MIT License
def _pre_process(self, x):
        """Dequantize the input image `x` and convert to logits.

        Args:
            x (torch.Tensor): Input image.

        Returns:
            y (torch.Tensor): Dequantized logits of `x`.

        See Also:
            - Dequantization: https://arxiv.org/abs/1511.01844, Section 3.1
            - Modeling logits: https://arxiv.org/abs/1605.08803, Section 4.1
        """
        y = (x * 255. + torch.rand_like(x)) / 256.
        y = (2 * y - 1) * self.data_constraint
        y = (y + 1) / 2
        y = y.log() - (1. - y).log()

        # Save log-determinant of Jacobian of initial transform
        ldj = F.softplus(y) + F.softplus(-y) \
            - F.softplus((1. - self.data_constraint).log() - self.data_constraint.log())
        sldj = ldj.view(ldj.size(0), -1).sum(-1)

        return y, sldj 
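A note on the Jacobian terms above (a derivation, not part of the real-nvp source): writing t = sigmoid(y), the logit transform has dy/dt = 1 / (t * (1 - t)), so log|dy/dt| = softplus(y) + softplus(-y), which is the first part of ldj. The last term, -softplus(log((1 - c) / c)) with c = data_constraint, simplifies to log(c) and accounts for the affine rescaling applied before the logit.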
Example #14
Source File: prob_utils.py    From vaeac with MIT License
def normal_parse_params(params, min_sigma=0):
    """
    Take a Tensor (e.g. a neural network output) and return a
    torch.distributions.Normal distribution.
    The Normal distribution is component-wise independent,
    and its dimensionality depends on the input shape.
    The first half of the channels is the mean of the distribution,
    and the softplus of the second half is the std (sigma), so there
    are no restrictions on the input tensor.

    min_sigma is the minimal value of sigma, i.e. if the softplus
    above is less than min_sigma, sigma is clipped from below at
    min_sigma. This regularization is required for numerical
    stability and may be considered a neural network architecture
    choice that does not change the probabilistic model.
    """
    n = params.shape[0]
    d = params.shape[1]
    mu = params[:, :d // 2]
    sigma_params = params[:, d // 2:]
    sigma = softplus(sigma_params)
    sigma = sigma.clamp(min=min_sigma)
    distr = Normal(mu, sigma)
    return distr 
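A minimal usage sketch, assuming normal_parse_params as defined above is in scope (shapes here are illustrative, not taken from the vaeac repository):

import torch

params = torch.randn(8, 6)                       # batch of 8, 2*D channels with D=3
distr = normal_parse_params(params, min_sigma=1e-3)
print(distr.mean.shape, distr.stddev.shape)      # torch.Size([8, 3]) torch.Size([8, 3])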
Example #15
Source File: deep-energy-mnist.py    From higher with Apache License 2.0
def forward(self, x, y):
        # First extract an embedding z from the visual input x.
        #
        # We use softplus activations so our model has
        # (generally) non-zero second-order derivatives.
        z = F.softplus(self.conv1(x))
        z = F.max_pool2d(z, 2, 2)
        z = F.softplus(self.conv2(z))
        z = F.max_pool2d(z, 2, 2)
        z = z.view(-1, 4*4*50)
        z = F.softplus(self.fc1(z))
        z = self.fc2(z)

        # Next combine that embedding with the proposed label y
        # and pass that through a single hidden-layer to predict
        # the energy function value.
        v = torch.cat((z, y), dim=1)
        v = F.softplus(self.fce1(v))
        E = self.fce2(v).squeeze()
        return E 
Example #16
Source File: vpg.py    From garage with MIT License
def _compute_policy_entropy(self, obs):
        r"""Compute entropy value of probability distribution.

        Notes: P is the maximum path length (self.max_path_length)

        Args:
            obs (torch.Tensor): Observation from the environment
                with shape :math:`(N, P, O*)`.

        Returns:
            torch.Tensor: Calculated entropy values given observation
                with shape :math:`(N, P)`.

        """
        if self._stop_entropy_gradient:
            with torch.no_grad():
                policy_entropy = self.policy(obs)[0].entropy()
        else:
            policy_entropy = self.policy(obs)[0].entropy()

        # This prevents entropy from becoming negative for small policy std
        if self._use_softplus_entropy:
            policy_entropy = F.softplus(policy_entropy)

        return policy_entropy 
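Why softplus helps here: the differential entropy of a Gaussian, 0.5 * log(2 * pi * e * sigma^2) per dimension, becomes negative once sigma drops below roughly 0.24. A small numeric illustration (a sketch, not from the garage codebase):

import math
import torch
import torch.nn.functional as F

std = 0.1
entropy = 0.5 * math.log(2 * math.pi * math.e * std ** 2)    # ~ -0.88 for a 1-D Gaussian
print(F.softplus(torch.tensor(entropy)))                     # ~ 0.35, kept positive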
Example #17
Source File: flows.py    From torchkit with MIT License
def __init__(self, num_ds_dim=4):
        super(SigmoidFlow, self).__init__()
        self.num_ds_dim = num_ds_dim
        
        self.act_a = lambda x: nn_.softplus(x)
        self.act_b = lambda x: x
        self.act_w = lambda x: nn_.softmax(x,dim=2) 
Example #18
Source File: loss.py    From RobGAN with MIT License
def loss_KL_d(dis_fake, dis_real):
    L1 = torch.mean(F.softplus(-dis_real))
    L2 = torch.mean(F.softplus(dis_fake))
    return L1 + L2 
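Since softplus(-x) = -log(sigmoid(x)), L1 above equals -E[log D(x_real)] and L2 equals -E[log(1 - D(x_fake))] with D = sigmoid of the discriminator logits; in other words, this is the familiar non-saturating GAN discriminator loss written directly on raw logits.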
Example #19
Source File: train_distilled_image.py    From dataset-distillation with MIT License
def get_steps(self):
        data_label_iterable = (x for _ in range(self.state.distill_epochs) for x in zip(self.data, self.labels))
        lrs = F.softplus(self.raw_distill_lrs).unbind()

        steps = []
        for (data, label), lr in zip(data_label_iterable, lrs):
            steps.append((data, label, lr))

        return steps 
Example #20
Source File: flows.py    From torchkit with MIT License
def reset_parameters(self):
        self.mdl.hidden_to_output.cscale.weight.data.uniform_(-0.001, 0.001)
        self.mdl.hidden_to_output.cscale.bias.data.uniform_(0.0, 0.0)
        self.mdl.hidden_to_output.cbias.weight.data.uniform_(-0.001, 0.001)
        self.mdl.hidden_to_output.cbias.bias.data.uniform_(0.0, 0.0)
        if self.realify == nn_.softplus:
            inv = np.log(np.exp(1-nn_.delta)-1) 
            self.mdl.hidden_to_output.cbias.bias.data[1::2].uniform_(inv,inv)
        elif self.realify == nn_.sigmoid:
            self.mdl.hidden_to_output.cbias.bias.data[1::2].uniform_(2.0,2.0) 
Example #21
Source File: kafnets.py    From kernel-activation-functions with MIT License
def softplus_kernel(self, input):
        return F.softplus(input.unsqueeze(self.unsqueeze_dim) - self.dict) 
Example #22
Source File: GAN.py    From MaximumMarginGANs with MIT License
def forward(self, x):
			#return F.softplus(x, self.a, 20.)
			return self.a(x) 
Example #23
Source File: pytorch_util.py    From leap with MIT License
def softplus(x):
    """
    PyTorch's softplus isn't (easily) serializable.
    """
    return F.softplus(x) 
Example #24
Source File: static_head.py    From pytorch-dnc with MIT License
def forward(self, hidden_vb, memory_vb):
        # outputs for computing addressing for heads
        # NOTE: to be consistent w/ the dnc paper, we use
        # NOTE: sigmoid to constrain to [0, 1]
        # NOTE: oneplus to constrain to [1, +inf]
        self.key_vb   = F.tanh(self.hid_2_key(hidden_vb)).view(-1, self.num_heads, self.mem_wid)    # TODO: relu to bias the memory to store positive values ??? check again
        self.beta_vb  = F.softplus(self.hid_2_beta(hidden_vb)).view(-1, self.num_heads, 1)          # beta >=1: https://github.com/deepmind/dnc/issues/9
        self.gate_vb  = F.sigmoid(self.hid_2_gate(hidden_vb)).view(-1, self.num_heads, 1)           # gate /in (0, 1): interpolation gate, blend wl_{t-1} & wc
        self.shift_vb = F.softmax(self.hid_2_shift(hidden_vb).view(-1, self.num_heads, self.num_allowed_shifts).transpose(0, 2)).transpose(0, 2)    # shift: /sum=1
        self.gamma_vb = (1. + F.softplus(self.hid_2_gamma(hidden_vb))).view(-1, self.num_heads, 1)  # gamma >= 1: sharpen the final weights

        # now we compute the addressing mechanism
        self._content_focus(memory_vb)
        self._location_focus() 
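The gamma line above is the DNC paper's oneplus(x) = 1 + softplus(x) = 1 + log(1 + exp(x)), which maps any real input into [1, +inf). A standalone sketch of that mapping (not part of pytorch-dnc):

import torch
import torch.nn.functional as F

def oneplus(x):
    # strictly >= 1 for any real input
    return 1. + F.softplus(x)

print(oneplus(torch.tensor([-10., 0., 10.])))  # tensor([ 1.0000,  1.6931, 11.0000])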
Example #25
Source File: context_conditioned_policy.py    From garage with MIT License
def infer_posterior(self, context):
        r"""Compute :math:`q(z \| c)` as a function of input context and sample new z.

        Args:
            context (torch.Tensor): Context values, with shape
                :math:`(X, N, C)`. X is the number of tasks. N is batch size. C
                is the combined size of observation, action, reward, and next
                observation if next observation is used in context. Otherwise,
                C is the combined size of observation, action, and reward.

        """
        params = self._context_encoder.forward(context)
        params = params.view(context.size(0), -1,
                             self._context_encoder.output_dim)
        # with probabilistic z, predict mean and variance of q(z | c)
        if self._use_information_bottleneck:
            mu = params[..., :self._latent_dim]
            sigma_squared = F.softplus(params[..., self._latent_dim:])
            z_params = [
                product_of_gaussians(m, s)
                for m, s in zip(torch.unbind(mu), torch.unbind(sigma_squared))
            ]
            self.z_means = torch.stack([p[0] for p in z_params])
            self.z_vars = torch.stack([p[1] for p in z_params])
        else:
            self.z_means = torch.mean(params, dim=1)
        self.sample_from_belief()

    # pylint: disable=arguments-differ 
Example #26
Source File: dynamic_head.py    From pytorch-dnc with MIT License
def forward(self, hidden_vb, memory_vb):
        # outputs for computing addressing for heads
        # NOTE: to be consistent w/ the dnc paper, we use
        # NOTE: sigmoid to constrain to [0, 1]
        # NOTE: oneplus to constrain to [1, +inf]
        self.key_vb   = F.tanh(self.hid_2_key(hidden_vb)).view(-1, self.num_heads, self.mem_wid)    # TODO: relu to bias the memory to store positive values ??? check again
        self.beta_vb  = F.softplus(self.hid_2_beta(hidden_vb)).view(-1, self.num_heads, 1)          # beta >=1: https://github.com/deepmind/dnc/issues/9

        # now we compute the addressing mechanism
        self._content_focus(memory_vb) 
Example #27
Source File: activations_autofn.py    From gen-efficientnet-pytorch with Apache License 2.0
def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        x_sigmoid = torch.sigmoid(x)
        x_tanh_sp = F.softplus(x).tanh()
        return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp)) 
Example #28
Source File: activations_autofn.py    From gen-efficientnet-pytorch with Apache License 2.0
def forward(ctx, x):
        ctx.save_for_backward(x)
        y = x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
        return y 
Example #29
Source File: activations_jit.py    From gen-efficientnet-pytorch with Apache License 2.0
def mish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    x_tanh_sp = F.softplus(x).tanh()
    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp)) 
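The backward above hand-codes d/dx [x * tanh(softplus(x))] = tanh(softplus(x)) + x * sigmoid(x) * (1 - tanh^2(softplus(x))). A quick autograd cross-check (a sketch, not part of gen-efficientnet-pytorch):

import torch
import torch.nn.functional as F

x = torch.randn(8, dtype=torch.double, requires_grad=True)
(x * torch.tanh(F.softplus(x))).sum().backward()   # Mish forward, gradient via autograd

with torch.no_grad():
    t = torch.tanh(F.softplus(x))
    manual = t + x * torch.sigmoid(x) * (1 - t * t)
print(torch.allclose(x.grad, manual))              # True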
Example #30
Source File: loss.py    From RobGAN with MIT License
def loss_KL_g(dis_fake):
    return torch.mean(F.softplus(-dis_fake))

