Python torch.nn.functional.glu() Examples

The following are 30 code examples of torch.nn.functional.glu(), drawn from open source projects. The source file, project, and license for each example are noted in its heading. You may also want to check out the other available functions and classes of the torch.nn.functional module.
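As a quick reference before the examples: F.glu(input, dim) splits the input in half along dim and returns first_half * sigmoid(second_half), so the gated dimension must have even size and is halved in the output. A minimal, self-contained illustration (not taken from any of the projects below):

import torch
import torch.nn.functional as F

x = torch.randn(4, 10)              # the gated dimension must be even
out = F.glu(x, dim=-1)              # shape (4, 5)

# Equivalent explicit form:
a, b = x.chunk(2, dim=-1)
assert torch.allclose(out, a * torch.sigmoid(b))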
Example #1
Source File: resnet.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.linear_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.linear_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat(
                    (temps, self.context_layer(context)),
                    dim=1
                ),
                dim=1
            )
        return inputs + temps 
Example #2
Source File: resnet.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.conv_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.conv_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat(
                    (temps, self.context_layer(context)),
                    dim=1
                ),
                dim=1
            )
        return inputs + temps 
Example #3
Source File: made.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.linear_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.linear_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat((temps, self.context_layer(context)), dim=1),
                dim=1
            )
        return inputs + temps 
Example #4
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
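A side note on the loop marked "Terrible but will do": rebuilding the attention map from the argmax indices can be vectorized with a one-hot sum. A possible replacement, sketched under the same shape assumptions as the snippet above (not part of the attn2d source):

import torch
import torch.nn.functional as F

def indices_to_attn(indices, Ts):
    # indices: LongTensor [B, Tt, C], each entry a source position in [0, Ts)
    attn = F.one_hot(indices, num_classes=Ts).sum(dim=2).float()  # [B, Tt, Ts]
    return attn / attn.sum(dim=-1, keepdim=True)                  # normalize over source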
Example #5
Source File: conformer_convolution.py    From neural_sp with Apache License 2.0
def forward(self, xs):
        """Forward pass.

        Args:
            xs (FloatTensor): `[B, T, d_model]`
        Returns:
            xs (FloatTensor): `[B, T, d_model]`

        """
        B, T, d_model = xs.size()
        assert d_model == self.d_model

        xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
        xs = self.pointwise_conv1(xs)  # `[B, 2 * C, T]`
        xs = xs.transpose(2, 1)  # `[B, T, 2 * C]`
        xs = F.glu(xs)  # `[B, T, C]`
        xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
        xs = self.depthwise_conv(xs)  # `[B, C, T]`

        xs = self.batch_norm(xs)
        xs = self.activation(xs)
        xs = self.pointwise_conv2(xs)  # `[B, C, T]`

        xs = xs.transpose(2, 1).contiguous()  # `[B, T, C]`
        return xs 
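For context, pointwise_conv1 here projects d_model to 2 * d_model channels precisely so that F.glu can gate them back down to d_model before the depthwise convolution. A rough sketch of how such a block might be wired (hypothetical layer definitions, not the neural_sp class):

import torch.nn as nn
import torch.nn.functional as F

class ConvGLUBlock(nn.Module):
    # Conformer-style convolution block: the pointwise conv doubles the channels,
    # GLU halves them again, then depthwise conv + pointwise conv follow.
    def __init__(self, d_model, kernel_size=31):
        super().__init__()
        self.pointwise_conv1 = nn.Conv1d(d_model, 2 * d_model, 1)
        self.depthwise_conv = nn.Conv1d(d_model, d_model, kernel_size,
                                        padding=(kernel_size - 1) // 2, groups=d_model)
        self.batch_norm = nn.BatchNorm1d(d_model)
        self.activation = nn.SiLU()
        self.pointwise_conv2 = nn.Conv1d(d_model, d_model, 1)

    def forward(self, xs):                               # [B, T, d_model]
        xs = xs.transpose(2, 1)                          # [B, d_model, T]
        xs = F.glu(self.pointwise_conv1(xs), dim=1)      # [B, d_model, T]
        xs = self.activation(self.batch_norm(self.depthwise_conv(xs)))
        xs = self.pointwise_conv2(xs)
        return xs.transpose(2, 1)                        # [B, T, d_model]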
Example #6
Source File: aggregators.py    From attn2d with MIT License
def one_step(self, x, need_attention_weights=False):
        x = x[:, -1:]  # B, 1, Ts, C
        x = F.glu(self.linear(x), dim=-1) # B, 1, Ts, C
        if not need_attention_weights:
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #7
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            B, Tt, Ts, C = x.size()
            mask = torch.triu(utils.fill_with_neg_inf(x.new(Tt, Ts)), self.waitk)
            x, _ = (
                x + mask.unsqueeze(0).unsqueeze(-1)
            ).max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #8
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #9
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        B, Tt, Ts, C = x.size()
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        x = x.permute(0, 3, 1, 2)  # B, C, Tt, Ts
        x = F.pad(x, (Ts-1, 0), 'constant', -1000)
        x = F.max_pool2d(x, 
                         (1, Ts), # kernel size
                         (1, 1), # stride
                         0, # padding
                         1, # dilation
                         False, # ceil_mode
                         False, # return indices
                        )
        x = x.permute(0, 2, 3, 1)
        return x, None 
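Left-padding the source axis by Ts-1 with a large negative constant and max-pooling with a (1, Ts) kernel of stride 1 amounts to a cumulative ("causal") maximum over source positions. A small check of that equivalence (illustrative only, not from aggregators.py):

import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 4, 5)                                    # B, C, Tt, Ts
padded = F.pad(x, (x.size(-1) - 1, 0), 'constant', -1000)
pooled = F.max_pool2d(padded, kernel_size=(1, x.size(-1)), stride=(1, 1))
assert torch.allclose(pooled, x.cummax(dim=-1).values)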
Example #10
Source File: densenet_cascade.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #11
Source File: resnet_addup_nonorm2_gated_noffn.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #12
Source File: pa_resnet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #13
Source File: densenet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #14
Source File: hmm_controls2.py    From attn2d with MIT License
def forward_gate(self, x):
        for l in self.gate[:-1]:
            x = F.glu(l(x))
        return self.gate[-1](x) 
Example #15
Source File: resnet_addup_nonorm2_gated.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #16
Source File: test_pyprof_nvtx.py    From apex with BSD 3-Clause "New" or "Revised" License
def test_glu(self):
        inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
        output = F.glu(inp, dim=-1) 
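The test only exercises the call under the profiler; the observable contract is that dim=-1 halves the last dimension, so a (1, 3, 32, 32) input yields a (1, 3, 32, 16) output. The same shape check on CPU (illustrative):

import torch
import torch.nn.functional as F

inp = torch.randn(1, 3, 32, 32)
out = F.glu(inp, dim=-1)
assert out.shape == (1, 3, 32, 16)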
Example #17
Source File: model.py    From Mixture-of-Embedding-Experts with Apache License 2.0
def forward(self, x):
        x1 = self.fc(x)

        if self.add_batch_norm:
            x1 = self.batch_norm(x1)

        x = th.cat((x, x1), 1)

        return F.glu(x, 1) 
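This is the "context gating" pattern: the input is concatenated with its own projection and GLU uses the projected half as the gate, i.e. the result is x * sigmoid(batch_norm(fc(x))). A self-contained sketch of such a unit (a hypothetical module, not the exact Mixture-of-Embedding-Experts class):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ContextGating(nn.Module):
    def __init__(self, dim, add_batch_norm=True):
        super().__init__()
        self.fc = nn.Linear(dim, dim)
        self.add_batch_norm = add_batch_norm
        self.batch_norm = nn.BatchNorm1d(dim)

    def forward(self, x):                    # x: [B, dim]
        x1 = self.fc(x)
        if self.add_batch_norm:
            x1 = self.batch_norm(x1)
        x = torch.cat((x, x1), dim=1)        # [B, 2 * dim]
        return F.glu(x, dim=1)               # [B, dim]: first half * sigmoid(second half)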
Example #18
Source File: module.py    From OpenTransformer with MIT License
def __init__(self, idim, hidden_units, dropout_rate, activation='relu'):
        super(PositionwiseFeedForward, self).__init__()
        self.activation = activation
        self.w_1 = nn.Linear(idim, hidden_units * 2 if activation == 'glu' else hidden_units)
        self.w_2 = nn.Linear(hidden_units, idim)
        self.dropout = nn.Dropout(dropout_rate) 
Example #19
Source File: module.py    From OpenTransformer with MIT License
def forward(self, x):
        x = self.w_1(x)
        if self.activation == 'relu':
            x = F.relu(x)
        elif self.activation == 'tanh':
            x = F.tanh(x)
        elif self.activation == 'glu':
            x = F.glu(x)
        else:
            raise NotImplementedError
        return self.w_2(self.dropout(x)) 
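Seen together with Example #18, this explains why w_1 doubles its width only for the 'glu' activation: F.glu halves the last dimension again, so w_2 always receives hidden_units features. A usage sketch, assuming the __init__ from Example #18 and this forward belong to the same PositionwiseFeedForward class:

import torch

ffn = PositionwiseFeedForward(idim=256, hidden_units=1024,
                              dropout_rate=0.1, activation='glu')
x = torch.randn(8, 50, 256)        # [B, T, idim]
y = ffn(x)                         # w_1: 256 -> 2048, glu: -> 1024, w_2: -> 256
assert y.shape == x.shape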
Example #20
Source File: fconv.py    From XSum with MIT License
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv in zip(self.projections, self.convolutions):
            residual = x if proj is None else proj(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding) * math.sqrt(0.5)

        return x, y 
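The loop above is the convolutional seq2seq recipe: each temporal convolution doubles the channel count, F.glu gates it back down, and a residual scaled by sqrt(0.5) is added. A stripped-down sketch of a single such block, using a batch-first Conv1d rather than the T x B x C layout above (illustrative, not the XSum/fairseq code):

import math
import torch.nn as nn
import torch.nn.functional as F

class GLUConvBlock(nn.Module):
    def __init__(self, channels, kernel_size=3):
        super().__init__()
        # output channels are doubled so that GLU can halve them again
        self.conv = nn.Conv1d(channels, 2 * channels, kernel_size,
                              padding=(kernel_size - 1) // 2)

    def forward(self, x):                        # x: [B, C, T]
        residual = x
        x = F.glu(self.conv(x), dim=1)           # [B, C, T]
        return (x + residual) * math.sqrt(0.5)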
Example #21
Source File: conv.py    From nsf with MIT License
def forward(self, inputs):
        temps = self.conv_transpose(inputs)
        outputs = F.glu(temps, dim=1)
        return outputs 
Example #22
Source File: fconv_self_att.py    From crosentgec with GNU General Public License v3.0
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
            residual = x if proj is None else proj(x)

            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            if attention is not None:
                x = attention(x)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

        return {
            'encoder_out': (x, y),
        } 
Example #23
Source File: w2l_conv_glu_enc.py    From fairseq with MIT License
def forward(self, src_tokens, src_lengths, **kwargs):

        """
        src_tokens: padded tensor (B, T, C * feat)
        src_lengths: tensor of original lengths of input utterances (B,)
        """
        B, T, _ = src_tokens.size()
        x = src_tokens.transpose(1, 2).contiguous()  # (B, feat, T) assuming C == 1

        for layer_idx in range(len(self.conv_layers)):
            x = self.conv_layers[layer_idx](x)
            x = F.glu(x, dim=1)
            x = F.dropout(x, p=self.dropouts[layer_idx], training=self.training)

        x = x.transpose(1, 2).contiguous()  # (B, T, 908)
        x = self.linear_layers[0](x)
        x = F.glu(x, dim=2)
        x = F.dropout(x, p=self.dropouts[-1])
        x = self.linear_layers[1](x)

        assert x.size(0) == B
        assert x.size(1) == T

        encoder_out = x.transpose(0, 1)  # (T, B, vocab_size)

        # need to debug this -- find a simpler/elegant way in pytorch APIs
        encoder_padding_mask = (
            torch.arange(T).view(1, T).expand(B, -1).to(x.device)
            >= src_lengths.view(B, 1).expand(-1, T)
        ).t()  # (B x T) -> (T x B)

        return {
            "encoder_out": encoder_out,  # (T, B, vocab_size)
            "encoder_padding_mask": encoder_padding_mask,  # (T, B)
        } 
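The mask construction flagged with "need to debug this" can be written more directly with broadcasting; one equivalent formulation, sketched as an assumption rather than fairseq code:

import torch

def lengths_to_padding_mask(src_lengths, T):
    # src_lengths: [B]; returns a bool mask [T, B], True at padded positions
    positions = torch.arange(T, device=src_lengths.device).unsqueeze(1)  # [T, 1]
    return positions >= src_lengths.unsqueeze(0)                         # [T, B]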
Example #24
Source File: fconv_self_att.py    From training_results_v0.5 with Apache License 2.0
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
            residual = x if proj is None else proj(x)

            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            if attention is not None:
                x = attention(x)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

        return {
            'encoder_out': (x, y),
        } 
Example #25
Source File: model.py    From howto100m with Apache License 2.0
def forward(self, x):
        x1 = self.fc(x)
        if self.add_batch_norm:
            x1 = self.batch_norm(x1)
        x = th.cat((x, x1), 1)
        return F.glu(x, 1) 
Example #26
Source File: pa_gatenet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #27
Source File: glu.py    From neural_sp with Apache License 2.0
def forward(self, xs):
        return F.glu(self.fc(xs), dim=-1) 
Example #28
Source File: gated_conv.py    From neural_sp with Apache License 2.0
def forward(self, xs, xlens, task, use_cache=False, streaming=False,):
        """Forward pass.

        Args:
            xs (FloatTensor): `[B, T, F]`
            xlens (IntTensor): `[B]`
        Returns:
            eouts (dict):
                xs (FloatTensor): `[B, T', C_o * F]`
                xlens (IntTensor): `[B]`

        """
        eouts = {'ys': {'xs': None, 'xlens': None},
                 'ys_sub1': {'xs': None, 'xlens': None},
                 'ys_sub2': {'xs': None, 'xlens': None}}

        bs, xmax, input_dim = xs.size()
        xs = xs.transpose(2, 1).unsqueeze(3)  # `[B, in_ch (input_dim), T, 1]`

        xs = self.layers(xs)  # `[B, out_ch, T, 1]`
        bs, out_ch, xmax, freq = xs.size()
        xs = xs.transpose(2, 1).contiguous().view(bs, xmax, -1)  # `[B, T, out_ch * feat_dim]`

        # weight normalization + GLU for the last fully-connected layer
        xs = F.glu(self.fc_glu(xs), dim=2)

        # Bridge layer
        if self.bridge is not None:
            xs = self.bridge(xs)

        # NOTE: no subsampling is conducted

        if task in ['all', 'ys']:
            eouts['ys']['xs'], eouts['ys']['xlens'] = xs, xlens
        else:
            raise NotImplementedError
        return eouts 
Example #29
Source File: pa_gatenet3.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #30
Source File: pa_gatenet6.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1)