Python torch.nn.functional.glu() Examples

The following are 30 code examples of torch.nn.functional.glu(), drawn from open source projects. The source file, project, and license for each example are noted in its heading. You may also want to check out the other available functions and classes of the torch.nn.functional module.
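As a quick reference before the examples: F.glu(input, dim) splits the input in half along dim and returns first_half * sigmoid(second_half), so the gated dimension must have even size and is halved in the output. A minimal, self-contained illustration (not taken from any of the projects below):

import torch
import torch.nn.functional as F

x = torch.randn(4, 10)              # the gated dimension must be even
out = F.glu(x, dim=-1)              # shape (4, 5)

# Equivalent explicit form:
a, b = x.chunk(2, dim=-1)
assert torch.allclose(out, a * torch.sigmoid(b))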
Example #1
Source File: resnet.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.linear_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.linear_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat(
                    (temps, self.context_layer(context)),
                    dim=1
                ),
                dim=1
            )
        return inputs + temps 
Example #2
Source File: resnet.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.conv_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.conv_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat(
                    (temps, self.context_layer(context)),
                    dim=1
                ),
                dim=1
            )
        return inputs + temps 
Example #3
Source File: made.py    From nsf with MIT License
def forward(self, inputs, context=None):
        temps = inputs
        if self.use_batch_norm:
            temps = self.batch_norm_layers[0](temps)
        temps = self.activation(temps)
        temps = self.linear_layers[0](temps)
        if self.use_batch_norm:
            temps = self.batch_norm_layers[1](temps)
        temps = self.activation(temps)
        temps = self.dropout(temps)
        temps = self.linear_layers[1](temps)
        if context is not None:
            temps = F.glu(
                torch.cat((temps, self.context_layer(context)), dim=1),
                dim=1
            )
        return inputs + temps 
Example #4
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
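A side note on the loop marked "Terrible but will do": rebuilding the attention map from the argmax indices can be vectorized with a one-hot sum. A possible replacement, sketched under the same shape assumptions as the snippet above (not part of the attn2d source):

import torch
import torch.nn.functional as F

def indices_to_attn(indices, Ts):
    # indices: LongTensor [B, Tt, C], each entry a source position in [0, Ts)
    attn = F.one_hot(indices, num_classes=Ts).sum(dim=2).float()  # [B, Tt, Ts]
    return attn / attn.sum(dim=-1, keepdim=True)                  # normalize over source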
Example #5
Source File: conformer_convolution.py    From neural_sp with Apache License 2.0
def forward(self, xs):
        """Forward pass.

        Args:
            xs (FloatTensor): `[B, T, d_model]`
        Returns:
            xs (FloatTensor): `[B, T, d_model]`

        """
        B, T, d_model = xs.size()
        assert d_model == self.d_model

        xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
        xs = self.pointwise_conv1(xs)  # `[B, 2 * C, T]`
        xs = xs.transpose(2, 1)  # `[B, T, 2 * C]`
        xs = F.glu(xs)  # `[B, T, C]`
        xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
        xs = self.depthwise_conv(xs)  # `[B, C, T]`

        xs = self.batch_norm(xs)
        xs = self.activation(xs)
        xs = self.pointwise_conv2(xs)  # `[B, C, T]`

        xs = xs.transpose(2, 1).contiguous()  # `[B, T, C]`
        return xs 
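For context, pointwise_conv1 here projects d_model to 2 * d_model channels precisely so that F.glu can gate them back down to d_model before the depthwise convolution. A rough sketch of how such a block might be wired (hypothetical layer definitions, not the neural_sp class):

import torch.nn as nn
import torch.nn.functional as F

class ConvGLUBlock(nn.Module):
    # Conformer-style convolution block: the pointwise conv doubles the channels,
    # GLU halves them again, then depthwise conv + pointwise conv follow.
    def __init__(self, d_model, kernel_size=31):
        super().__init__()
        self.pointwise_conv1 = nn.Conv1d(d_model, 2 * d_model, 1)
        self.depthwise_conv = nn.Conv1d(d_model, d_model, kernel_size,
                                        padding=(kernel_size - 1) // 2, groups=d_model)
        self.batch_norm = nn.BatchNorm1d(d_model)
        self.activation = nn.SiLU()
        self.pointwise_conv2 = nn.Conv1d(d_model, d_model, 1)

    def forward(self, xs):                               # [B, T, d_model]
        xs = xs.transpose(2, 1)                          # [B, d_model, T]
        xs = F.glu(self.pointwise_conv1(xs), dim=1)      # [B, d_model, T]
        xs = self.activation(self.batch_norm(self.depthwise_conv(xs)))
        xs = self.pointwise_conv2(xs)
        return xs.transpose(2, 1)                        # [B, T, d_model]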
Example #6
Source File: aggregators.py    From attn2d with MIT License
def one_step(self, x, need_attention_weights=False):
        x = x[:, -1:]  # B, 1, Ts, C
        x = F.glu(self.linear(x), dim=-1) # B, 1, Ts, C
        if not need_attention_weights:
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #7
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            B, Tt, Ts, C = x.size()
            mask = torch.triu(utils.fill_with_neg_inf(x.new(Tt, Ts)), self.waitk)
            x, _ = (
                x + mask.unsqueeze(0).unsqueeze(-1)
            ).max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #8
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        if not need_attention_weights:
            # Maxpool 
            x, _ = x.max(dim=2)  # B, Tt, C
            return x, None
        # Output attention weights:
        if need_attention_weights:
            # x in B, Tt, Ts, C
            B, Tt, Ts, C = x.size()
            x, indices = x.max(dim=2)
            # indices in B, Tt, C with each channel selecting a source position
            # Terrible but will do:
            attn = x.new_zeros(B, Tt, Ts)
            for i in range(Ts):
                attn[:,:,i] = indices.eq(i).sum(dim=-1)
            # Normalize
            attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn 
Example #9
Source File: aggregators.py    From attn2d with MIT License
def forward(self, x, need_attention_weights=False):
        B, Tt, Ts, C = x.size()
        x = F.glu(self.linear(x), dim=-1) # B, Tt, Ts, C
        x = x.permute(0, 3, 1, 2)  # B, C, Tt, Ts
        x = F.pad(x, (Ts-1, 0), 'constant', -1000)
        x = F.max_pool2d(x, 
                         (1, Ts), # kernel size
                         (1, 1), # stride
                         0, # padding
                         1, # dilation
                         False, # ceil_mode
                         False, # return indices
                        )
        x = x.permute(0, 2, 3, 1)
        return x, None 
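Left-padding the source axis by Ts-1 with a large negative constant and max-pooling with a (1, Ts) kernel of stride 1 amounts to a cumulative ("causal") maximum over source positions. A small check of that equivalence (illustrative only, not from aggregators.py):

import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 4, 5)                                    # B, C, Tt, Ts
padded = F.pad(x, (x.size(-1) - 1, 0), 'constant', -1000)
pooled = F.max_pool2d(padded, kernel_size=(1, x.size(-1)), stride=(1, 1))
assert torch.allclose(pooled, x.cummax(dim=-1).values)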
Example #10
Source File: densenet_cascade.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #11
Source File: resnet_addup_nonorm2_gated_noffn.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #12
Source File: pa_resnet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #13
Source File: densenet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #14
Source File: hmm_controls2.py    From attn2d with MIT License
def forward_gate(self, x):
        for l in self.gate[:-1]:
            x = F.glu(l(x))
        return self.gate[-1](x) 
Example #15
Source File: resnet_addup_nonorm2_gated.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #16
Source File: test_pyprof_nvtx.py    From apex with BSD 3-Clause "New" or "Revised" License
def test_glu(self):
        inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
        output = F.glu(inp, dim=-1) 
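The test only exercises the call under the profiler; the observable contract is that dim=-1 halves the last dimension, so a (1, 3, 32, 32) input yields a (1, 3, 32, 16) output. The same shape check on CPU (illustrative):

import torch
import torch.nn.functional as F

inp = torch.randn(1, 3, 32, 32)
out = F.glu(inp, dim=-1)
assert out.shape == (1, 3, 32, 16)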
Example #17
Source File: model.py    From Mixture-of-Embedding-Experts with Apache License 2.0
def forward(self, x):
        x1 = self.fc(x)

        if self.add_batch_norm:
            x1 = self.batch_norm(x1)

        x = th.cat((x, x1), 1)

        return F.glu(x, 1) 
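This is the "context gating" pattern: the input is concatenated with its own projection and GLU uses the projected half as the gate, i.e. the result is x * sigmoid(batch_norm(fc(x))). A self-contained sketch of such a unit (a hypothetical module, not the exact Mixture-of-Embedding-Experts class):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ContextGating(nn.Module):
    def __init__(self, dim, add_batch_norm=True):
        super().__init__()
        self.fc = nn.Linear(dim, dim)
        self.add_batch_norm = add_batch_norm
        self.batch_norm = nn.BatchNorm1d(dim)

    def forward(self, x):                    # x: [B, dim]
        x1 = self.fc(x)
        if self.add_batch_norm:
            x1 = self.batch_norm(x1)
        x = torch.cat((x, x1), dim=1)        # [B, 2 * dim]
        return F.glu(x, dim=1)               # [B, dim]: first half * sigmoid(second half)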
Example #18
Source File: module.py    From OpenTransformer with MIT License
def __init__(self, idim, hidden_units, dropout_rate, activation='relu'):
        super(PositionwiseFeedForward, self).__init__()
        self.activation = activation
        self.w_1 = nn.Linear(idim, hidden_units * 2 if activation == 'glu' else hidden_units)
        self.w_2 = nn.Linear(hidden_units, idim)
        self.dropout = nn.Dropout(dropout_rate) 
Example #19
Source File: module.py    From OpenTransformer with MIT License
def forward(self, x):
        x = self.w_1(x)
        if self.activation == 'relu':
            x = F.relu(x)
        elif self.activation == 'tanh':
            x = F.tanh(x)
        elif self.activation == 'glu':
            x = F.glu(x)
        else:
            raise NotImplementedError
        return self.w_2(self.dropout(x)) 
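Seen together with Example #18, this explains why w_1 doubles its width only for the 'glu' activation: F.glu halves the last dimension again, so w_2 always receives hidden_units features. A usage sketch, assuming the __init__ from Example #18 and this forward belong to the same PositionwiseFeedForward class:

import torch

ffn = PositionwiseFeedForward(idim=256, hidden_units=1024,
                              dropout_rate=0.1, activation='glu')
x = torch.randn(8, 50, 256)        # [B, T, idim]
y = ffn(x)                         # w_1: 256 -> 2048, glu: -> 1024, w_2: -> 256
assert y.shape == x.shape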
Example #20
Source File: fconv.py    From XSum with MIT License
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv in zip(self.projections, self.convolutions):
            residual = x if proj is None else proj(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding) * math.sqrt(0.5)

        return x, y 
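The loop above is the convolutional seq2seq recipe: each temporal convolution doubles the channel count, F.glu gates it back down, and a residual scaled by sqrt(0.5) is added. A stripped-down sketch of a single such block, using a batch-first Conv1d rather than the T x B x C layout above (illustrative, not the XSum/fairseq code):

import math
import torch.nn as nn
import torch.nn.functional as F

class GLUConvBlock(nn.Module):
    def __init__(self, channels, kernel_size=3):
        super().__init__()
        # output channels are doubled so that GLU can halve them again
        self.conv = nn.Conv1d(channels, 2 * channels, kernel_size,
                              padding=(kernel_size - 1) // 2)

    def forward(self, x):                        # x: [B, C, T]
        residual = x
        x = F.glu(self.conv(x), dim=1)           # [B, C, T]
        return (x + residual) * math.sqrt(0.5)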
Example #21
Source File: conv.py    From nsf with MIT License
def forward(self, inputs):
        temps = self.conv_transpose(inputs)
        outputs = F.glu(temps, dim=1)
        return outputs 
Example #22
Source File: fconv_self_att.py    From crosentgec with GNU General Public License v3.0
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
            residual = x if proj is None else proj(x)

            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            if attention is not None:
                x = attention(x)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

        return {
            'encoder_out': (x, y),
        } 
Example #23
Source File: w2l_conv_glu_enc.py    From fairseq with MIT License
def forward(self, src_tokens, src_lengths, **kwargs):

        """
        src_tokens: padded tensor (B, T, C * feat)
        src_lengths: tensor of original lengths of input utterances (B,)
        """
        B, T, _ = src_tokens.size()
        x = src_tokens.transpose(1, 2).contiguous()  # (B, feat, T) assuming C == 1

        for layer_idx in range(len(self.conv_layers)):
            x = self.conv_layers[layer_idx](x)
            x = F.glu(x, dim=1)
            x = F.dropout(x, p=self.dropouts[layer_idx], training=self.training)

        x = x.transpose(1, 2).contiguous()  # (B, T, 908)
        x = self.linear_layers[0](x)
        x = F.glu(x, dim=2)
        x = F.dropout(x, p=self.dropouts[-1])
        x = self.linear_layers[1](x)

        assert x.size(0) == B
        assert x.size(1) == T

        encoder_out = x.transpose(0, 1)  # (T, B, vocab_size)

        # need to debug this -- find a simpler/elegant way in pytorch APIs
        encoder_padding_mask = (
            torch.arange(T).view(1, T).expand(B, -1).to(x.device)
            >= src_lengths.view(B, 1).expand(-1, T)
        ).t()  # (B x T) -> (T x B)

        return {
            "encoder_out": encoder_out,  # (T, B, vocab_size)
            "encoder_padding_mask": encoder_padding_mask,  # (T, B)
        } 
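The mask construction flagged with "need to debug this" can be written more directly with broadcasting; one equivalent formulation, sketched as an assumption rather than fairseq code:

import torch

def lengths_to_padding_mask(src_lengths, T):
    # src_lengths: [B]; returns a bool mask [T, B], True at padded positions
    positions = torch.arange(T, device=src_lengths.device).unsqueeze(1)  # [T, 1]
    return positions >= src_lengths.unsqueeze(0)                         # [T, B]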
Example #24
Source File: fconv_self_att.py    From training_results_v0.5 with Apache License 2.0
def forward(self, src_tokens, src_lengths):
        # embed tokens and positions
        x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
        x = F.dropout(x, p=self.dropout, training=self.training)
        input_embedding = x.transpose(0, 1)

        # project to size of convolution
        x = self.fc1(x)

        # B x T x C -> T x B x C
        x = x.transpose(0, 1)

        # temporal convolutions
        for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
            residual = x if proj is None else proj(x)

            x = F.dropout(x, p=self.dropout, training=self.training)
            padding_l = (conv.kernel_size[0] - 1) // 2
            padding_r = conv.kernel_size[0] // 2
            x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
            x = conv(x)
            x = F.glu(x, dim=2)
            if attention is not None:
                x = attention(x)
            x = (x + residual) * math.sqrt(0.5)

        # T x B x C -> B x T x C
        x = x.transpose(1, 0)

        # project back to size of embedding
        x = self.fc2(x)

        # scale gradients (this only affects backward, not forward)
        x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

        # add output to input embedding for attention
        y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

        return {
            'encoder_out': (x, y),
        } 
Example #25
Source File: model.py    From howto100m with Apache License 2.0
def forward(self, x):
        x1 = self.fc(x)
        if self.add_batch_norm:
            x1 = self.batch_norm(x1)
        x = th.cat((x, x1), 1)
        return F.glu(x, 1) 
Example #26
Source File: pa_gatenet.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #27
Source File: glu.py    From neural_sp with Apache License 2.0
def forward(self, xs):
        return F.glu(self.fc(xs), dim=-1) 
Example #28
Source File: gated_conv.py    From neural_sp with Apache License 2.0
def forward(self, xs, xlens, task, use_cache=False, streaming=False,):
        """Forward pass.

        Args:
            xs (FloatTensor): `[B, T, F]`
            xlens (IntTensor): `[B]`
        Returns:
            eouts (dict):
                xs (FloatTensor): `[B, T', C_o * F]`
                xlens (IntTensor): `[B]`

        """
        eouts = {'ys': {'xs': None, 'xlens': None},
                 'ys_sub1': {'xs': None, 'xlens': None},
                 'ys_sub2': {'xs': None, 'xlens': None}}

        bs, xmax, input_dim = xs.size()
        xs = xs.transpose(2, 1).unsqueeze(3)  # `[B, in_ch (input_dim), T, 1]`

        xs = self.layers(xs)  # `[B, out_ch, T, 1]`
        bs, out_ch, xmax, freq = xs.size()
        xs = xs.transpose(2, 1).contiguous().view(bs, xmax, -1)  # `[B, T, out_ch * feat_dim]`

        # weight normalization + GLU for the last fully-connected layer
        xs = F.glu(self.fc_glu(xs), dim=2)

        # Bridge layer
        if self.bridge is not None:
            xs = self.bridge(xs)

        # NOTE: no subsampling is conducted

        if task in ['all', 'ys']:
            eouts['ys']['xs'], eouts['ys']['xlens'] = xs, xlens
        else:
            raise NotImplementedError
        return eouts 
Example #29
Source File: pa_gatenet3.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1) 
Example #30
Source File: pa_gatenet6.py    From attn2d with MIT License
def forward(self, x):
        return F.glu(self.linear(x), dim=-1)