Python torch.nn.functional.glu() Examples
The following are 30 code examples of torch.nn.functional.glu().
You may also want to check out all available functions and classes of the module torch.nn.functional.
Example #1
Source File: resnet.py From nsf with MIT License

def forward(self, inputs, context=None):
    temps = inputs
    if self.use_batch_norm:
        temps = self.batch_norm_layers[0](temps)
    temps = self.activation(temps)
    temps = self.linear_layers[0](temps)
    if self.use_batch_norm:
        temps = self.batch_norm_layers[1](temps)
    temps = self.activation(temps)
    temps = self.dropout(temps)
    temps = self.linear_layers[1](temps)
    if context is not None:
        temps = F.glu(
            torch.cat((temps, self.context_layer(context)), dim=1),
            dim=1
        )
    return inputs + temps
Example #2
Source File: resnet.py From nsf with MIT License

def forward(self, inputs, context=None):
    temps = inputs
    if self.use_batch_norm:
        temps = self.batch_norm_layers[0](temps)
    temps = self.activation(temps)
    temps = self.conv_layers[0](temps)
    if self.use_batch_norm:
        temps = self.batch_norm_layers[1](temps)
    temps = self.activation(temps)
    temps = self.dropout(temps)
    temps = self.conv_layers[1](temps)
    if context is not None:
        temps = F.glu(
            torch.cat((temps, self.context_layer(context)), dim=1),
            dim=1
        )
    return inputs + temps
Example #3
Source File: made.py From nsf with MIT License

def forward(self, inputs, context=None):
    temps = inputs
    if self.use_batch_norm:
        temps = self.batch_norm_layers[0](temps)
    temps = self.activation(temps)
    temps = self.linear_layers[0](temps)
    if self.use_batch_norm:
        temps = self.batch_norm_layers[1](temps)
    temps = self.activation(temps)
    temps = self.dropout(temps)
    temps = self.linear_layers[1](temps)
    if context is not None:
        temps = F.glu(
            torch.cat((temps, self.context_layer(context)), dim=1),
            dim=1
        )
    return inputs + temps
Example #4
Source File: aggregators.py From attn2d with MIT License

def forward(self, x, need_attention_weights=False):
    x = F.glu(self.linear(x), dim=-1)  # B, Tt, Ts, C
    if not need_attention_weights:
        # Maxpool
        x, _ = x.max(dim=2)  # B, Tt, C
        return x, None
    # Output attention weights:
    if need_attention_weights:
        # x in B, Tt, Ts, C
        B, Tt, Ts, C = x.size()
        x, indices = x.max(dim=2)
        # indices in B, Tt, C with each channel selecting a source position
        # Terrible but will do:
        attn = x.new_zeros(B, Tt, Ts)
        for i in range(Ts):
            attn[:, :, i] = indices.eq(i).sum(dim=-1)
        # Normalize
        attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn
Example #5
Source File: conformer_convolution.py From neural_sp with Apache License 2.0

def forward(self, xs):
    """Forward pass.

    Args:
        xs (FloatTensor): `[B, T, d_model]`
    Returns:
        xs (FloatTensor): `[B, T, d_model]`

    """
    B, T, d_model = xs.size()
    assert d_model == self.d_model

    xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
    xs = self.pointwise_conv1(xs)  # `[B, 2 * C, T]`
    xs = xs.transpose(2, 1)  # `[B, T, 2 * C]`
    xs = F.glu(xs)  # `[B, T, C]`

    xs = xs.transpose(2, 1).contiguous()  # `[B, C, T]`
    xs = self.depthwise_conv(xs)  # `[B, C, T]`
    xs = self.batch_norm(xs)
    xs = self.activation(xs)
    xs = self.pointwise_conv2(xs)  # `[B, C, T]`

    xs = xs.transpose(2, 1).contiguous()  # `[B, T, C]`
    return xs
Example #6
Source File: aggregators.py From attn2d with MIT License

def one_step(self, x, need_attention_weights=False):
    x = x[:, -1:]  # B, 1, Ts, C
    x = F.glu(self.linear(x), dim=-1)  # B, 1, Ts, C
    if not need_attention_weights:
        x, _ = x.max(dim=2)  # B, Tt, C
        return x, None
    # Output attention weights:
    if need_attention_weights:
        B, Tt, Ts, C = x.size()
        x, indices = x.max(dim=2)
        # indices in B, Tt, C with each channel selecting a source position
        # Terrible but will do:
        attn = x.new_zeros(B, Tt, Ts)
        for i in range(Ts):
            attn[:, :, i] = indices.eq(i).sum(dim=-1)
        # Normalize
        attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn
Example #7
Source File: aggregators.py From attn2d with MIT License

def forward(self, x, need_attention_weights=False):
    x = F.glu(self.linear(x), dim=-1)  # B, Tt, Ts, C
    if not need_attention_weights:
        # Maxpool
        B, Tt, Ts, C = x.size()
        mask = torch.triu(utils.fill_with_neg_inf(x.new(Tt, Ts)), self.waitk)
        x, _ = (x + mask.unsqueeze(0).unsqueeze(-1)).max(dim=2)  # B, Tt, C
        return x, None
    # Output attention weights:
    if need_attention_weights:
        # x in B, Tt, Ts, C
        B, Tt, Ts, C = x.size()
        x, indices = x.max(dim=2)
        # indices in B, Tt, C with each channel selecting a source position
        # Terrible but will do:
        attn = x.new_zeros(B, Tt, Ts)
        for i in range(Ts):
            attn[:, :, i] = indices.eq(i).sum(dim=-1)
        # Normalize
        attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn
Example #8
Source File: aggregators.py From attn2d with MIT License

def forward(self, x, need_attention_weights=False):
    x = F.glu(self.linear(x), dim=-1)  # B, Tt, Ts, C
    if not need_attention_weights:
        # Maxpool
        x, _ = x.max(dim=2)  # B, Tt, C
        return x, None
    # Output attention weights:
    if need_attention_weights:
        # x in B, Tt, Ts, C
        B, Tt, Ts, C = x.size()
        x, indices = x.max(dim=2)
        # indices in B, Tt, C with each channel selecting a source position
        # Terrible but will do:
        attn = x.new_zeros(B, Tt, Ts)
        for i in range(Ts):
            attn[:, :, i] = indices.eq(i).sum(dim=-1)
        # Normalize
        attn = attn / attn.sum(dim=-1, keepdim=True)
        return x, attn
Example #9
Source File: aggregators.py From attn2d with MIT License

def forward(self, x, need_attention_weights=False):
    B, Tt, Ts, C = x.size()
    x = F.glu(self.linear(x), dim=-1)  # B, Tt, Ts, C
    x = x.permute(0, 3, 1, 2)  # B, C, Tt, Ts
    x = F.pad(x, (Ts - 1, 0), 'constant', -1000)
    x = F.max_pool2d(x,
                     (1, Ts),  # kernel size
                     (1, 1),   # stride
                     0,        # padding
                     1,        # dilation
                     False,    # ceil_mode
                     False)    # return indices
    x = x.permute(0, 2, 3, 1)
    return x, None
Example #10
Source File: densenet_cascade.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #11
Source File: resnet_addup_nonorm2_gated_noffn.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #12
Source File: pa_resnet.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #13
Source File: densenet.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #14
Source File: hmm_controls2.py From attn2d with MIT License

def forward_gate(self, x):
    for l in self.gate[:-1]:
        x = F.glu(l(x))
    return self.gate[-1](x)
Example #15
Source File: resnet_addup_nonorm2_gated.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #16
Source File: test_pyprof_nvtx.py From apex with BSD 3-Clause "New" or "Revised" License

def test_glu(self):
    inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
    output = F.glu(inp, dim=-1)
Example #17
Source File: model.py From Mixture-of-Embedding-Experts with Apache License 2.0

def forward(self, x):
    x1 = self.fc(x)
    if self.add_batch_norm:
        x1 = self.batch_norm(x1)
    x = th.cat((x, x1), 1)
    return F.glu(x, 1)
Example #18
Source File: module.py From OpenTransformer with MIT License

def __init__(self, idim, hidden_units, dropout_rate, activation='relu'):
    super(PositionwiseFeedForward, self).__init__()
    self.activation = activation
    self.w_1 = nn.Linear(idim, hidden_units * 2 if activation == 'glu' else hidden_units)
    self.w_2 = nn.Linear(hidden_units, idim)
    self.dropout = nn.Dropout(dropout_rate)
Example #19
Source File: module.py From OpenTransformer with MIT License

def forward(self, x):
    x = self.w_1(x)
    if self.activation == 'relu':
        x = F.relu(x)
    elif self.activation == 'tanh':
        x = F.tanh(x)
    elif self.activation == 'glu':
        x = F.glu(x)
    else:
        raise NotImplementedError
    return self.w_2(self.dropout(x))
Example #20
Source File: fconv.py From XSum with MIT License

def forward(self, src_tokens, src_lengths):
    # embed tokens and positions
    x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
    x = F.dropout(x, p=self.dropout, training=self.training)
    input_embedding = x

    # project to size of convolution
    x = self.fc1(x)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # temporal convolutions
    for proj, conv in zip(self.projections, self.convolutions):
        residual = x if proj is None else proj(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        padding_l = (conv.kernel_size[0] - 1) // 2
        padding_r = conv.kernel_size[0] // 2
        x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
        x = conv(x)
        x = F.glu(x, dim=2)
        x = (x + residual) * math.sqrt(0.5)

    # T x B x C -> B x T x C
    x = x.transpose(1, 0)

    # project back to size of embedding
    x = self.fc2(x)

    # scale gradients (this only affects backward, not forward)
    x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

    # add output to input embedding for attention
    y = (x + input_embedding) * math.sqrt(0.5)

    return x, y
Example #21
Source File: conv.py From nsf with MIT License

def forward(self, inputs):
    temps = self.conv_transpose(inputs)
    outputs = F.glu(temps, dim=1)
    return outputs
Example #22
Source File: fconv_self_att.py From crosentgec with GNU General Public License v3.0

def forward(self, src_tokens, src_lengths):
    # embed tokens and positions
    x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
    x = F.dropout(x, p=self.dropout, training=self.training)
    input_embedding = x.transpose(0, 1)

    # project to size of convolution
    x = self.fc1(x)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # temporal convolutions
    for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
        residual = x if proj is None else proj(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        padding_l = (conv.kernel_size[0] - 1) // 2
        padding_r = conv.kernel_size[0] // 2
        x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
        x = conv(x)
        x = F.glu(x, dim=2)
        if attention is not None:
            x = attention(x)
        x = (x + residual) * math.sqrt(0.5)

    # T x B x C -> B x T x C
    x = x.transpose(1, 0)

    # project back to size of embedding
    x = self.fc2(x)

    # scale gradients (this only affects backward, not forward)
    x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

    # add output to input embedding for attention
    y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

    return {
        'encoder_out': (x, y),
    }
Example #23
Source File: w2l_conv_glu_enc.py From fairseq with MIT License

def forward(self, src_tokens, src_lengths, **kwargs):
    """
    src_tokens: padded tensor (B, T, C * feat)
    src_lengths: tensor of original lengths of input utterances (B,)
    """
    B, T, _ = src_tokens.size()
    x = src_tokens.transpose(1, 2).contiguous()  # (B, feat, T) assuming C == 1

    for layer_idx in range(len(self.conv_layers)):
        x = self.conv_layers[layer_idx](x)
        x = F.glu(x, dim=1)
        x = F.dropout(x, p=self.dropouts[layer_idx], training=self.training)

    x = x.transpose(1, 2).contiguous()  # (B, T, 908)
    x = self.linear_layers[0](x)
    x = F.glu(x, dim=2)
    x = F.dropout(x, p=self.dropouts[-1])
    x = self.linear_layers[1](x)

    assert x.size(0) == B
    assert x.size(1) == T

    encoder_out = x.transpose(0, 1)  # (T, B, vocab_size)

    # need to debug this -- find a simpler/elegant way in pytorch APIs
    encoder_padding_mask = (
        torch.arange(T).view(1, T).expand(B, -1).to(x.device)
        >= src_lengths.view(B, 1).expand(-1, T)
    ).t()  # (B x T) -> (T x B)

    return {
        "encoder_out": encoder_out,  # (T, B, vocab_size)
        "encoder_padding_mask": encoder_padding_mask,  # (T, B)
    }
Example #24
Source File: fconv_self_att.py From training_results_v0.5 with Apache License 2.0

def forward(self, src_tokens, src_lengths):
    # embed tokens and positions
    x = self.embed_tokens(src_tokens) + self.embed_positions(src_tokens)
    x = F.dropout(x, p=self.dropout, training=self.training)
    input_embedding = x.transpose(0, 1)

    # project to size of convolution
    x = self.fc1(x)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # temporal convolutions
    for proj, conv, attention in zip(self.projections, self.convolutions, self.attention):
        residual = x if proj is None else proj(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        padding_l = (conv.kernel_size[0] - 1) // 2
        padding_r = conv.kernel_size[0] // 2
        x = F.pad(x, (0, 0, 0, 0, padding_l, padding_r))
        x = conv(x)
        x = F.glu(x, dim=2)
        if attention is not None:
            x = attention(x)
        x = (x + residual) * math.sqrt(0.5)

    # T x B x C -> B x T x C
    x = x.transpose(1, 0)

    # project back to size of embedding
    x = self.fc2(x)

    # scale gradients (this only affects backward, not forward)
    x = GradMultiply.apply(x, 1.0 / (2.0 * self.num_attention_layers))

    # add output to input embedding for attention
    y = (x + input_embedding.transpose(0, 1)) * math.sqrt(0.5)

    return {
        'encoder_out': (x, y),
    }
Example #25
Source File: model.py From howto100m with Apache License 2.0

def forward(self, x):
    x1 = self.fc(x)
    if self.add_batch_norm:
        x1 = self.batch_norm(x1)
    x = th.cat((x, x1), 1)
    return F.glu(x, 1)
Example #26
Source File: pa_gatenet.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #27
Source File: glu.py From neural_sp with Apache License 2.0

def forward(self, xs):
    return F.glu(self.fc(xs), dim=-1)
Example #28
Source File: gated_conv.py From neural_sp with Apache License 2.0

def forward(self, xs, xlens, task, use_cache=False, streaming=False):
    """Forward pass.

    Args:
        xs (FloatTensor): `[B, T, F]`
        xlens (IntTensor): `[B]`
    Returns:
        eouts (dict):
            xs (FloatTensor): `[B, T', C_o * F]`
            xlens (IntTensor): `[B]`

    """
    eouts = {'ys': {'xs': None, 'xlens': None},
             'ys_sub1': {'xs': None, 'xlens': None},
             'ys_sub2': {'xs': None, 'xlens': None}}

    bs, xmax, input_dim = xs.size()
    xs = xs.transpose(2, 1).unsqueeze(3)  # `[B, in_ch (input_dim), T, 1]`

    xs = self.layers(xs)  # `[B, out_ch, T, 1]`
    bs, out_ch, xmax, freq = xs.size()
    xs = xs.transpose(2, 1).contiguous().view(bs, xmax, -1)  # `[B, T, out_ch * feat_dim]`

    # weight normalization + GLU for the last fully-connected layer
    xs = F.glu(self.fc_glu(xs), dim=2)

    # Bridge layer
    if self.bridge is not None:
        xs = self.bridge(xs)
    # NOTE: no subsampling is conducted

    if task in ['all', 'ys']:
        eouts['ys']['xs'], eouts['ys']['xlens'] = xs, xlens
    else:
        raise NotImplementedError

    return eouts
Example #29
Source File: pa_gatenet3.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)
Example #30
Source File: pa_gatenet6.py From attn2d with MIT License

def forward(self, x):
    return F.glu(self.linear(x), dim=-1)