Python torch.nn.TransformerEncoder() Examples

The following are 11 code examples of torch.nn.TransformerEncoder(), each taken from an open-source project; the source file and license are listed above each snippet. For related building blocks, see the other functions and classes of the torch.nn module.
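Before the project snippets, here is a minimal, self-contained sketch of the basic API. The class and argument names are the standard PyTorch ones; the tensor sizes are arbitrary.

import torch
import torch.nn as nn

# One self-attention + feed-forward block; d_model must be divisible by nhead.
layer = nn.TransformerEncoderLayer(d_model=512, nhead=8, dim_feedforward=2048, dropout=0.1)

# Stack six copies of that layer, with an optional final LayerNorm.
encoder = nn.TransformerEncoder(layer, num_layers=6, norm=nn.LayerNorm(512))

# By default the expected input layout is (sequence length, batch size, d_model).
src = torch.rand(10, 32, 512)
out = encoder(src)  # -> (10, 32, 512)

# Optional arguments: mask of shape (10, 10) for attention masking, and
# src_key_padding_mask of shape (32, 10) with True marking padded positions.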
Example #1
Source File: model.py    From examples with BSD 3-Clause "New" or "Revised" License
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        try:
            from torch.nn import TransformerEncoder, TransformerEncoderLayer
        except ImportError:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights() 
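The extract above shows only the constructor. For reference, the forward pass of this kind of word language model typically scales the embeddings, adds positional encodings, applies the encoder under a causal mask, and projects back to the vocabulary. The sketch below is illustrative only (forward_sketch is a made-up helper, not part of the project):

import math
import torch

def forward_sketch(model, src):
    # src: (seq_len, batch) of token indices.
    seq_len = src.size(0)
    # Additive causal mask: -inf above the diagonal, so position i attends only to j <= i.
    src_mask = torch.triu(torch.full((seq_len, seq_len), float('-inf')), diagonal=1).to(src.device)
    x = model.encoder(src) * math.sqrt(model.ninp)  # token embedding, scaled
    x = model.pos_encoder(x)                        # add positional encoding
    x = model.transformer_encoder(x, src_mask)      # (seq_len, batch, ninp)
    return model.decoder(x)                         # (seq_len, batch, ntoken)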
Example #2
Source File: py_Transformer.py    From NLP_Toolkit with Apache License 2.0
def __init__(self, src_vocab, trg_vocab, trg_vocab2, d_model, ff_dim, num, n_heads,\
                 max_encoder_len, max_decoder_len, mappings, idx_mappings):
        super(pyTransformer, self).__init__()
        self.src_vocab = src_vocab
        self.trg_vocab = trg_vocab
        self.trg_vocab2 = trg_vocab2
        self.d_model = d_model
        self.ff_dim = ff_dim
        self.num = num
        self.n_heads = n_heads
        self.max_encoder_len = max_encoder_len
        self.max_decoder_len = max_decoder_len
        self.mappings = mappings
        self.idx_mappings = idx_mappings
        self.embed1 = nn.Embedding(src_vocab, d_model)
        self.embed2 = nn.Embedding(trg_vocab, d_model)
        #self.transformer = nn.Transformer(d_model=d_model, nhead=n_heads, num_encoder_layers=num,\
        #                                  num_decoder_layers=num, dim_feedforward=ff_dim, dropout=0.1)
        self.encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, n_heads, ff_dim, dropout=0.1), \
                                             num_layers=num, norm=nn.LayerNorm(normalized_shape=d_model, eps=1e-6))
        self.decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model, n_heads, ff_dim, dropout=0.1),\
                                             num_layers=num, norm=nn.LayerNorm(normalized_shape=d_model, eps=1e-6))
        self.fc1 = nn.Linear(d_model, trg_vocab)
        self.fc2 = nn.Linear(d_model, trg_vocab2) 
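With both an nn.TransformerEncoder and an nn.TransformerDecoder in place, the usual wiring feeds the encoder output to the decoder as memory. The call below is a generic sketch of that pattern (the shapes are arbitrary and the inputs are assumed to be already embedded), not the project's actual forward code:

import torch
import torch.nn as nn

d_model, n_heads, ff_dim, num = 512, 8, 2048, 6
encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, n_heads, ff_dim, dropout=0.1),
                                num_layers=num, norm=nn.LayerNorm(d_model, eps=1e-6))
decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model, n_heads, ff_dim, dropout=0.1),
                                num_layers=num, norm=nn.LayerNorm(d_model, eps=1e-6))

src = torch.rand(20, 4, d_model)  # (source length, batch, d_model), already embedded
tgt = torch.rand(15, 4, d_model)  # (target length, batch, d_model), already embedded

memory = encoder(src)  # encode the source once
# Causal mask so each target position only sees earlier target positions.
tgt_mask = torch.triu(torch.full((tgt.size(0), tgt.size(0)), float('-inf')), diagonal=1)
out = decoder(tgt, memory, tgt_mask=tgt_mask)  # (15, 4, d_model)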
Example #3
Source File: seq2seq_transformer.py    From MultiTurnDialogZoo with MIT License
def __init__(self, input_vocab_size, opt_vocab_size, d_model, nhead,
                 num_encoder_layers, dim_feedforward, position_embed_size=300,
                 utter_n_layer=2, dropout=0.3, sos=0, pad=0, teach_force=1):
        super(Transformer, self).__init__()
        self.d_model = d_model
        self.hidden_size = d_model
        self.embed_src = nn.Embedding(input_vocab_size, d_model)
        # positional encoding; max_len is set by position_embed_size (300 by default)
        self.pos_enc = PositionEmbedding(d_model, dropout=dropout,
                                         max_len=position_embed_size)
        self.input_vocab_size = input_vocab_size
        self.utter_n_layer = utter_n_layer
        self.opt_vocab_size = opt_vocab_size
        self.pad, self.sos = pad, sos
        self.teach_force = teach_force
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, 
                                                   dim_feedforward=dim_feedforward, 
                                                   dropout=dropout,  activation='gelu')
        self.encoder = nn.TransformerEncoder(encoder_layer,
                                             num_layers=num_encoder_layers)
        
        self.decoder = Decoder(d_model, d_model, opt_vocab_size, 
                               n_layers=utter_n_layer, dropout=dropout, nhead=nhead) 
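The constructor stores the pad index, which suggests the encoder is later called with a key-padding mask. A minimal sketch of how such a mask is typically built for nn.TransformerEncoder (illustrative, not taken from the project):

import torch

def build_src_key_padding_mask(src, pad_idx):
    # src: (seq_len, batch) of token indices.
    # nn.TransformerEncoder expects a boolean mask of shape (batch, seq_len),
    # with True at the positions that should be ignored (padding).
    return (src == pad_idx).transpose(0, 1)

# usage: encoder(embedded_src, src_key_padding_mask=build_src_key_padding_mask(src, pad_idx))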
Example #4
Source File: pytorch_U2GNN_UnSup.py    From Graph-Transformer with Apache License 2.0
def __init__(self, vocab_size, feature_dim_size, ff_hidden_size, sampled_num,
                 num_self_att_layers, num_U2GNN_layers, dropout, device):
        super(TransformerU2GNN, self).__init__()
        self.feature_dim_size = feature_dim_size
        self.ff_hidden_size = ff_hidden_size
        self.num_self_att_layers = num_self_att_layers  # Each U2GNN layer consists of a number of self-attention layers
        self.num_U2GNN_layers = num_U2GNN_layers
        self.vocab_size = vocab_size
        self.sampled_num = sampled_num
        self.device = device
        #
        self.u2gnn_layers = torch.nn.ModuleList()
        for _ in range(self.num_U2GNN_layers):
            encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1, dim_feedforward=self.ff_hidden_size, dropout=0.5) # embed_dim must be divisible by num_heads
            self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
        # Linear function
        self.dropouts = nn.Dropout(dropout)
        self.ss = SampledSoftmax(self.vocab_size, self.sampled_num, self.feature_dim_size*self.num_U2GNN_layers, self.device) 
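The ModuleList holds several independent nn.TransformerEncoder blocks, and the sampled-softmax input size of feature_dim_size*num_U2GNN_layers suggests their outputs are concatenated. A hedged sketch of that pattern (illustrative only, not the project's actual forward code):

import torch

def apply_u2gnn_blocks(u2gnn_layers, x):
    # x: (seq_len, batch, feature_dim_size) node/neighbour features.
    outputs = []
    for encoder in u2gnn_layers:  # each entry is an nn.TransformerEncoder
        x = encoder(x)            # refine the representations
        outputs.append(x)
    # Concatenate per-block outputs along the feature dimension:
    # (seq_len, batch, feature_dim_size * num_U2GNN_layers).
    return torch.cat(outputs, dim=-1)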
Example #5
Source File: pytorch_U2GNN_Sup.py    From Graph-Transformer with Apache License 2.0
def __init__(self, feature_dim_size, ff_hidden_size, num_classes,
                 num_self_att_layers, dropout, num_U2GNN_layers):
        super(TransformerU2GNN, self).__init__()
        self.feature_dim_size = feature_dim_size
        self.ff_hidden_size = ff_hidden_size
        self.num_classes = num_classes
        self.num_self_att_layers = num_self_att_layers  # Each U2GNN layer consists of a number of self-attention layers
        self.num_U2GNN_layers = num_U2GNN_layers
        #
        self.u2gnn_layers = torch.nn.ModuleList()
        for _ in range(self.num_U2GNN_layers):
            encoder_layers = TransformerEncoderLayer(d_model=self.feature_dim_size, nhead=1, dim_feedforward=self.ff_hidden_size, dropout=0.5)
            self.u2gnn_layers.append(TransformerEncoder(encoder_layers, self.num_self_att_layers))
        # Linear function
        self.predictions = torch.nn.ModuleList()
        self.dropouts = torch.nn.ModuleList()
        # self.predictions.append(nn.Linear(feature_dim_size, num_classes)) # For including feature vectors to predict graph labels???
        for _ in range(self.num_U2GNN_layers):
            self.predictions.append(nn.Linear(self.feature_dim_size, self.num_classes))
            self.dropouts.append(nn.Dropout(dropout)) 
Example #6
Source File: transformer.py    From pykaldi2 with MIT License
def __init__(self, dim_feat, 
                dim_model, 
                nheads, 
                dim_feedforward, 
                nlayers, 
                dropout, 
                output_size, 
                kernel_size=3,
                stride=1):
        super(TransformerAM, self).__init__()
        self.pos_encoder = PositionalEncoding(dim_model, dropout)
        self.input_layer = nn.Linear(dim_feat, dim_model)
        self.output_layer = nn.Linear(dim_model, output_size)
        encoder_norm = nn.LayerNorm(dim_model)
        encoder_layer = TransformerEncoderLayerWithConv1d(dim_model, nheads, dim_feedforward, dropout, kernel_size, stride)
        self.transformer = nn.TransformerEncoder(encoder_layer, nlayers, norm=encoder_norm) 
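TransformerEncoderLayerWithConv1d is a project-specific layer, but the surrounding modules imply the usual acoustic-model pipeline: a linear front end, positional encoding, the encoder stack, and an output projection. A rough sketch under that assumption (not the project's actual forward code):

def acoustic_forward_sketch(model, feats):
    # feats: (time, batch, dim_feat) acoustic feature frames.
    x = model.input_layer(feats)  # project features to dim_model
    x = model.pos_encoder(x)      # add positional information
    x = model.transformer(x)      # stacked encoder layers with Conv1d
    return model.output_layer(x)  # (time, batch, output_size) logits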
Example #7
Source File: model.py    From PyTorch with MIT License
def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        try:
            from torch.nn import TransformerEncoder, TransformerEncoderLayer
        except ImportError:
            raise ImportError('TransformerEncoder module does not exist in PyTorch 1.1 or lower.')
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights() 
Example #8
Source File: model.py    From CoupletAI with MIT License
def __init__(self, vocab_size: int, embed_dim: int, hidden_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.hidden2tag = nn.Linear(hidden_dim, vocab_size)
        self.mapper = nn.Linear(embed_dim, hidden_dim)
        layer = nn.TransformerEncoderLayer(hidden_dim, 4, dim_feedforward=512)
        self.encoder = nn.TransformerEncoder(layer, 4) 
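A plausible forward pass for this model (not included in the extract; sketched here assuming batch-first token indices as input) embeds the tokens, projects them to the encoder width, and tags each position:

def forward_sketch(model, tokens):
    # tokens: (batch, seq_len) token indices.
    x = model.embedding(tokens)  # (batch, seq_len, embed_dim)
    x = model.mapper(x)          # (batch, seq_len, hidden_dim)
    x = x.transpose(0, 1)        # nn.TransformerEncoder expects (seq_len, batch, hidden_dim)
    x = model.encoder(x)
    return model.hidden2tag(x.transpose(0, 1))  # (batch, seq_len, vocab_size)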
Example #9
Source File: tk_native.py    From transformer-kernel-ranking with Apache License 2.0
def __init__(self,
                 _embsize:int,
                 kernels_mu: List[float],
                 kernels_sigma: List[float],
                 att_heads: int,
                 att_layer: int,
                 att_proj_dim: int,
                 att_ff_dim: int):

        super(TK_Native_v1, self).__init__()

        n_kernels = len(kernels_mu)

        if len(kernels_mu) != len(kernels_sigma):
            raise Exception("len(kernels_mu) != len(kernels_sigma)")

        # static - kernel size & magnitude variables
        self.mu = Variable(torch.cuda.FloatTensor(kernels_mu), requires_grad=False).view(1, 1, 1, n_kernels)
        self.sigma = Variable(torch.cuda.FloatTensor(kernels_sigma), requires_grad=False).view(1, 1, 1, n_kernels)
        self.nn_scaler = nn.Parameter(torch.full([1], 0.01, dtype=torch.float32, requires_grad=True))
        self.mixer = nn.Parameter(torch.full([1,1,1], 0.5, dtype=torch.float32, requires_grad=True))

        encoder_layer = nn.TransformerEncoderLayer(_embsize, att_heads, dim_feedforward=att_ff_dim, dropout=0)
        self.contextualizer = nn.TransformerEncoder(encoder_layer, att_layer, norm=None)

        # this does not really do "attention" - just a plain cosine matrix calculation (without learnable weights) 
        self.cosine_module = CosineMatrixAttention()

        # bias is set to True in original code (we found it to not help, how could it?)
        self.dense = nn.Linear(n_kernels, 1, bias=False)
        self.dense_mean = nn.Linear(n_kernels, 1, bias=False)
        self.dense_comb = nn.Linear(2, 1, bias=False)

        # init with small weights, otherwise the dense output is way too high for the tanh -> resulting in loss == 1 all the time
        torch.nn.init.uniform_(self.dense.weight, -0.014, 0.014)  # inits taken from matchzoo
        torch.nn.init.uniform_(self.dense_mean.weight, -0.014, 0.014)  # inits taken from matchzoo
        #self.dense.bias.data.fill_(0.0) 
Example #10
Source File: pytorch_transformer_wrapper.py    From allennlp with Apache License 2.0
def __init__(
        self,
        input_dim: int,
        num_layers: int,
        feedforward_hidden_dim: int = 2048,
        num_attention_heads: int = 8,
        positional_encoding: Optional[str] = None,
        positional_embedding_size: int = 512,
        dropout_prob: float = 0.1,
        activation: str = "relu",
    ) -> None:
        super().__init__()

        layer = nn.TransformerEncoderLayer(
            d_model=input_dim,
            nhead=num_attention_heads,
            dim_feedforward=feedforward_hidden_dim,
            dropout=dropout_prob,
            activation=activation,
        )
        self._transformer = nn.TransformerEncoder(layer, num_layers)
        self._input_dim = input_dim

        # initialize parameters
        # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        if positional_encoding is None:
            self._sinusoidal_positional_encoding = False
            self._positional_embedding = None
        elif positional_encoding == "sinusoidal":
            self._sinusoidal_positional_encoding = True
            self._positional_embedding = None
        elif positional_encoding == "embedding":
            self._sinusoidal_positional_encoding = False
            self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
        else:
            raise ValueError(
                "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
            ) 
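Depending on positional_encoding, the wrapper adds either sinusoidal features or a learned positional embedding before calling the nn.TransformerEncoder. The forward logic is not part of this extract; the following is a hedged sketch of the sinusoidal variant, assuming batch-first inputs with an even input_dim:

import math
import torch

def add_sinusoidal_positions(x):
    # x: (batch, seq_len, input_dim); adds the standard sin/cos positional features.
    _, seq_len, dim = x.shape
    position = torch.arange(seq_len, dtype=torch.float, device=x.device).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, dim, 2, dtype=torch.float, device=x.device)
                         * (-math.log(10000.0) / dim))
    pe = torch.zeros(seq_len, dim, device=x.device)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return x + pe.unsqueeze(0)

def encode_sketch(wrapper, inputs):
    x = add_sinusoidal_positions(inputs)  # (batch, seq_len, input_dim)
    x = x.transpose(0, 1)                 # nn.TransformerEncoder expects (seq_len, batch, dim)
    out = wrapper._transformer(x)
    return out.transpose(0, 1)            # back to batch-first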
Example #11
Source File: torch_transformer_encoder.py    From summarus with Apache License 2.0
def __init__(
        self,
        input_dim: int,
        num_layers: int,
        feedforward_hidden_dim: int = 2048,
        num_attention_heads: int = 8,
        positional_encoding: Optional[str] = None,
        positional_embedding_size: int = 512,
        dropout_prob: float = 0.1,
        activation: str = "relu",
    ) -> None:
        super().__init__()

        layer = nn.TransformerEncoderLayer(
            d_model=input_dim,
            nhead=num_attention_heads,
            dim_feedforward=feedforward_hidden_dim,
            dropout=dropout_prob,
            activation=activation,
        )
        self._transformer = nn.TransformerEncoder(layer, num_layers)
        self._input_dim = input_dim

        # initialize parameters
        # We do this before the embeddings are initialized so we get the default initialization for the embeddings.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

        if positional_encoding is None:
            self._sinusoidal_positional_encoding = False
            self._positional_embedding = None
        elif positional_encoding == "sinusoidal":
            self._sinusoidal_positional_encoding = True
            self._positional_embedding = None
        elif positional_encoding == "embedding":
            self._sinusoidal_positional_encoding = False
            self._positional_embedding = nn.Embedding(positional_embedding_size, input_dim)
        else:
            raise ValueError(
                "positional_encoding must be one of None, 'sinusoidal', or 'embedding'"
            )