Python torch.nn.Linear() Examples

The following are 25 code examples of torch.nn.Linear(), drawn from open-source projects. Each example is annotated with its original project, source file, and license. You may also want to check out the other available functions and classes of the torch.nn module.
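Before the project examples, here is a minimal self-contained sketch of the nn.Linear API itself. The layer stores a weight of shape (out_features, in_features) and an optional bias, and applies y = x @ W.T + b to the last dimension of its input:

import torch
import torch.nn as nn

layer = nn.Linear(in_features=20, out_features=5)
print(layer.weight.shape)   # torch.Size([5, 20])
print(layer.bias.shape)     # torch.Size([5])

# Any number of leading batch dimensions is allowed; the affine
# transform is applied to the last dimension only.
x = torch.randn(128, 20)
print(layer(x).shape)       # torch.Size([128, 5])

x3 = torch.randn(4, 10, 20)
print(layer(x3).shape)      # torch.Size([4, 10, 5])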
Example #1
Source File: encoder.py    From hgraph2graph with MIT License
def __init__(self, rnn_type, input_size, node_fdim, hidden_size, depth, dropout):
        super(MPNEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.depth = depth
        self.W_o = nn.Sequential( 
                nn.Linear(node_fdim + hidden_size, hidden_size), 
                nn.ReLU(),
                nn.Dropout(dropout)
        )

        if rnn_type == 'GRU':
            self.rnn = GRU(input_size, hidden_size, depth) 
        elif rnn_type == 'LSTM':
            self.rnn = LSTM(input_size, hidden_size, depth) 
        else:
            raise ValueError('unsupported rnn cell type ' + rnn_type) 
Example #2
Source File: model.py    From cat-bbs with MIT License
def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(MyResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # note the increasing dilation
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilation=1)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)

        # these layers will not be used
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_() 
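The init loop above is He (Kaiming) initialization written out by hand: each conv weight is drawn from N(0, sqrt(2/n)) with n = kernel_h * kernel_w * out_channels. On current PyTorch the same scheme is available through nn.init; a roughly equivalent sketch:

import torch.nn as nn

conv = nn.Conv2d(64, 128, kernel_size=3, padding=1)
# mode='fan_out' reproduces n = kernel_h * kernel_w * out_channels.
nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')

bn = nn.BatchNorm2d(128)
nn.init.constant_(bn.weight, 1)
nn.init.constant_(bn.bias, 0)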
Example #3
Source File: model2.py    From controllable-text-attribute-transfer with Apache License 2.0
def __init__(self, input_size, hidden_size, correlation_func=1, do_similarity=False):
        super(AttentionScore, self).__init__()
        self.correlation_func = correlation_func
        self.hidden_size = hidden_size

        if correlation_func == 2 or correlation_func == 3:
            self.linear = nn.Linear(input_size, hidden_size, bias=False)
            if do_similarity:
                self.diagonal = Parameter(torch.ones(1, 1, 1) / (hidden_size ** 0.5), requires_grad=False)
            else:
                self.diagonal = Parameter(torch.ones(1, 1, hidden_size), requires_grad=True)

        if correlation_func == 4:
            self.linear = nn.Linear(input_size, input_size, bias=False)

        if correlation_func == 5:
            self.linear = nn.Linear(input_size, hidden_size, bias=False) 
Example #4
Source File: network.py    From Collaborative-Learning-for-Weakly-Supervised-Object-Detection with MIT License
def _init_modules(self): 
    self._init_head_tail()

    # rpn
    self.rpn_net = nn.Conv2d(self._net_conv_channels, cfg.RPN_CHANNELS, [3, 3], padding=1)

    self.rpn_cls_score_net = nn.Conv2d(cfg.RPN_CHANNELS, self._num_anchors * 2, [1, 1])
    
    self.rpn_bbox_pred_net = nn.Conv2d(cfg.RPN_CHANNELS, self._num_anchors * 4, [1, 1])

    self.cls_score_net_fast = nn.Linear(self._fc7_channels, self._num_classes+1)
    self.bbox_pred_net_fast = nn.Linear(self._fc7_channels, (self._num_classes+1) * 4)


    self.cls_score_net = nn.Linear(self._fc7_channels, self._num_classes)  # between class
    self.bbox_pred_net = nn.Linear(self._fc7_channels, self._num_classes)  # between boxes

    self.init_weights() 
Example #5
Source File: model.py    From subword-qac with MIT License
def __init__(self, config):
        super(LanguageModel, self).__init__()
        self.config = config
        self.ntoken = ntoken = config.ntoken
        self.ninp = ninp = config.ninp
        self.nhid = nhid = config.nhid
        self.nlayers = nlayers = config.nlayers

        self.encoder = nn.Embedding(ntoken, ninp)
        self.dropouti = nn.Dropout(config.dropouti) if config.dropouti > 0 else None
        self.lstm = LSTM([ninp] + [nhid] * nlayers, bias=False, layernorm=True,
                         dropoutr=config.dropoutr, dropouth=config.dropouth, dropouto=config.dropouto)
        self.projection = nn.Linear(nhid, ninp)
        self.decoder = nn.Linear(ninp, ntoken)
        self.decoder.weight = self.encoder.weight

        self.init_weights() 
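The decoder.weight assignment above ties the output projection to the embedding: nn.Linear stores its weight as (out_features, in_features), so a Linear(ninp, ntoken) and an Embedding(ntoken, ninp) can share one (ntoken, ninp) tensor, which is also why the model first projects nhid back down to ninp. A standalone shape check (dimensions here are arbitrary):

import torch.nn as nn

ntoken, ninp = 1000, 256
encoder = nn.Embedding(ntoken, ninp)   # weight: (ntoken, ninp)
decoder = nn.Linear(ninp, ntoken)      # weight: (ntoken, ninp)
decoder.weight = encoder.weight        # one shared parameter tensor
assert decoder.weight.shape == (ntoken, ninp)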
Example #6
Source File: model.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        super(CW2_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.bnm1 = nn.BatchNorm2d(32, momentum=0.1)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.bnm2 = nn.BatchNorm2d(64, momentum=0.1)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.bnm3 = nn.BatchNorm2d(128, momentum=0.1)
        self.conv4 = nn.Conv2d(128, 128, 3)
        self.bnm4 = nn.BatchNorm2d(128, momentum=0.1)
        self.fc1 = nn.Linear(3200, 256)
        #self.dropout1 = nn.Dropout(p=0.35, inplace=False)
        self.bnm5 = nn.BatchNorm1d(256, momentum=0.1)
        self.fc2 = nn.Linear(256, 256)
        self.bnm6 = nn.BatchNorm1d(256, momentum=0.1)
        self.fc3 = nn.Linear(256, 10)
        #self.dropout2 = nn.Dropout(p=0.35, inplace=False)
        #self.dropout3 = nn.Dropout(p=0.35, inplace=False) 
Example #7
Source File: hgnn.py    From hgraph2graph with MIT License
def __init__(self, args):
        super(HierVGNN, self).__init__()
        self.latent_size = args.latent_size
        self.encoder = HierMPNEncoder(args.vocab, args.atom_vocab, args.rnn_type, args.embed_size, args.hidden_size, args.depthT, args.depthG, args.dropout)
        self.decoder = HierMPNDecoder(args.vocab, args.atom_vocab, args.rnn_type, args.embed_size, args.hidden_size, args.hidden_size, args.diterT, args.diterG, args.dropout, attention=True)
        self.encoder.tie_embedding(self.decoder.hmpn)

        self.T_mean = nn.Linear(args.hidden_size, args.latent_size)
        self.T_var = nn.Linear(args.hidden_size, args.latent_size)
        self.G_mean = nn.Linear(args.hidden_size, args.latent_size)
        self.G_var = nn.Linear(args.hidden_size, args.latent_size)

        self.W_tree = nn.Sequential( nn.Linear(args.hidden_size + args.latent_size, args.hidden_size), nn.ReLU() )
        self.W_graph = nn.Sequential( nn.Linear(args.hidden_size + args.latent_size, args.hidden_size), nn.ReLU() ) 
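The paired *_mean / *_var heads are the usual VAE pattern: one Linear predicts the latent mean, and the other is typically interpreted as a log-variance for reparameterized sampling. A generic sketch of that sampling step, assuming the *_var heads output log-variances (the hgraph2graph forward pass is not shown here):

import torch
import torch.nn as nn

hidden_size, latent_size = 250, 32
T_mean = nn.Linear(hidden_size, latent_size)
T_var = nn.Linear(hidden_size, latent_size)   # assumed: log-variance head

h = torch.randn(16, hidden_size)              # encoder outputs
mu, logvar = T_mean(h), T_var(h)
# Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
z = mu + torch.exp(0.5 * logvar) * torch.randn_like(mu)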
Example #8
Source File: test_wrappers.py    From mmdetection with Apache License 2.0
def test_linear():
    test_cases = OrderedDict([
        ('in_w', [10, 20]),
        ('in_h', [10, 20]),
        ('in_feature', [1, 3]),
        ('out_feature', [1, 3]),
    ])

    for in_h, in_w, in_feature, out_feature in product(
            *list(test_cases.values())):
        # wrapper op with 0-dim input
        x_empty = torch.randn(0, in_feature, requires_grad=True)
        torch.manual_seed(0)
        wrapper = Linear(in_feature, out_feature)
        wrapper_out = wrapper(x_empty)

        # torch op with 3-dim input as shape reference
        x_normal = torch.randn(3, in_feature)
        torch.manual_seed(0)
        ref = nn.Linear(in_feature, out_feature)
        ref_out = ref(x_normal)

        assert wrapper_out.shape[0] == 0
        assert wrapper_out.shape[1:] == ref_out.shape[1:]

        wrapper_out.sum().backward()
        assert wrapper.weight.grad is not None
        assert wrapper.weight.grad.shape == wrapper.weight.shape

        assert torch.equal(wrapper(x_normal), ref_out)

    # eval mode
    x_empty = torch.randn(0, in_feature)
    wrapper = Linear(in_feature, out_feature)
    wrapper.eval()
    wrapper(x_empty) 
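The Linear wrapper under test exists because, on the older PyTorch versions mmdetection supported, nn.Linear could not run on an empty (zero-row) batch; recent PyTorch releases handle this natively. A quick check on current PyTorch:

import torch
import torch.nn as nn

layer = nn.Linear(3, 4)
x_empty = torch.randn(0, 3)   # batch with zero rows
print(layer(x_empty).shape)   # torch.Size([0, 4])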
Example #9
Source File: hgnn.py    From hgraph2graph with MIT License
def __init__(self, args):
        super(HierCondVGNN, self).__init__(args)
        self.W_tree = nn.Sequential( nn.Linear(args.hidden_size + args.latent_size + args.cond_size, args.hidden_size), nn.ReLU() )
        self.W_graph = nn.Sequential( nn.Linear(args.hidden_size + args.latent_size + args.cond_size, args.hidden_size), nn.ReLU() )

        self.U_tree = nn.Sequential( nn.Linear(args.hidden_size + args.cond_size, args.hidden_size), nn.ReLU() )
        self.U_graph = nn.Sequential( nn.Linear(args.hidden_size + args.cond_size, args.hidden_size), nn.ReLU() ) 
Example #10
Source File: model.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        super(CW_Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)
        self.conv4 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(1600, 200)
        self.fc2 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200, 2) # Change to 10, 2 is just dummy!!!! 
Example #11
Source File: encoder.py    From hgraph2graph with MIT License
def __init__(self, vocab, avocab, rnn_type, embed_size, hidden_size, depthT, depthG, dropout):
        super(HierMPNEncoder, self).__init__()
        self.vocab = vocab
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.atom_size = atom_size = avocab.size()
        self.bond_size = bond_size = len(MolGraph.BOND_LIST) + MolGraph.MAX_POS

        self.E_c = nn.Sequential(
                nn.Embedding(vocab.size()[0], embed_size),
                nn.Dropout(dropout)
        )
        self.E_i = nn.Sequential(
                nn.Embedding(vocab.size()[1], embed_size),
                nn.Dropout(dropout)
        )
        self.W_c = nn.Sequential( 
                nn.Linear(embed_size + hidden_size, hidden_size), 
                nn.ReLU(),
                nn.Dropout(dropout)
        )
        self.W_i = nn.Sequential( 
                nn.Linear(embed_size + hidden_size, hidden_size), 
                nn.ReLU(),
                nn.Dropout(dropout)
        )

        self.E_a = torch.eye(atom_size).cuda()
        self.E_b = torch.eye( len(MolGraph.BOND_LIST) ).cuda()
        self.E_apos = torch.eye( MolGraph.MAX_POS ).cuda()
        self.E_pos = torch.eye( MolGraph.MAX_POS ).cuda()

        self.W_root = nn.Sequential( 
                nn.Linear(hidden_size * 2, hidden_size), 
                nn.Tanh() #root activation is tanh
        )
        self.tree_encoder = MPNEncoder(rnn_type, hidden_size + MolGraph.MAX_POS, hidden_size, hidden_size, depthT, dropout)
        self.inter_encoder = MPNEncoder(rnn_type, hidden_size + MolGraph.MAX_POS, hidden_size, hidden_size, depthT, dropout)
        self.graph_encoder = MPNEncoder(rnn_type, atom_size + bond_size, atom_size, hidden_size, depthG, dropout) 
Example #12
Source File: rnn.py    From hgraph2graph with MIT License
def __init__(self, input_size, hidden_size, depth):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.depth = depth

        self.W_z = nn.Linear(input_size + hidden_size, hidden_size)
        self.W_r = nn.Linear(input_size, hidden_size, bias=False)
        self.U_r = nn.Linear(hidden_size, hidden_size)
        self.W_h = nn.Linear(input_size + hidden_size, hidden_size) 
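These four projections are the building blocks of a GRU cell over concatenated [input, hidden] vectors. One plausible single update step consistent with the shapes above, as a sketch only (the actual message-passing loop in hgraph2graph aggregates neighbor messages and differs in detail):

import torch

def gru_step(x, h, W_z, W_r, U_r, W_h):
    # Standard GRU update: gates z and r, then a candidate state.
    z = torch.sigmoid(W_z(torch.cat([x, h], dim=-1)))
    r = torch.sigmoid(W_r(x) + U_r(h))
    h_tilde = torch.tanh(W_h(torch.cat([x, r * h], dim=-1)))
    return (1 - z) * h + z * h_tilde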
Example #13
Source File: ssd_vgg.py    From mmdetection with Apache License 2.0
def init_weights(self, pretrained=None):
        """Initialize the weights in backbone.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.features.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
                elif isinstance(m, nn.Linear):
                    normal_init(m, std=0.01)
        else:
            raise TypeError('pretrained must be a str or None')

        for m in self.extra.modules():
            if isinstance(m, nn.Conv2d):
                xavier_init(m, distribution='uniform')

        constant_init(self.l2_norm, self.l2_norm.scale) 
Example #14
Source File: double_bbox_head.py    From mmdetection with Apache License 2.0
def _add_fc_branch(self):
        """Add the fc branch which consists of a sequential of fc layers."""
        branch_fcs = nn.ModuleList()
        for i in range(self.num_fcs):
            fc_in_channels = (
                self.in_channels *
                self.roi_feat_area if i == 0 else self.fc_out_channels)
            branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
        return branch_fcs 
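Only the first fc receives the flattened RoI feature map, so its input width is in_channels * roi_feat_area; every later fc is fc_out_channels wide. With illustrative values (not taken from any particular config):

in_channels = 256       # e.g. FPN output channels (illustrative)
roi_feat_area = 7 * 7   # RoI feature resolution (illustrative)
fc_out_channels = 1024

# First fc: 256 * 49 = 12544 -> 1024; subsequent fcs: 1024 -> 1024.
print(in_channels * roi_feat_area)  # 12544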
Example #15
Source File: coarse_mask_head.py    From mmdetection with Apache License 2.0
def init_weights(self):
        for m in self.fcs.modules():
            if isinstance(m, nn.Linear):
                xavier_init(m)
        constant_init(self.fc_logits, 0.001) 
Example #16
Source File: grid_head.py    From mmdetection with Apache License 2.0
def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                # TODO: compare mode = "fan_in" or "fan_out"
                kaiming_init(m)
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                normal_init(m, std=0.001)
        nn.init.constant_(self.deconv2.bias, -np.log(0.99 / 0.01)) 
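The deconv2 bias is set to -log(0.99/0.01) ≈ -4.6, the usual prior-probability trick: after a sigmoid, the head initially predicts about 0.01 everywhere, which avoids large losses early in training when positives are rare. A quick numeric check:

import numpy as np

bias = -np.log(0.99 / 0.01)           # ≈ -4.595
print(1.0 / (1.0 + np.exp(-bias)))    # sigmoid(bias) ≈ 0.01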
Example #17
Source File: hgnn.py    From hgraph2graph with MIT License
def __init__(self, args):
        super(HierVAE, self).__init__()
        self.encoder = HierMPNEncoder(args.vocab, args.atom_vocab, args.rnn_type, args.embed_size, args.hidden_size, args.depthT, args.depthG, args.dropout)
        self.decoder = HierMPNDecoder(args.vocab, args.atom_vocab, args.rnn_type, args.embed_size, args.hidden_size, args.latent_size, args.diterT, args.diterG, args.dropout)
        self.encoder.tie_embedding(self.decoder.hmpn)
        self.latent_size = args.latent_size

        self.R_mean = nn.Linear(args.hidden_size, args.latent_size)
        self.R_var = nn.Linear(args.hidden_size, args.latent_size)

        #self.T_mean = nn.Linear(args.hidden_size, args.latent_size)
        #self.T_var = nn.Linear(args.hidden_size, args.latent_size)

        #self.G_mean = nn.Linear(args.hidden_size, args.latent_size)
        #self.G_var = nn.Linear(args.hidden_size, args.latent_size) 
Example #18
Source File: rnn.py    From hgraph2graph with MIT License
def __init__(self, input_size, hidden_size, depth):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.depth = depth

        self.W_i = nn.Sequential( nn.Linear(input_size + hidden_size, hidden_size), nn.Sigmoid() )
        self.W_o = nn.Sequential( nn.Linear(input_size + hidden_size, hidden_size), nn.Sigmoid() )
        self.W_f = nn.Sequential( nn.Linear(input_size + hidden_size, hidden_size), nn.Sigmoid() )
        self.W = nn.Sequential( nn.Linear(input_size + hidden_size, hidden_size), nn.Tanh() ) 
Example #19
Source File: sequence_encoder.py    From DDPAE-video-prediction with MIT License
def __init__(self, input_size, hidden_size, output_size, num_layers=1):
    super(SequenceEncoder, self).__init__()

    self.encoder = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                           num_layers=num_layers, batch_first=True)
    self.out_layer = nn.Linear(hidden_size, output_size)

    self.input_size = input_size
    self.hidden_size = hidden_size 
Example #20
Source File: pose_rnn.py    From DDPAE-video-prediction with MIT License
def __init__(self, n_components, n_frames_output, n_channels, image_size,
               image_latent_size, hidden_size, ngf, output_size, independent_components):
    super(PoseRNN, self).__init__()

    n_layers = int(np.log2(image_size)) - 1
    self.image_encoder = ImageEncoder(n_channels, image_latent_size, ngf, n_layers)
    # Encoder
    self.encode_rnn = nn.LSTM(image_latent_size + hidden_size, hidden_size,
                              num_layers=1, batch_first=True)
    if independent_components:
      predict_input_size = hidden_size
    else:
      predict_input_size = hidden_size * 2
    self.predict_rnn = nn.LSTM(predict_input_size, hidden_size, num_layers=1, batch_first=True)

    # Beta
    self.beta_mu_layer = nn.Linear(hidden_size, output_size)
    self.beta_sigma_layer = nn.Linear(hidden_size, output_size)

    # Initial pose
    self.initial_pose_rnn = nn.LSTM(hidden_size, hidden_size, num_layers=1, batch_first=True)
    self.initial_pose_mu = nn.Linear(hidden_size, output_size)
    self.initial_pose_sigma = nn.Linear(hidden_size, output_size)

    self.n_components = n_components
    self.n_frames_output = n_frames_output
    self.image_latent_size = image_latent_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.independent_components = independent_components 
Example #21
Source File: gpt.py    From comet-commonsense with Apache License 2.0
def __init__(self, model, cfg, trunc_and_reshape=True):
        super(LMHead, self).__init__()
        self.n_embd = cfg.hSize
        embed_shape = model.embed.weight.shape
        self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
        self.decoder.weight = model.embed.weight  # Tied weights
        self.trunc_and_reshape = trunc_and_reshape  # XD 
Example #22
Source File: model.py    From controllable-text-attribute-transfer with Apache License 2.0
def __init__(self, latent_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(latent_size, 100)
        self.relu1 = nn.LeakyReLU(0.2)
        self.fc2 = nn.Linear(100, 50)
        self.relu2 = nn.LeakyReLU(0.2)
        self.fc3 = nn.Linear(50, output_size)
        self.sigmoid = nn.Sigmoid() 
Example #23
Source File: network.py    From MomentumContrast.pytorch with MIT License
def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 128) 
Example #24
Source File: model.py    From controllable-text-attribute-transfer with Apache License 2.0
def __init__(self, encoder, decoder, src_embed, tgt_embed, generator, position_layer, model_size, latent_size):
        super(EncoderDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.generator = generator
        self.position_layer = position_layer
        self.model_size = model_size
        self.latent_size = latent_size
        self.sigmoid = nn.Sigmoid()

        # self.memory2latent = nn.Linear(self.model_size, self.latent_size)
        # self.latent2memory = nn.Linear(self.latent_size, self.model_size) 
Example #25
Source File: model.py    From controllable-text-attribute-transfer with Apache License 2.0
def __init__(self, d_model, vocab):
        super(Generator, self).__init__()
        self.proj = nn.Linear(d_model, vocab) 