# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. from __future__ import print_function from __future__ import division import torch import torch.nn as nn import torch.nn.functional as TorchF ################################# class Ecoder(nn.Module): def __init__(self, N_CHANNELS, N_KERNELS, \ BATCH_SIZE, IMG_DIM, VERBOSE=False, pred_cam=False): super(Ecoder, self).__init__() block1 = self.convblock(N_CHANNELS, 64, N_KERNELS, stride=2, pad=2) block2 = self.convblock(64, 128, N_KERNELS, stride=2, pad=2) block3 = self.convblock(128, 256, N_KERNELS, stride=2, pad=2) linear1 = self.linearblock(16384, 1024) linear2 = self.linearblock(1024, 1024) self.linear3 = nn.Linear(1024, 1024) self.pred_cam = pred_cam if self.pred_cam: self.pred_cam_linear_1 = nn.Linear(1024, 128) self.pred_cam_linear_2 = nn.Linear(128, 9 + 3) linear4 = self.linearblock(1024, 1024) linear5 = self.linearblock(1024, 2048) self.linear6 = nn.Linear(2048, 1926) linear42 = self.linearblock(1024, 1024) linear52 = self.linearblock(1024, 2048) self.linear62 = nn.Linear(2048, 1926) ################################################# all_blocks = block1 + block2 + block3 self.encoder1 = nn.Sequential(*all_blocks) all_blocks = linear1 + linear2 self.encoder2 = nn.Sequential(*all_blocks) all_blocks = linear4 + linear5 self.decoder = nn.Sequential(*all_blocks) all_blocks = linear42 + linear52 self.decoder2 = nn.Sequential(*all_blocks) # Initialize with Xavier Glorot for m in self.modules(): if isinstance(m, nn.ConvTranspose2d) \ or isinstance(m, nn.Linear) \ or isinstance(object, nn.Conv2d): nn.init.xavier_uniform_(m.weight) nn.init.normal_(m.weight, mean=0, std=0.001) # Free some memory del all_blocks, block1, block2, block3, \ linear1, linear2, linear4, linear5, \ linear42, linear52 # Print summary if VERBOSE: self.summary(BATCH_SIZE, N_CHANNELS, IMG_DIM) def convblock(self, indim, outdim, ker, stride, pad): block2 = [ nn.Conv2d(indim, outdim, ker, stride, pad), nn.BatchNorm2d(outdim), nn.ReLU() ] return block2 def linearblock(self, indim, outdim): block2 = [ nn.Linear(indim, outdim), nn.BatchNorm1d(outdim), nn.ReLU() ] return block2 def forward(self, x): for layer in self.encoder1: x = layer(x) bnum = x.shape[0] x = x.view(bnum, -1) # flatten the encoder1 output for layer in self.encoder2: x = layer(x) x = self.linear3(x) if self.pred_cam: cam_x = TorchF.relu(self.pred_cam_linear_1(x)) pred_cam = self.pred_cam_linear_2(cam_x) x1 = x for layer in self.decoder: x1 = layer(x1) x1 = self.linear6(x1) x2 = x for layer in self.decoder2: x2 = layer(x2) x2 = self.linear62(x2) if self.pred_cam: return x1, x2, pred_cam return x1, x2 def summary(self, BATCH_SIZE, N_CHANNELS, IMG_DIM): x = torch.zeros(BATCH_SIZE, N_CHANNELS, IMG_DIM, IMG_DIM) # Print the title in a good design # for easy recognition. print() summary_title = '| {} Summary |'.format(self.__class__.__name__) for _ in range(len(summary_title)): print('-', end='') print() print(summary_title) for _ in range(len(summary_title)): print('-', end='') print('\n') # Run forward pass while not tracking history on # tape using `torch.no_grad()` for printing the # output shape of each neural layer operation. print('Input: {}'.format(x.size())) with torch.no_grad(): for layer in self.encoder1: x = layer(x) print('Out: {} \tLayer: {}'.format(x.size(), layer)) bnum = x.shape[0] x = x.view(bnum, -1) # flatten the encoder1 output print('Out: {} \tlayer: {}'.format(x.size(), 'Reshape: Flatten')) for layer in self.encoder2: x = layer(x) print('Out: {} \tLayer: {}'.format(x.size(), layer)) x = self.linear3(x) print('Out: {} \tLayer: {}'.format(x.size(), self.linear3)) for layer in self.decoder: x = layer(x) print('Out: {} \tLayer: {}'.format(x.size(), layer)) x = self.linear6(x) print('Out: {} \tLayer: {}'.format(x.size(), self.linear6)) if __name__ == '__main__': ################################### BATCH_SIZE = 64 IMG_DIM = 64 N_CHANNELS = 4 N_KERNELS = 5 VERBOSE = True model = Ecoder(N_CHANNELS, N_KERNELS, BATCH_SIZE, IMG_DIM, VERBOSE)