# Python source code of convert

import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable
from model import NetworkImageNet
from genotypes import PNASNet
from operations import *
from utils import preprocess_for_eval

import sys
import os
# Extend the module search path before the `pnasnet` import below so that it
# can be resolved (presumably from the TensorFlow PNASNet code in
# ../PNASNet.TF -- verify against the repository layout).
sys.path.append('../PNASNet.TF')
# Expose only CUDA device 0 to this process. This is set before TensorFlow is
# imported below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import tensorflow as tf
from pnasnet import build_pnasnet_large, pnasnet_large_arg_scope
# Short alias for TF-Slim; its arg_scope is used when the TF graph is built.
slim = tf.contrib.slim


class ConvertPNASNet(object):
  """Copy the weights of a TensorFlow PNASNet checkpoint into a PyTorch model.

  Instantiating the class runs the whole pipeline: open a sample image, read
  every TF variable (plus the TF logits for that image), copy the values into a
  `NetworkImageNet` instance, verify the copy, and save the PyTorch state dict.

  Attributes set along the way:
    image: the PIL image opened from 'data/cat.jpg'.
    weight_dict: maps a TF variable name (without its last two characters,
      i.e. the ':0' suffix) to the variable's value.
    used_keys: names of the TF variables that have been copied so far.
    tf_logits, tf_end_points, tf_image_proc: outputs of the TF forward pass.
    pytorch_logits: logits of the converted PyTorch model for the same input.
  """

  def __init__(self):
    """Run the full conversion (read TF weights, then write PyTorch weights)."""
    self.image = Image.open('data/cat.jpg')
    self.read_tf_weight()
    self.write_pytorch_weight()

  def read_tf_weight(self):
    """Build the TF graph, restore the checkpoint and cache all variables.

    Fills `self.weight_dict` and stores the TF logits, end points and the
    preprocessed image for `self.image` so the PyTorch model can be compared
    against them later.
    """
    self.weight_dict = {}
    image_ph = tf.placeholder(tf.uint8, (None, None, 3))
    # NOTE(review): presumably resizes/normalises the image to 323x323 --
    # confirm against utils.preprocess_for_eval.
    image_proc = preprocess_for_eval(image_ph, 323, 323)
    with slim.arg_scope(pnasnet_large_arg_scope()):
      logits, end_points = build_pnasnet_large(
          tf.expand_dims(image_proc, 0), num_classes=1001, is_training=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # The context manager closes the session once all values are fetched;
    # everything kept on `self` afterwards is plain fetched data.
    with tf.Session(config=config) as sess:
      ckpt_restorer = tf.train.Saver()
      ckpt_restorer.restore(sess, '../PNASNet.TF/data/model.ckpt')

      variables = tf.global_variables()
      weight_vals = sess.run(variables)
      for variable, weight_val in zip(variables, weight_vals):
        # Drop the last two characters of the name (the ':0' suffix).
        self.weight_dict[variable.name[:-2]] = weight_val

      self.tf_logits, self.tf_end_points, self.tf_image_proc = sess.run(
          [logits, end_points, image_proc], feed_dict={image_ph: self.image})

  def write_pytorch_weight(self):
    """Copy the cached TF weights into a PyTorch model, verify and save it.

    Two checks are performed before saving:
      1. every TF variable was copied exactly once;
      2. the PyTorch logits differ from the TF logits by less than 1e-5.

    Raises:
      AssertionError: if either check fails or any shape does not match.
      KeyError: if an expected TF variable name is missing.
    """
    # NOTE(review): positional arguments are passed through unchanged; their
    # meaning is defined by model.NetworkImageNet -- confirm there.
    model = NetworkImageNet(216, 1001, 12, False, PNASNet)
    model.drop_path_prob = 0
    model.eval()

    self.used_keys = []
    self.convert_conv(model.conv0, 'conv0/weights')
    self._convert_bn_scope(model.conv0_bn, 'conv0_bn')
    self.convert_cell(model.stem1, 'cell_stem_0/')
    self.convert_cell(model.stem2, 'cell_stem_1/')

    for i in range(12):
      self.convert_cell(model.cells[i], 'cell_{}/'.format(i))

    self.convert_fc(model.classifier, 'final_layer/FC/weights',
        'final_layer/FC/biases')

    print('Conversion complete!')
    print('Check 1: whether all TF variables are used...')
    self._assert_all_weights_used()
    print('Pass!')

    model = model.cuda()
    # Move the last axis of the TF-preprocessed image to the front
    # (H, W, C -> C, H, W given the 3-channel placeholder) for PyTorch.
    image = self.tf_image_proc.transpose((2, 0, 1))
    image = Variable(self.Tensor(image)).cuda()
    logits, _ = model(image.unsqueeze(0))
    self.pytorch_logits = logits.data.cpu().numpy()

    print('Check 2: whether logits have small diff...')
    max_diff = np.max(np.abs(self.tf_logits - self.pytorch_logits))
    assert max_diff < 1e-5, 'max abs logit diff: {}'.format(max_diff)
    print('Pass!')

    model_path = 'data/PNASNet-5_Large.pth'
    torch.save(model.state_dict(), model_path)
    print('PyTorch model saved to {}'.format(model_path))

  def _assert_all_weights_used(self):
    """Assert that every key of `weight_dict` appears exactly once in `used_keys`.

    Comparing only the two lengths would miss the case where one variable is
    converted twice while another is never converted, so the names themselves
    are compared.
    """
    unused = sorted(set(self.weight_dict) - set(self.used_keys))
    assert sorted(self.used_keys) == sorted(self.weight_dict), (
        'TF variables were not each converted exactly once; '
        'unused: {}'.format(unused))

  def _convert_bn_scope(self, bn, scope):
    """Convert a batch norm whose four TF variables live directly under `scope`.

    `scope` is given without a trailing slash; the variables read are
    `scope + '/gamma'`, '/beta', '/moving_mean' and '/moving_variance'.
    """
    self.convert_bn(bn, scope + '/gamma', scope + '/beta',
        scope + '/moving_mean', scope + '/moving_variance')

  def _convert_sep_conv(self, sep_conv, prefix):
    """Convert both depthwise/pointwise/batch-norm stages of a `SepConv`.

    The kernel size of `sep_conv.op[1]` selects the TF scope names
    ('separable_KxK_1', 'bn_sep_KxK_1', ... '_2').
    """
    suffix = '{0}x{0}'.format(sep_conv.op[1].kernel_size[0])
    # Indices into sep_conv.op of (depthwise conv, pointwise conv, batch norm)
    # for the first and the second stage.
    stages = ((1, 2, 3), (5, 6, 7))
    for stage, (dw_idx, pw_idx, bn_idx) in enumerate(stages, 1):
      conv_scope = '{}separable_{}_{}/'.format(prefix, suffix, stage)
      self.convert_conv(sep_conv.op[dw_idx],
          conv_scope + 'depthwise_weights', sep=True)
      self.convert_conv(sep_conv.op[pw_idx],
          conv_scope + 'pointwise_weights', sep=False)
      self._convert_bn_scope(sep_conv.op[bn_idx],
          '{}bn_sep_{}_{}'.format(prefix, suffix, stage))

  def convert_cell(self, cell, name):
    """Copy all TF variables under the scope `name` into one PyTorch cell.

    Args:
      cell: PyTorch cell exposing `preprocess0`, `preprocess1` and `_ops`.
      name: TF scope prefix of the cell, including the trailing slash
        (e.g. 'cell_0/').
    """
    # cell.preprocess0
    assert isinstance(cell.preprocess0, (FactorizedReduce, ReLUConvBN, Identity))
    if isinstance(cell.preprocess0, FactorizedReduce):
      self.convert_conv(cell.preprocess0.conv_1, name + 'path1_conv/weights')
      self.convert_conv(cell.preprocess0.conv_2, name + 'path2_conv/weights')
      self._convert_bn_scope(cell.preprocess0.bn, name + 'final_path_bn')
    elif name + 'prev_1x1/weights' in self.weight_dict:
      self.convert_conv(cell.preprocess0.op[1], name + 'prev_1x1/weights')
      self._convert_bn_scope(cell.preprocess0.op[2], name + 'prev_bn')
    # else preprocess0 is Identity or = preprocess1; do nothing

    # cell.preprocess1
    assert isinstance(cell.preprocess1, ReLUConvBN)
    self.convert_conv(cell.preprocess1.op[1], name + '1x1/weights')
    self._convert_bn_scope(cell.preprocess1.op[2], name + 'beginning_bn')

    # cell._ops: consecutive pairs of ops form one 'comb_iter_K' scope, the
    # even-indexed op being its 'left/' branch and the odd-indexed its 'right/'.
    for i, op in enumerate(cell._ops):
      side = 'left/' if i % 2 == 0 else 'right/'
      prefix = name + 'comb_iter_{}/'.format(i // 2) + side
      if isinstance(op, SepConv):
        self._convert_sep_conv(op, prefix)
      elif isinstance(op, ReLUConvBN):
        # skip_connect with stride > 1
        self.convert_conv(op.op[1], prefix + '1x1/weights')
        self._convert_bn_scope(op.op[2], prefix + 'bn_1')
      elif isinstance(op, nn.Sequential):
        # max_pool or avg_pool with C_in != C_out
        self.convert_conv(op[1], prefix + '1x1/weights')
        self._convert_bn_scope(op[2], prefix + 'bn_1')
      # any other op type has no TF variables copied here

  def convert_conv(self, conv2d, weights_key, sep=False):
    """Copy one TF convolution kernel into `conv2d.weight`.

    Args:
      conv2d: PyTorch convolution module to overwrite.
      weights_key: name of the TF variable in `self.weight_dict`.
      sep: True for a depthwise kernel, False for a regular/pointwise kernel.

    Raises:
      AssertionError: if the transposed kernel does not match the shape of
        `conv2d.weight`.
    """
    weights = self.weight_dict[weights_key]
    if sep:
      # TF depthwise: [filter_height, filter_width, in_channels, channel_multiplier]
      # PyTorch: [out_channels, in_channels // groups, *kernel_size]
      # The shape assert below rejects kernels for which this axis order
      # does not line up with the target module.
      weights = np.transpose(weights, (2, 3, 0, 1))
    else:
      # TF: [filter_height, filter_width, in_channels, out_channels]
      # PyTorch: [out_channels, in_channels, *kernel_size]
      weights = np.transpose(weights, (3, 2, 0, 1))
    param = self.Param(weights)
    assert conv2d.weight.shape == param.shape, '{0} vs {1}'.format(
        conv2d.weight.shape, param.shape)
    conv2d.weight = param
    self.used_keys += [weights_key]

  def convert_bn(self, bn, gamma_key, beta_key, moving_mean_key, moving_var_key):
    """Copy the four TF batch-norm variables into the PyTorch module `bn`.

    gamma/beta become `bn.weight`/`bn.bias` (parameters); the moving mean and
    variance become `bn.running_mean`/`bn.running_var` (plain tensors).

    Raises:
      AssertionError: if any shape differs from the one already in `bn`.
    """
    gamma = self.Param(self.weight_dict[gamma_key])
    beta = self.Param(self.weight_dict[beta_key])
    moving_mean = self.Tensor(self.weight_dict[moving_mean_key])
    moving_var = self.Tensor(self.weight_dict[moving_var_key])
    assert bn.weight.shape == gamma.shape
    assert bn.bias.shape == beta.shape
    assert bn.running_mean.shape == moving_mean.shape
    assert bn.running_var.shape == moving_var.shape
    bn.weight = gamma
    bn.bias = beta
    bn.running_mean = moving_mean
    bn.running_var = moving_var
    self.used_keys += [gamma_key, beta_key, moving_mean_key, moving_var_key]

  def convert_fc(self, fc, weights_key, biases_key):
    """Copy a TF fully connected layer into the PyTorch linear module `fc`.

    The TF weight matrix is transposed before the copy so that it matches the
    shape of `fc.weight`; the bias is copied as is.

    Raises:
      AssertionError: if the weight or bias shape does not match `fc`.
    """
    weights = self.Param(np.transpose(self.weight_dict[weights_key]))
    biases = self.Param(self.weight_dict[biases_key])
    assert fc.weight.shape == weights.shape
    assert fc.bias.shape == biases.shape
    fc.weight = weights
    fc.bias = biases
    self.used_keys += [weights_key, biases_key]

  def Param(self, x):
    """Wrap the numpy array `x` in a `torch.nn.Parameter`."""
    return torch.nn.Parameter(torch.from_numpy(x))

  def Tensor(self, x):
    """Return a torch tensor created from the numpy array `x`."""
    return torch.from_numpy(x)


if __name__ == '__main__':
  # Constructing the converter runs the whole conversion from its __init__.
  ConvertPNASNet()