# Python source code of validate_submission

"""Helper library which performs validation of the submission."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import json
import logging
import os
import re
import subprocess

import numpy as np
from PIL import Image

from six import iteritems
from six import PY3


# Extraction command templates keyed by archive extension. The '${src}' and
# '${dst}' tokens are placeholders which shell_call() replaces with real paths.
EXTRACT_COMMAND = {
    '.zip': ['unzip', '${src}', '-d', '${dst}'],
    '.tar': ['tar', 'xvf', '${src}', '-C', '${dst}'],
    '.tar.gz': ['tar', 'xvzf', '${src}', '-C', '${dst}'],
}

# Submission types accepted by the validator.
ALLOWED_SUBMISSION_TYPES = ['attack', 'targeted_attack', 'defense']

# Keys which have to be present in metadata.json of the submission.
REQUIRED_METADATA_JSON_FIELDS = [
    'entry_point',
    'container',
    'container_gpu',
    'type',
]

# Matches a command token which consists solely of a '${name}' placeholder.
CMD_VARIABLE_RE = re.compile(r'^\$\{(\w+)\}$')

# Number of sample images generated for the trial run and their file names.
BATCH_SIZE = 100
IMAGE_NAME_PATTERN = 'IMG{0:04}.png'

# Epsilon values, one of which is picked at random when running an attack.
ALLOWED_EPS = [4, 8, 12, 16]

# Size limits in bytes.
MAX_SUBMISSION_SIZE_ZIPPED = 8 * 1024 ** 3      #  8 GiB
MAX_SUBMISSION_SIZE_UNPACKED = 16 * 1024 ** 3   # 16 GiB
MAX_DOCKER_IMAGE_SIZE = 8 * 1024 ** 3           #  8 GiB


def get_extract_command_template(filename):
  """Looks up the extraction command template for an archive file.

  Args:
    filename: name of the archive file

  Returns:
    command template (list of tokens) from EXTRACT_COMMAND whose extension key
    is a suffix of the filename, or None if no extension matches
  """
  matching_templates = (template
                        for extension, template in iteritems(EXTRACT_COMMAND)
                        if filename.endswith(extension))
  return next(matching_templates, None)


def shell_call(command, **kwargs):
  """Calls shell command with parameter substitution.

  Every token of the command which consists solely of a '${name}' placeholder
  is replaced by the value of the keyword argument with the same name.
  Placeholders without a matching keyword argument are left untouched.

  Args:
    command: command to run as a list of tokens
    **kwargs: dictionary with substitutions

  Returns:
    whether command was successful, i.e. returned 0 status code. False is
    also returned when the command could not be started at all (for example
    when the executable is not installed).

  Example of usage:
    shell_call(['cp', '${A}', '${B}'], A='src_file', B='dst_file')
  will call shell command:
    cp src_file dst_file
  """
  resolved_command = []
  for token in command:
    m = CMD_VARIABLE_RE.match(token)
    if m and m.group(1) in kwargs:
      token = kwargs[m.group(1)]
    resolved_command.append(token)
  try:
    return subprocess.call(resolved_command) == 0
  except OSError as e:
    # subprocess raises OSError when the executable can't be started.
    # Callers only check the boolean result, so report this as a failure
    # instead of crashing the whole validation with a traceback.
    logging.error('Failed to execute command %s: %s', resolved_command, e)
    return False


def make_directory_writable(dirname):
  """Grants everybody read, write and execute access to a directory tree.

  Args:
    dirname: name of the directory

  Returns:
    True if operation was successful

  Files written by a process inside Docker container are created as root
  user with restricted permissions. To be able to read or modify them outside
  of Docker the permissions are changed recursively, and the chmod itself is
  run inside a container which has the directory mounted.
  """
  chmod_in_docker = [
      'docker', 'run',
      '-v', '{0}:/output_dir'.format(dirname),
      'busybox:1.27.2',
      'chmod', '-R', 'a+rwx', '/output_dir',
  ]
  succeeded = shell_call(chmod_in_docker)
  if succeeded:
    return succeeded
  logging.error('Failed to change permissions on directory: %s', dirname)
  return succeeded


def load_defense_output(filename):
  """Loads output of defense from given CSV file.

  Each row is expected to hold an image file name followed by an integer
  label. Rows with fewer than two columns or with a non-integer label are
  skipped. The '.png' extension is appended to image names which lack it.

  Args:
    filename: name of the CSV file written by the defense

  Returns:
    dictionary which maps image file name to the predicted label
  """
  labels_by_image = {}
  with open(filename) as csv_file:
    for fields in csv.reader(csv_file):
      if len(fields) < 2:
        continue
      try:
        label = int(fields[1])
      except ValueError:
        continue
      image_name = fields[0]
      if not image_name.endswith('.png'):
        image_name = image_name + '.png'
      labels_by_image[image_name] = label
  return labels_by_image


class SubmissionValidator(object):
  """Class which performs validation of the submission."""

  def __init__(self, temp_dir, use_gpu):
    """Initializes instance of SubmissionValidator.

    Args:
      temp_dir: temporary working directory
      use_gpu: whether to use GPU
    """
    self._temp_dir = temp_dir
    self._use_gpu = use_gpu
    self._tmp_extracted_dir = os.path.join(self._temp_dir, 'tmp_extracted')
    self._extracted_submission_dir = os.path.join(self._temp_dir, 'extracted')
    self._sample_input_dir = os.path.join(self._temp_dir, 'input')
    self._sample_output_dir = os.path.join(self._temp_dir, 'output')

  def _prepare_temp_dir(self):
    """Cleans up and prepares temporary directory.

    Removes everything inside of self._temp_dir and then creates fresh
    directories for the extracted archive, the sample input and the sample
    output.
    """
    # shell_call does not run the command through a shell, so a '*' wildcard
    # would be passed to rm literally and nothing would be removed.
    # Enumerate content of the directory explicitly instead.
    stale_paths = [os.path.join(self._temp_dir, name)
                   for name in os.listdir(self._temp_dir)]
    if stale_paths:
      shell_call(['rm', '-rf'] + stale_paths)
    # NOTE: we do not create self._extracted_submission_dir
    # this is intentional because self._tmp_extracted_dir or its subdir
    # will be renamed into self._extracted_submission_dir
    os.mkdir(self._tmp_extracted_dir)
    os.mkdir(self._sample_input_dir)
    os.mkdir(self._sample_output_dir)
    # make output dir world writable
    shell_call(['chmod', 'a+rwX', '-R', self._sample_output_dir])

  def _extract_submission(self, filename):
    """Extracts submission and moves it into self._extracted_submission_dir.

    The archive is unpacked inside of a Docker container without network
    access, then permissions of the extracted files are opened up so they can
    be handled outside of Docker.

    Args:
      filename: path to the submission archive. validate_submission passes an
        absolute path because the directory of the archive is mounted into
        the container.

    Returns:
      True if submission was extracted and moved successfully, False otherwise
    """
    # verify filesize
    file_size = os.path.getsize(filename)
    if file_size > MAX_SUBMISSION_SIZE_ZIPPED:
      logging.error('Submission archive size %d is exceeding limit %d',
                    file_size, MAX_SUBMISSION_SIZE_ZIPPED)
      return False
    # determine archive type
    extract_command_tmpl = get_extract_command_template(filename)
    if not extract_command_tmpl:
      logging.error('Input file has to be zip, tar or tar.gz archive; however '
                    'found: %s', filename)
      return False
    # extract archive
    submission_dir = os.path.dirname(filename)
    submission_basename = os.path.basename(filename)
    logging.info('Extracting archive %s', filename)
    retval = shell_call(
        ['docker', 'run',
         '--network=none',
         '-v', '{0}:/input_dir'.format(submission_dir),
         '-v', '{0}:/output_dir'.format(self._tmp_extracted_dir),
         'busybox:1.27.2'] + extract_command_tmpl,
        src=os.path.join('/input_dir', submission_basename),
        dst='/output_dir')
    if not retval:
      logging.error('Failed to extract submission from file %s', filename)
      return False
    if not make_directory_writable(self._tmp_extracted_dir):
      return False
    # find submission root, ignoring the '__MACOSX' entry
    root_dir = self._tmp_extracted_dir
    root_dir_content = [d for d in os.listdir(root_dir) if d != '__MACOSX']
    if (len(root_dir_content) == 1
        and os.path.isdir(os.path.join(root_dir, root_dir_content[0]))):
      logging.info('Looks like submission root is in subdirectory "%s" of '
                   'the archive', root_dir_content[0])
      root_dir = os.path.join(root_dir, root_dir_content[0])
    # Move files to self._extracted_submission_dir.
    # At this point self._extracted_submission_dir does not exist,
    # so following command will simply rename root_dir into
    # self._extracted_submission_dir
    if not shell_call(['mv', root_dir, self._extracted_submission_dir]):
      # Double quotes are required here: 'Can''t' is concatenation of two
      # adjacent literals and silently drops the apostrophe.
      logging.error("Can't move submission files from root directory")
      return False
    return True

  def _verify_submission_size(self):
    """Verifies that unpacked submission does not exceed the size limit.

    Sums up sizes of all files found under self._extracted_submission_dir.

    Returns:
      True if total size is within MAX_SUBMISSION_SIZE_UNPACKED,
      False otherwise.
    """
    submission_size = 0
    for dirname, _, filenames in os.walk(self._extracted_submission_dir):
      for f in filenames:
        # lstat does not follow symbolic links, so a dangling link inside of
        # the archive is counted by its own size instead of raising OSError.
        submission_size += os.lstat(os.path.join(dirname, f)).st_size
    logging.info('Unpacked submission size: %d', submission_size)
    if submission_size > MAX_SUBMISSION_SIZE_UNPACKED:
      logging.error('Submission size exceeding limit %d',
                    MAX_SUBMISSION_SIZE_UNPACKED)
    return submission_size <= MAX_SUBMISSION_SIZE_UNPACKED

  def _load_and_verify_metadata(self, submission_type):
    """Loads and verifies metadata.

    Args:
      submission_type: type of the submission

    Returns:
      dictionaty with metadata or None if metadata not found or invalid
    """
    metadata_filename = os.path.join(self._extracted_submission_dir,
                                     'metadata.json')
    if not os.path.isfile(metadata_filename):
      logging.error('metadata.json not found')
      return None
    try:
      with open(metadata_filename, 'r') as f:
        metadata = json.load(f)
    except IOError as e:
      logging.error('Failed to load metadata: %s', e)
      return None
    for field_name in REQUIRED_METADATA_JSON_FIELDS:
      if field_name not in metadata:
        logging.error('Field %s not found in metadata', field_name)
        return None
    # Verify submission type
    if submission_type != metadata['type']:
      logging.error('Invalid submission type in metadata, expected "%s", '
                    'actual "%s"', submission_type, metadata['type'])
      return None
    # Check submission entry point
    entry_point = metadata['entry_point']
    if not os.path.isfile(os.path.join(self._extracted_submission_dir,
                                       entry_point)):
      logging.error('Entry point not found: %s', entry_point)
      return None
    if not entry_point.endswith('.sh'):
      logging.warning('Entry point is not an .sh script. '
                      'This is not necessarily a problem, but if submission '
                      'won''t run double check entry point first: %s',
                      entry_point)
    # Metadata verified
    return metadata

  def _verify_docker_image_size(self, image_name):
    """Checks Docker image size against MAX_DOCKER_IMAGE_SIZE.

    The image is pulled first and then its size is queried with
    'docker inspect'.

    Args:
      image_name: name of the Docker image.

    Returns:
      True if image size is within the limits, False otherwise (including
      the case when the size can't be determined).
    """
    shell_call(['docker', 'pull', image_name])
    inspect_command = ['docker', 'inspect', '--format={{.Size}}', image_name]
    try:
      reported_size = subprocess.check_output(inspect_command).strip()
      if PY3:
        image_size = int(reported_size)
      else:
        image_size = long(reported_size)
    except (ValueError, subprocess.CalledProcessError) as e:
      logging.error('Failed to determine docker image size: %s', e)
      return False
    logging.info('Size of docker image %s is %d', image_name, image_size)
    within_limit = image_size <= MAX_DOCKER_IMAGE_SIZE
    if not within_limit:
      logging.error('Image size exceeds limit %d', MAX_DOCKER_IMAGE_SIZE)
    return within_limit

  def _prepare_sample_data(self, submission_type):
    """Generates random sample input for a trial run of the submission.

    Writes BATCH_SIZE random 299x299 three-channel images into the sample
    input directory. For targeted attacks a target_class.csv file with
    a random target class between 1 and 1000 per image is written as well.

    Args:
      submission_type: type of the submission.
    """
    # random pixel data saved as PNG files
    sample_images = np.random.randint(
        0, 256, size=[BATCH_SIZE, 299, 299, 3], dtype=np.uint8)
    for idx, pixels in enumerate(sample_images):
      image_path = os.path.join(self._sample_input_dir,
                                IMAGE_NAME_PATTERN.format(idx))
      Image.fromarray(pixels).save(image_path)
    # only targeted attacks need target classes
    if submission_type != 'targeted_attack':
      return
    target_classes = np.random.randint(1, 1001, size=[BATCH_SIZE])
    row_format = IMAGE_NAME_PATTERN + ',{1}\n'
    csv_path = os.path.join(self._sample_input_dir, 'target_class.csv')
    with open(csv_path, 'w') as f:
      f.writelines(row_format.format(idx, target)
                   for idx, target in enumerate(target_classes))

  def _run_submission(self, metadata):
    """Runs submission inside Docker container.

    The container runs without network access and with a 24g memory limit.
    Sample input is mounted read-only, the code of the submission is mounted
    as the working directory. Defenses get the name of the result file as the
    last argument, attacks get the output directory and a randomly chosen
    epsilon.

    Args:
      metadata: dictionary with submission metadata

    Returns:
      True if status code of Docker command was success (i.e. zero),
      False otherwise.
    """
    if self._use_gpu:
      docker_binary, container_name = 'nvidia-docker', metadata['container_gpu']
    else:
      docker_binary, container_name = 'docker', metadata['container']
    # pieces of the command which depend on the kind of the submission
    if metadata['type'] == 'defense':
      output_volume = '{0}:/output_data'.format(self._sample_output_dir)
      output_args = ['/output_data/result.csv']
    else:
      epsilon = np.random.choice(ALLOWED_EPS)
      output_volume = '{0}:/output_images'.format(self._sample_output_dir)
      output_args = ['/output_images', str(epsilon)]
    cmd = [docker_binary, 'run', '--network=none', '-m=24g']
    cmd += ['-v', '{0}:/input_images:ro'.format(self._sample_input_dir)]
    cmd += ['-v', output_volume]
    cmd += ['-v', '{0}:/code'.format(self._extracted_submission_dir)]
    cmd += ['-w', '/code', container_name]
    cmd += ['./' + metadata['entry_point'], '/input_images']
    cmd += output_args
    logging.info('Command to run submission: %s', ' '.join(cmd))
    return shell_call(cmd)

  def _verify_output(self, submission_type):
    """Verifies correctness of the submission output.

    For defenses checks that result.csv holds a label for exactly the sample
    images. For attacks checks that every output image can be read and has
    the 299x299x3 shape after conversion to RGB.

    Args:
      submission_type: type of the submission

    Returns:
      True if output looks valid
    """
    result = True
    if submission_type == 'defense':
      try:
        image_classification = load_defense_output(
            os.path.join(self._sample_output_dir, 'result.csv'))
        expected_keys = [IMAGE_NAME_PATTERN.format(i)
                         for i in range(BATCH_SIZE)]
        if set(image_classification.keys()) != set(expected_keys):
          logging.error('Classification results are not saved for all images')
          result = False
      except IOError as e:
        logging.error('Failed to read defense output file: %s', e)
        result = False
    else:
      # Keep checking remaining images after a failure so that every
      # problematic image gets reported.
      for i in range(BATCH_SIZE):
        image_filename = os.path.join(self._sample_output_dir,
                                      IMAGE_NAME_PATTERN.format(i))
        try:
          img = np.array(Image.open(image_filename).convert('RGB'))
          if list(img.shape) != [299, 299, 3]:
            logging.error('Invalid image size %s for image %s',
                          str(img.shape), image_filename)
            result = False
        except IOError as e:
          # Report which image is missing or unreadable instead of failing
          # silently.
          logging.error('Failed to read output image %s: %s',
                        image_filename, e)
          result = False
    return result

  def validate_submission(self, filename, submission_type):
    """Runs all validation steps on the submission archive.

    The steps are: extraction, check of the unpacked size, check of the
    metadata, check of the Docker image size and a trial run on sample data.
    Invalid or missing outputs of the trial run are only reported with
    a warning and do not make the submission invalid.

    Args:
      filename: submission filename
      submission_type: type of the submission,
        one of 'attack', 'targeted_attack' or 'defense'

    Returns:
      whether submission is valid
    """
    if submission_type not in ALLOWED_SUBMISSION_TYPES:
      logging.error('Invalid submission type: %s', submission_type)
      return False
    self._prepare_temp_dir()
    # Directory of the archive gets mounted in Docker and relative path might
    # cause problems there, thus work with absolute path.
    archive_path = os.path.abspath(filename)
    # extract submission and verify its unpacked size
    extracted_ok = (self._extract_submission(archive_path)
                    and self._verify_submission_size())
    if not extracted_ok:
      return False
    metadata = self._load_and_verify_metadata(submission_type)
    if not metadata:
      return False
    # size of the GPU container is the one which is verified
    if not self._verify_docker_image_size(metadata['container_gpu']):
      return False
    # trial run of the submission on sample data
    self._prepare_sample_data(submission_type)
    if not self._run_submission(metadata):
      logging.error('Failure while running submission')
      return False
    outputs_ok = self._verify_output(submission_type)
    if not outputs_ok:
      logging.warning('Some of the outputs of your submission are invalid or '
                      'missing. You submission still will be evaluation '
                      'but you might get lower score.')
    return True