python source code of preprocessing

models-master
- Makefile
- requirements-test.txt
- third_party
  - licenses.txt
- LICENSE
- models
  - language_translation
    - __init__.py
    - tensorflow
      - mlperf_gnmt
        __init__.py
        fp32
        nmt_utils.py
        evaluation_utils.py
        rouge.py
        __init__.py
        misc_utils.py
        run_inference.py
        hparam.py
        bleu.py
      - transformer_mlperf
        training
        bfloat16
        run_and_time.sh
        run_preprocessing.sh
        transformer
        utils
        tokenizer.py
        metrics.py
        tokenizer_test.py
        __init__.py
        dataset.py
        compute_bleu_test.py
        compute_bleu.py
        __init__.py
        data_download.py
        README.md
        model
        embedding_layer.py
        transformer.py
        model_utils_test.py
        attention_layer.py
        __init__.py
        model_params.py
        ffn_layer.py
        model_utils.py
        beam_search.py
        beam_search_test.py
        translate.py
        transformer_main.py
        vocab
        process_data.py
        fp32
        run_and_time.sh
        run_preprocessing.sh
        transformer
        utils
        tokenizer.py
        metrics.py
        tokenizer_test.py
        __init__.py
        dataset.py
        compute_bleu_test.py
        compute_bleu.py
        __init__.py
        data_download.py
        README.md
        model
        embedding_layer.py
        transformer.py
        model_utils_test.py
        attention_layer.py
        __init__.py
        model_params.py
        ffn_layer.py
        model_utils.py
        beam_search.py
        beam_search_test.py
        translate.py
        transformer_main.py
        vocab
        process_data.py
      - __init__.py
      - transformer_lt_official
        inference
        fp32
        utils
        tokenizer.py
        metrics.py
        tokenizer_test.py
        __init__.py
        official
        utils
        hyperparams_flags.py
        __init__.py
        logs
        hooks_helper_test.py
        cloud_lib_test.py
        metric_hook_test.py
        metric_hook.py
        guidelines.md
        mlperf_helper.py
        hooks.py
        logger.py
        hooks_test.py
        logger_test.py
        __init__.py
        cloud_lib.py
        hooks_helper.py
        flags
        flags_test.py
        guidelines.md
        _misc.py
        __init__.py
        README.md
        _benchmark.py
        _performance.py
        core.py
        _base.py
        _conventions.py
        _device.py
        _distribution.py
        compute_bleu.py
        infer_ab.py
  - reinforcement
    - __init__.py
    - tensorflow
      - minigo
        training
        __init__.py
        requirements.txt
        fp32
        bazel-clean-large-scale.patch
        avoid-repeated-clone-singlenode.patch
        get-data.patch
        mlperf_split.patch
        bazel-clean-single-node.patch
        large-scale-no-bg.patch
        minigo_mlperf.patch
        avoid-repeated-clone-multinode.patch
        tune_for_many_core.patch
        __init__.py
      - __init__.py
  - recommendation
    - __init__.py
    - tensorflow
      - wide_deep
        __init__.py
        inference
        __init__.py
        fp32
        wide_deep_inference.py
        __init__.py
      - wide_deep_large_ds
        training
        train.py
        __init__.py
        dataset
        featurecolumn_graph_optimization.py
        preprocess_csv_tfrecords.py
        __init__.py
        inference
        parallel_inference.py
        __init__.py
        inference.py
      - __init__.py
      - ncf
        training
        neumf_model.py
        ncf_common.py
        __init__.py
        ncf_estimator_main.py
  - common
    - __init__.py
    - tensorflow
      - mlperf_compliance
        tf_mlperf_log.py
        _transformer_tags.py
        _maskrcnn_tags.py
        mlperf_log.py
        _gnmt_tags.py
        _ncf_tags.py
        _resnet_tags.py
        resnet_log_helper.py
        tags.py
        __init__.py
        _ssd_tags.py
        test_tag_set.py
      - mlperf_utils
        misc
        __init__.py
        model_helpers.py
        export
        export.py
        __init__.py
        arg_parsers
        __init__.py
        parsers.py
        __init__.py
        logs
        metric_hook.py
        benchmark_uploader.py
        hooks.py
        logger.py
        __init__.py
        hooks_helper.py
      - __init__.py
  - image_recognition
    - __init__.py
    - tensorflow
      - inceptionv4
        inference
        cnn_util.py
        preprocessing.py
        accuracy.py
        datasets.py
        __init__.py
        benchmark.py
      - inceptionv3
        fp32
        eval_image_classifier_inference.py
        preprocessing.py
        datasets.py
        __init__.py
        int8
        cnn_util.py
        preprocessing.py
        accuracy.py
        datasets.py
        __init__.py
        preprocessing_benchmark.py
        calibration.py
        benchmark.py
      - resnet101
        __init__.py
        inference
        vgg_preprocessing.py
        eval_image_classifier_inference.py
        preprocessing.py
        datasets.py
        __init__.py
        int8
        vgg_preprocessing.py
        cnn_util.py
        preprocessing.py
        datasets.py
        __init__.py
        calibration.py
      - resnet50v1_5
        training
        mlperf_compliance
        tf_mlperf_log.py
        _transformer_tags.py
        _maskrcnn_tags.py
        mlperf_log.py
        _gnmt_tags.py
        _ncf_tags.py
        _resnet_tags.py
        resnet_log_helper.py
        tags.py
        __init__.py
        _ssd_tags.py
        test_tag_set.py
        mlperf_utils
        misc
        __init__.py
        model_helpers.py
        export
        export.py
        __init__.py
        arg_parsers
        __init__.py
        parsers.py
        __init__.py
        logs
        metric_hook.py
        benchmark_uploader.py
        hooks.py
        logger.py
        __init__.py
        hooks_helper.py
        __init__.py
        mlperf_resnet
        resnet_run_loop.py
        resnet_model.py
        __init__.py
        imagenet_main.py
        imagenet_preprocessing.py
        __init__.py
        inference
        eval_image_classifier_inference.py
        preprocessing.py
        datasets.py
        __init__.py
        int8
        generate_calibration_data.py
        cnn_util.py
        preprocessing.py
        datasets.py
        __init__.py
        preprocessing_benchmark.py
        benchmark.py
      - resnet50
        __init__.py
        inference
        eval_image_classifier_inference.py
        preprocessing.py
        datasets.py
        __init__.py
        int8
        generate_calibration_data.py
        cnn_util.py
        preprocessing.py
        datasets.py
        __init__.py
        preprocessing_benchmark.py
        benchmark.py
      - mobilenet_v1
        inference
        fp32
        cnn_util.py
        accuracy.py
        accuracy_preprocessing.py
        eval_image_classifier.py
        benchmark.py
        accuracy_datasets.py
        int8
        cnn_util.py
        preprocessing.py
        accuracy.py
        datasets.py
        __init__.py
        calibration.py
        benchmark.py
      - __init__.py
      - densenet169
        inference
        fp32
        image_preprocessing.py
        cnn_util.py
        accuracy.py
        densenet_preprocessing.py
        dataset.py
        benchmark.py
  - __init__.py
  - object_detection
    - __init__.py
    - tensorflow
      - ssd-mobilenet
        __init__.py
        inference
        coco_label_map.py
        coco_detection_evaluator.py
        __init__.py
        ssdmobilenet_preprocess.pb
        fp32
        infer_detections.py
        __init__.py
        coco_tools.py
        int8
        infer_detections.py
        __init__.py
      - __init__.py
      - ssd-resnet34
        training
        bfloat16
        benchmark-tf-2.0.diff
        __init__.py
        benchmark-bfloat16.diff
        __init__.py
        fp32
        benchmark-tf-2.0.diff
        __init__.py
        __init__.py
        inference
        tensorflow_models_tf2.0.patch
        __init__.py
        fp32
        infer_detections.py
        __init__.py
        coco_constants.py
        tensorflow_benchmarks_tf2.0.patch
        int8
        infer_detections.py
        __init__.py
        coco_constants.py
      - rfcn
        __init__.py
        inference
        tf-2.0.patch
        __init__.py
        fp32
        evaluator.py
        eval.py
        run_rfcn_inference.py
        dataset_util.py
        __init__.py
        eval_util.py
        coco_mAP.sh
        int8
        run_rfcn_inference.py
        __init__.py
        coco_mAP.sh
  - language_modeling
    - tensorflow
      - bert_large
        training
        bfloat16
        run_pretraining.py
        modeling_test.py
        run_pretraining_ckppoint.sh
        run_classifier_with_tfhub.py
        modeling.py
        sample_text.txt
        optimization_test.py
        create_pretraining_data.py
        optimization.py
        run_squad.py
        CONTRIBUTING.md
        run_classifier.py
        multilingual.md
        predicting_movie_reviews_with_bert_on_tf_hub.ipynb
        __init__.py
        README.md
        create_pretraining_data.sh
        tokenization.py
        requirements.txt
        tokenization_test.py
        generic_ops.py
        extract_features.py
        __init__.py
        fp32
        run_pretraining.py
        run_classifier.sh
        run_pretraining.sh
        modeling_test.py
        run_pretraining_ckppoint.sh
        run_classifier_with_tfhub.py
        modeling.py
        sample_text.txt
        optimization_test.py
        LICENSE
        create_pretraining_data.py
        optimization.py
        run_squad.py
        CONTRIBUTING.md
        run_classifier.py
        multilingual.md
        predicting_movie_reviews_with_bert_on_tf_hub.ipynb
        __init__.py
        README.md
        create_pretraining_data.sh
        tokenization.py
        requirements.txt
        tokenization_test.py
        run_squad_large.sh
        generic_ops.py
        extract_features.py
        __init__.py
        inference
        run_pretraining.py
        modeling_test.py
        evaluate-v1.1.py
        run_pretraining_ckppoint.sh
        run_classifier_with_tfhub.py
        modeling.py
        sample_text.txt
        optimization_test.py
        create_pretraining_data.py
        optimization.py
        run_squad.py
        CONTRIBUTING.md
        run_classifier.py
        multilingual.md
        predicting_movie_reviews_with_bert_on_tf_hub.ipynb
        __init__.py
        README.md
        create_pretraining_data.sh
        tokenization.py
        requirements.txt
        tokenization_test.py
        generic_ops.py
        extract_features.py
      - __init__.py
- benchmarks
  - language_translation
    - __init__.py
    - tensorflow
      - mlperf_gnmt
        __init__.py
        README.md
        requirements.txt
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
      - transformer_mlperf
        training
        bfloat16
        config.json
        model_init.py
        __init__.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        README.md
      - __init__.py
      - transformer_lt_official
        __init__.py
        README.md
        requirements.txt
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
  - reinforcement
    - __init__.py
    - tensorflow
      - minigo
        training
        __init__.py
        fp32
        model_init.py
        __init__.py
        __init__.py
        README.md
        requirements.txt
      - __init__.py
  - recommendation
    - __init__.py
    - tensorflow
      - wide_deep
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        model_init.py
        __init__.py
        data_download.py
      - wide_deep_large_ds
        training
        __init__.py
        fp32
        model_init.py
        __init__.py
        readme.txt
        __init__.py
        README.md
        inference
        readme.txt
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - __init__.py
      - ncf
        training
        bfloat16
        config.json
        model_init.py
        __init__.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
  - common
    - utils
      - __init__.py
      - validators.py
      - multi_instance.py
    - base_benchmark_util.py
    - base_model_init.py
    - __init__.py
    - platform_util.py
    - tensorflow
      - run_tf_benchmark.py
      - __init__.py
      - start.sh
  - image_recognition
    - __init__.py
    - tensorflow
      - inceptionv4
        __init__.py
        README.md
        inference
        config.json
        inceptionv4_model_init.py
        __init__.py
        fp32
        model_init.py
        __init__.py
        int8
        model_init.py
        __init__.py
      - inceptionv3
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - resnet101
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - resnet50v1_5
        training
        bfloat16
        model_init.py
        __init__.py
        __init__.py
        common_resnet50
        config.json
        __init__.py
        resnet50_model_init.py
        fp32
        model_init.py
        __init__.py
        __init__.py
        README.md
        inference
        bfloat16
        config.json
        model_init.py
        __init__.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - resnet50
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - mobilenet_v1
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - __init__.py
      - densenet169
        __init__.py
        README.md
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
  - __init__.py
  - README.md
  - object_detection
    - __init__.py
    - tensorflow
      - ssd-mobilenet
        __init__.py
        README.md
        requirements.txt
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - __init__.py
      - ssd-resnet34
        training
        bfloat16
        config.json
        model_init.py
        __init__.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        __init__.py
        README.md
        requirements.txt
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
      - rfcn
        __init__.py
        README.md
        requirements.txt
        inference
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        int8
        config.json
        model_init.py
        __init__.py
  - launch_benchmark.py
  - language_modeling
    - tensorflow
      - bert_large
        training
        bfloat16
        config.json
        model_init.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
        __init__.py
        README.md
        inference
        bfloat16
        config.json
        model_init.py
        __init__.py
        __init__.py
        fp32
        config.json
        model_init.py
        __init__.py
- __init__.py
- Contribute.md
- README.md
- tests
  - test_utils
    - platform_config.py
    - io.py
    - __init__.py
  - __init__.py
  - README.md
  - unit
    - test_launch_benchmark.py
    - common
      - utils
        test_validators.py
        files
        sorted_membind_info.json
        inference_prefix_exception.json
        membind_info.json
        inference_prefix_ht.json
        inference_prefix_no_ht.json
        test_multi_instance.py
      - test_platform_util.py
      - test_base_model_init.py
      - __init__.py
      - tensorflow
        test_run_tf_benchmarks.py
        tf_model_args
        tf_resnet50_args.json
        tf_resnet50v1_5_args.json
        tf_mobilenet_v1_args.json
        tf_inceptionv3_args.json
        tf_densenet169_args.json
        tf_rfcn_args.json
        tf_gnmt_args.json
        tf_ssd_resnet34_args.json
        tf_resnet101_args.json
        tf_wide_deep_large_ds_args.json
        tf_bert_args.json
        tf_ssd_mobilenet_args.json
        tf_inceptionv4_args.json
        tf_wide_deep_args.json
        __init__.py
    - __init__.py
  - conftest.py
- CODEOWNERS
- .gitignore
- Jenkinsfile
- docs
  - language_translation
    - tensorflow
      - Tutorial.md
  - recommendation
    - tensorflow
      - Tutorial.md
  - image_recognition
    - quantization
      - Tutorial.md
    - tensorflow
      - Tutorial.md
  - README.md
  - general
    - tensorflow
      - LaunchBenchmark.md
      - BuildContainer.md
      - GeneralBestPractices.md
  - language_modeling
    - tensorflow
      - Tutorial.md
- tox.ini

#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.data.experimental import parallel_interleave
from tensorflow.python.data.experimental import map_and_batch
from tensorflow.python.platform import gfile


def parse_example_proto(example_serialized):
  """Extract the encoded image, label and filename from a serialized Example.

  Args:
    example_serialized: the serialized Example proto, handed unchanged to
      `tf.io.parse_single_example`.

  Returns:
    A 3-tuple `(encoded_image, label, filename)` where `encoded_image` is the
    raw 'image/encoded' string feature, `label` is the 'image/class/label'
    feature (declared with shape [1]) cast to int32, and `filename` is the
    'image/filename' string feature.
  """
  # The bounding-box coordinates are variable-length float lists; they are
  # parsed along with the dense features but are not returned to the caller.
  bbox_spec = tf.io.VarLenFeature(dtype=tf.float32)
  bbox_keys = ('image/object/bbox/xmin',
               'image/object/bbox/ymin',
               'image/object/bbox/xmax',
               'image/object/bbox/ymax')

  # Dense features first, each with a default used when the key is absent.
  spec = {
    'image/encoded': tf.io.FixedLenFeature(
      [], dtype=tf.string, default_value=''),
    'image/class/label': tf.io.FixedLenFeature(
      [1], dtype=tf.int64, default_value=-1),
    'image/filename': tf.io.FixedLenFeature(
      [], dtype=tf.string, default_value=""),
  }
  # Sparse features appended afterwards, all sharing one spec object.
  for key in bbox_keys:
    spec[key] = bbox_spec

  parsed = tf.io.parse_single_example(
    serialized=example_serialized, features=spec)

  encoded_image = parsed['image/encoded']
  label = tf.cast(parsed['image/class/label'], dtype=tf.int32)
  filename = tf.cast(parsed['image/filename'], dtype=tf.string)
  return encoded_image, label, filename


def eval_image(image, height, width, resize_method,
               central_fraction=0.875, scope=None):
  """Prepare a single decoded image for evaluation.

  Two pipelines are available, selected by `resize_method`:

  * 'crop': resize so that the shorter of the first two dimensions becomes
    256 (the other is scaled by the same ratio), cut a centered
    `height` x `width` window, and subtract the fixed per-channel constants
    [123.68, 116.78, 103.94].
  * anything else (bilinear): convert to float32 if needed, keep the central
    `central_fraction` of the image, resize bilinearly to
    `height` x `width`, then apply `(x - 0.5) * 2.0`.

  Args:
    image: 3-D image tensor (the crop path fixes the result to 3 channels).
    height: target height; the bilinear path skips resizing when falsy.
    width: target width; the bilinear path skips resizing when falsy.
    resize_method: 'crop' selects the crop pipeline, any other value the
      bilinear one.
    central_fraction: fraction passed to `tf.image.central_crop` in the
      bilinear pipeline; a falsy value disables the central crop.
    scope: accepted for interface compatibility; not read by this function.

  Returns:
    The preprocessed image tensor.
  """
  with tf.compat.v1.name_scope('eval_image'):
    if resize_method != 'crop':
      # Bilinear pipeline.
      result = image
      if result.dtype != tf.float32:
        result = tf.image.convert_image_dtype(result, dtype=tf.float32)
      # Optionally discard the border, keeping the central region only.
      if central_fraction:
        result = tf.image.central_crop(
          result, central_fraction=central_fraction)

      if height and width:
        # Resize through a temporary leading batch dimension.
        batched = tf.expand_dims(result, 0)
        batched = tf.image.resize(batched, [height, width],
                                  method=tf.image.ResizeMethod.BILINEAR)
        result = tf.squeeze(batched, [0])
      # (x - 0.5) * 2.0, which maps values in [0, 1] onto [-1, 1].
      result = tf.subtract(result, 0.5)
      result = tf.multiply(result, 2.0)
      return result

    # Crop pipeline.
    in_shape = tf.shape(input=image)

    def _first_dim_to_256():
      # First dimension is the shorter one: pin it to 256, scale the second.
      target = tf.convert_to_tensor(
        value=[256, 256 * in_shape[1] / in_shape[0]], dtype=tf.int32)
      return tf.image.resize(image, target)

    def _second_dim_to_256():
      # Second dimension is the shorter (or equal) one: pin it to 256.
      target = tf.convert_to_tensor(
        value=[256 * in_shape[0] / in_shape[1], 256], dtype=tf.int32)
      return tf.image.resize(image, target)

    resized = tf.cond(pred=tf.less(in_shape[0], in_shape[1]),
                      true_fn=_first_dim_to_256,
                      false_fn=_second_dim_to_256)

    # Offsets that center the height x width window inside the resized image.
    resized_shape = tf.shape(input=resized)
    top = (resized_shape[0] - height) // 2
    left = (resized_shape[1] - width) // 2
    cropped = tf.image.crop_to_bounding_box(resized, top, left, height, width)
    cropped.set_shape([height, width, 3])

    channel_offsets = tf.broadcast_to([123.68, 116.78, 103.94],
                                      tf.shape(input=cropped))
    return cropped - channel_offsets

class RecordInputImagePreprocessor(object):
  """Builds batched, preprocessed image inputs from TFRecord files."""

  def __init__(self,
               height,
               width,
               batch_size,
               num_cores,
               resize_method="bilinear"):
    """Store the settings used by the input pipeline.

    Args:
      height: output image height passed to `eval_image`.
      width: output image width passed to `eval_image`.
      batch_size: number of examples per batch.
      num_cores: used as the interleave cycle length and to size the number
        of batches produced in parallel.
      resize_method: forwarded to `eval_image` ("crop" or bilinear otherwise).
    """
    self.resize_method = resize_method
    self.num_cores = num_cores
    self.batch_size = batch_size
    self.width = width
    self.height = height

  def parse_and_preprocess(self, value):
    """Turn one serialized Example into `(image, label, filename)`.

    Args:
      value: serialized Example proto, as consumed by `parse_example_proto`.

    Returns:
      A tuple of the preprocessed image, the label and the filename.
    """
    encoded_jpeg, label, filename = parse_example_proto(value)
    decoded = tf.image.decode_jpeg(
      encoded_jpeg,
      channels=3,
      fancy_upscaling=False,
      dct_method='INTEGER_FAST')
    processed = eval_image(
      decoded, self.height, self.width, self.resize_method)
    return (processed, label, filename)

  def minibatch(self, dataset, subset, cache_data=False):
    """Create tensors yielding one preprocessed batch per evaluation.

    Args:
      dataset: object exposing `tf_record_pattern(subset)`, which returns the
        glob pattern of the TFRecord files to read.
      subset: subset name forwarded to `dataset.tf_record_pattern`.
      cache_data: when true, only the first record is kept, cached and
        repeated indefinitely.

    Returns:
      A tuple `(images, labels, filename)`; `labels` and `filename` are
      reshaped to `[batch_size]`.

    Raises:
      ValueError: if no file matches the glob pattern.
    """
    with tf.compat.v1.name_scope('batch_processing'):
      pattern = dataset.tf_record_pattern(subset)
      shard_paths = gfile.Glob(pattern)
      if not shard_paths:
        raise ValueError('Found no files in --data_dir matching: {}'
                         .format(pattern))

      # Dataset of file names, then read records from several files at once.
      pipeline = tf.data.TFRecordDataset.list_files(shard_paths)
      interleave_files = parallel_interleave(
        tf.data.TFRecordDataset,
        cycle_length=self.num_cores,
        block_length=5,
        sloppy=True,
        buffer_output_elements=10000,
        prefetch_input_elements=10000)
      pipeline = pipeline.apply(interleave_files)

      if cache_data:
        # Serve the same single record over and over from the cache.
        pipeline = pipeline.take(1).cache().repeat()

      pipeline = pipeline.prefetch(buffer_size=10000)

      # Cap the number of batches built in parallel at 56.
      parallel_batches = min(56, 2 * self.num_cores)
      decode_and_batch = map_and_batch(
        map_func=self.parse_and_preprocess,
        batch_size=self.batch_size,
        num_parallel_batches=parallel_batches,
        num_parallel_calls=None)
      pipeline = pipeline.apply(decode_and_batch)

      pipeline = pipeline.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

      iterator = tf.compat.v1.data.make_one_shot_iterator(pipeline)
      images, labels, filename = iterator.get_next()

      # Flatten labels and filenames to one entry per example in the batch.
      flat_shape = [self.batch_size]
      labels = tf.reshape(labels, flat_shape)
      filename = tf.reshape(filename, flat_shape)

      return images, labels, filename