dlcookbook-dlbs-master
- docs2
  - Makefile
  - source
    - index.rst
    - conf.py
- src
  - tensorrt
    - src
      - engines
        tensorrt_engine.hpp
        mgpu_engine.hpp
        tensorrt_engine.cpp
        tensorrt
        calibrator.hpp
        gpu_cast.h
        profiler.hpp
        tensorrt_utils.hpp
        gpu_cast.cu
        tensorrt_utils.cpp
      - core
        cuda_utils.hpp
        infer_engine.cpp
        infer_engine.hpp
        dataset
        dataset.hpp
        dataset.cpp
        image_dataset.cpp
        tensor_dataset.cpp
        image_dataset.hpp
        tensor_dataset.hpp
        queues.hpp
        logger.hpp
        infer_msg.hpp
        logger.cpp
        queues.ipp
        utils.cpp
        utils.hpp
    - python
      - compute_mprocess_throughput.py
    - CMakeLists.txt
    - README.md
    - tests
      - test_utils.cpp
      - tests_main.cpp
      - tests_direct_reader.cpp
      - CMakeLists.txt
      - tests_ipc.cpp
      - tests_queue.cpp
    - tools
      - images2tensors.cpp
      - tensorrt.cpp
      - benchmark_tensor_dataset.cpp
      - benchmark_host2device_copy.cpp
    - .gitignore
    - docs
      - sprocess_benchmarks.md
      - build.md
      - datasets.md
      - Doxyfile.in
      - index.md
      - images2tensors.md
      - benchmark_host2device.md
      - dataset_benchmarks.md
      - README.md
      - mprocess_benchmarks.md
- ChangeLog
- web
  - README.md
  - index.html
- LICENSE
- models
  - resnet18
    - resnet18.training.prototxt
    - resnet18.inference.prototxt
    - README.md
  - alexnet_owt
    - alexnet_owt.inference.prototxt
    - alexnet_owt.training.prototxt
  - vgg11
    - vgg11.inference.prototxt
    - vgg11.training.prototxt
  - acoustic_model
    - acoustic_model.inference.prototxt
    - README.md
    - acoustic_model.training.prototxt
  - overfeat
    - overfeat.training.prototxt
    - overfeat.inference.prototxt
  - resnet152
    - resnet152.inference.prototxt
    - LICENSE
    - resnet152.training.prototxt
    - README.md
  - bvlc_alexnet
    - bvlc_alexnet.training.prototxt
    - README.md
    - bvlc_alexnet.inference.prototxt
  - vgg13
    - vgg13.training.prototxt
    - vgg13.inference.prototxt
  - resnet200
    - resnet200.training.prototxt
    - generate_units.sh
    - resnet200.inference.prototxt
    - README.md
  - resnet101
    - LICENSE
    - resnet101.training.prototxt
    - resnet101.inference.prototxt
    - README.md
  - googlenet
    - bvlc_googlenet.inference.prototxt
    - bvlc_googlenet.training.prototxt
    - README.md
  - bvlc_googlenet
    - bvlc_googlenet.inference.prototxt
    - bvlc_googlenet.training.prototxt
    - README.md
  - vgg16
    - vgg16.inference.prototxt
    - README.md
    - vgg16.training.prototxt
  - deep_mnist
    - deep_mnist.training.prototxt
    - deep_mnist.inference.prototxt
  - sensor_net
    - sensor_net.inference.prototxt
    - sensor_net.training.prototxt
  - resnet50
    - LICENSE
    - README.md
    - resnet50.training.prototxt
    - resnet50.inference.prototxt
  - inception3
    - inception3.inference.prototxt
    - README.md
    - inception3.training.prototxt
  - vgg19
    - vgg19.inference.prototxt
    - vgg19.training.prototxt
    - README.md
  - README.md
  - text_cnn
    - text_cnn.training.prototxt
    - text_cnn.inference.prototxt
    - README.md
  - resnet269
    - resnet269.training.prototxt
    - README.md
    - resnet269.inference.prototxt
  - alexnet
    - bvlc_alexnet.training.prototxt
    - README.md
    - bvlc_alexnet.inference.prototxt
  - resnet34
    - README.md
    - resnet34.inference.prototxt
    - resnet34.training.prototxt
  - .gitignore
  - inception4
    - inception4.inference.prototxt
    - README.md
    - inception4.training.prototxt
- CONTRIBUTING.md
- ACKNOWLEDGMENTS
- docker
  - versions
  - openvino
    - 19.09
      - README.md
      - Dockerfile
      - .gitignore
    - 19.09-custom-mkldnn
      - README.md
      - Dockerfile
      - .gitignore
  - mxnet
    - 18.11
      - Dockerfile
    - 18.10
      - Dockerfile
  - bvlc_caffe
    - cuda9-cudnn7
      - Dockerfile
    - cuda8-cudnn7
      - Dockerfile
    - cpu
      - Dockerfile
    - cuda8-cudnn6
      - Dockerfile
  - caffe2
    - 18.10
      - Dockerfile
  - pytorch
    - 18.10-py3
      - Dockerfile
    - 19.01-rocm
      - README.md
      - Dockerfile
  - versions.md
  - benchmarks
    - infiniband
      - Dockerfile
    - ethernet
      - Dockerfile
  - README.md
  - nvidia_caffe
    - cuda9-cudnn7
      - Dockerfile
    - cuda8-cudnn7
      - Dockerfile
    - cuda8-cudnn6
      - Dockerfile
  - tensorflow
    - 19.02-mkl
      - jupyter_notebook_config.py
      - README.md
      - Dockerfile
      - run_jupyter.sh
    - cuda9-cudnn7
      - jupyter_notebook_config.py
      - Dockerfile.old
      - Dockerfile
      - run_jupyter.sh
    - cuda8-cudnn7
      - jupyter_notebook_config.py
      - Dockerfile.old
      - Dockerfile
      - run_jupyter.sh
    - cpu
      - jupyter_notebook_config.py
      - Dockerfile
      - run_jupyter.sh
    - cuda8-cudnn6
      - jupyter_notebook_config.py
      - Dockerfile.old
      - Dockerfile
      - run_jupyter.sh
  - .gitignore
  - build.sh
  - intel_caffe
    - cpu
      - Dockerfile
    - 18.12
      - Dockerfile
  - tensorrt
    - 18.08
      - Dockerfile
    - 18.11
      - Dockerfile
    - 18.10
      - Dockerfile
    - README.md
    - 18.12
      - Dockerfile
- tutorials
  - recipes
    - openvino
      - config.json
      - README.md
      - run
    - distributed
      - mxnet
        config.json
        README.md
        run
      - caffe2
        config.json
        README.md
        run
      - pytorch
        config.json
        README.md
        run
    - tensorflow_mkl
      - inference_test.json
      - parse
      - inference.json
      - run
      - .gitignore
    - multi_gpu_compute_scaling
      - config.json
      - README.md
      - run
      - .gitignore
    - inference
      - config.json
      - README.md
      - run
  - README.md
  - dlcookbook
    - introduction.sh
    - caffe2.sh
    - nvcnn.sh
    - log_parser.sh
    - bvlc_caffe.sh
    - tensorflow.sh
    - bench_stats.sh
    - time_analysis.sh
    - summary_builder.sh
    - advanced.sh
    - configs
      - advanced.json
      - bvlc_caffe.json
    - pytorch.sh
    - intel_caffe.sh
    - mxnet.sh
    - nvidia_caffe.sh
    - tensorrt
      - benchmark_host2device_copy.sh
      - benchmark_tensor_dataset.sh
  - .gitignore
- python
  - nvcnn_benchmarks
    - nvcnn.py
  - nvtfcnn_benchmarks
    - postprocess.py
  - pytorch_benchmarks
    - model_factory.py
    - models
      - vgg.py
      - alexnet.py
      - alexnet_owt.py
      - resnet.py
      - model.py
      - deep_mnist.py
      - googlenet.py
      - overfeat.py
      - __init__.py
      - acoustic_model.py
      - inception.py
      - sensor_net.py
    - __init__.py
    - dataset_factory.py
    - benchmarks.py
    - caffe
      - datum_pb2.py
      - datum.proto
      - __init__.py
      - datum.sh
  - mxnet_benchmarks
    - cluster_launcher.py
    - model_factory.py
    - data_iterator.py
    - models
      - vgg.py
      - alexnet.py
      - alexnet_owt.py
      - deep_speech2.py
      - resnet.py
      - model.py
      - deep_mnist.py
      - inception_resnet_v2.py
      - googlenet.py
      - overfeat.py
      - __init__.py
      - acoustic_model.py
      - inception.py
      - sensor_net.py
    - __init__.py
    - tests
      - test_data_iterator.py
      - test_benchmarks.py
      - test_model_factory.py
    - contrib
      - ctc_metrics.py
      - __init__.py
    - benchmarks.py
  - tf_cnn_benchmarks
    - convnet_builder.py
    - variable_mgr.py
    - benchmark_cnn_distributed_test.py
    - cnn_util.py
    - allreduce_test.py
    - models
      - deepmnist_model.py
      - nasnet_utils.py
      - googlenet_model.py
      - inception_model.py
      - vgg_model.py
      - densenet_model.py
      - trivial_model.py
      - alexnet_model.py
      - resnet_model.py
      - lenet_model.py
      - model.py
      - model_config.py
      - __init__.py
      - nasnet_model.py
      - acoustic_model.py
      - overfeat_model.py
      - sensornet_model.py
    - benchmark_storage.py
    - benchmark_cnn_test.py
    - preprocessing.py
    - test_util.py
    - allreduce.py
    - test_data
      - images
      - __init__.py
      - fake_tf_record_data
        validation-00000-of-00002
        validation-00001-of-00002
        train-00003-of-00008
        train-00000-of-00008
        train-00002-of-00008
        train-00001-of-00008
        train-00006-of-00008
        train-00004-of-00008
        train-00005-of-00008
        train-00007-of-00008
    - variable_mgr_util.py
    - cnn_util_test.py
    - flags.py
    - run_tests.py
    - datasets.py
    - README.md
    - variable_mgr_util_test.py
    - cbuild_benchmark_storage.py
    - benchmark_cnn.py
    - platforms
      - util.py
      - default
        util.py
        __init__.py
      - __init__.py
    - tf_cnn_benchmarks.py
    - benchmark_cnn_distributed_test_runner.py
  - openvino_benchmarks
    - postprocess.py
    - get_model_path.py
  - caffe2_benchmarks
    - model_factory.py
    - models
      - vgg.py
      - alexnet.py
      - alexnet_owt.py
      - resnet.py
      - model.py
      - deep_mnist.py
      - inception_resnet_v2.py
      - googlenet.py
      - overfeat.py
      - __init__.py
      - acoustic_model.py
      - inception.py
      - sensor_net.py
    - __init__.py
    - tests
      - test_benchmarks.py
      - test_model_factory.py
    - benchmarks.py
  - .gitignore
  - dlbs
    - help
      - frameworks.json
      - __init__.py
      - helper.py
      - param_doc_builder.py
    - sysinfo
      - __init__.py
      - systemconfig.py
    - exceptions.py
    - web
      - simple_server.py
    - result_processor.py
    - reports
      - time_analysis.py
      - bench_stats.py
      - __init__.py
      - series_builder.py
      - summary_builder.py
    - experimenter.py
    - bench_data.py
    - validator.py
    - launcher.py
    - logger.py
    - data
      - __init__.py
      - imagenet
        tensorflow_process_bboxes.py
        tensorflow_worker.py
        tensorflow_data.py
        tensorflow_build_imagenet_data.py
        imagenet_tools.py
        imagenet_labels.json.gz
        __init__.py
        README.md
    - logparser.py
    - configs
      - base.json
      - pytorch.json
      - tensorrt.json
      - caffe.json
      - nvcnn.json
      - mxnet.json
      - nvtfcnn.json
      - caffe2.json
      - tensorflow.json
      - openvino.json
    - __init__.py
    - README.md
    - utils.py
    - tests
      - env.py
      - test_utils.py
      - test_builder.py
      - test_config_caffe.py
      - test_resource_monitor.py
      - test_config_loader.py
      - test_config_base.py
      - test_helper.py
      - test_config_caffe2.py
      - __init__.py
      - test_configs.py
    - builder.py
    - worker.py
    - processor.py
- VERSION
- README.md
- scripts
  - resource_monitor.sh
  - environment.sh
  - utils.sh
  - make_report.sh
  - test_bandwidth.sh
  - launchers
    - caffe2.sh
    - tensorrt.sh
    - nvcnn.sh
    - nvtfcnn.sh
    - dummy.sh
    - tensorflow_hpm.sh
    - openvino.sh
    - pytorch.sh
    - mxnet.sh
    - caffe.sh
  - parse_options.sh
  - make_imagenet_data.sh
- docs
  - reporting
    - reporting.md
  - intro
    - intro.md
    - imgs
    - advanced_intro.md
  - parameters
    - parameters.md
  - sysinfo
    - sysinfo.md
  - .nojekyll
  - extend
    - dlbs.md
  - index.md
  - models
    - models.md
  - frameworks
    - pytorch.md
    - mxnet.md
    - tensorrt.md
    - frameworks.md
    - tensorflow.md
    - caffe2.md
    - caffe.md
  - dlpg
    - dlpg.md
    - imgs
  - precision
    - precision.md
  - docker
    - docker.md
    - install_docker.md
    - pull_build_images.md
    - docker_network.md
  - tutorials
    - tutorials.md
  - data
    - data.md
  - monitor
    - monitor.md
  - validation
    - validation.md
  - README.md
  - deep_dive
    - new_reporters.md
    - advanced_configuration.md
    - architecture.md
    - new_frameworks.md
  - _sidebar.md
  - index.html
  - apidoc
    - apidoc.md
  - runtimes
    - runtimes.md

Deep Learning Benchmarking Suite

Deep Learning Benchmarking Suite (DLBS) is a collection of command line tools for running consistent and reproducible deep learning benchmark experiments on various hardware/software platforms. In particular, DLBS:

Provides implementations of a number of neural networks in order to enforce apples-to-apples comparison across all supported frameworks. Models that are supported include various VGGs, ResNets, AlexNet and GoogleNet models. DLBS can support many more models via integration with third party benchmark projects such as Google's TF CNN Benchmarks or Tensor2Tensor.
Benchmarks single node multi-GPU or CPU platforms. Supported frameworks include various forks of Caffe (BVLC/NVIDIA/Intel), Caffe2, TensorFlow, MXNet and PyTorch. DLBS also supports NVIDIA's inference engine TensorRT, for which DLBS provides a highly optimized benchmark backend.
Supports inference and training phases.
Supports synthetic and real data.
Supports bare metal and docker environments.
Supports single/half/int8 precision and uses tensor cores with Volta GPUs.
Is based on a modular architecture enabling easy integration with other projects such as Google's TF CNN Benchmarks and Tensor2Tensor or NVIDIA's NVCNN, NVCNN-HVD or similar.
Supports raw performance metric (number of data samples per second like images/sec).

Supported platforms

Deep Learning Benchmarking Suite was tested on various servers with Ubuntu / RedHat / CentOS operating systems with and without NVIDIA GPUs. We have had some success running DLBS on top of AMD GPUs, but this is mostly untested. It may not work on Mac OS because some of the command line tools we use (for instance, sed) have a slightly different interface there - we will fix this in one of the next releases.

Installation

Install Docker and NVIDIA Docker for containerized benchmarks. Read here why we prefer to use docker and here for installing/troubleshooting tips. This is not required. DLBS can work with bare metal framework installations.

Clone Deep Learning Benchmarking Suite from GitHub

git clone https://github.com/HewlettPackard/dlcookbook-dlbs dlbs

The benchmarking suite mostly uses modules from the standard Python library (Python 2.7). Optional dependencies that do not influence the benchmarking process are listed in python/requirements.txt. If they are not found, the code that uses them will be disabled.
Build/pull docker images for containerized benchmarks or build/install host frameworks for bare metal benchmarks.
1. TensorFlow
2. BVLC Caffe
3. NVIDIA Caffe
4. Intel Caffe
5. Caffe2
6. MXNet
7. TensorRT
8. PyTorch
There are several ways to get Docker images. Read here about various options including images from NVIDIA GPU Cloud. We may not support the newest framework versions due to API changes.

Our recommendation is to use docker images specified in default DLBS configuration. Most of them are docker images from NVIDIA GPU Cloud.

Quick start

Assuming a CUDA-enabled GPU is present, execute the following commands to run a simple experiment with the ResNet50 model:

git clone https://github.com/HewlettPackard/dlcookbook-dlbs.git ./dlbs   # Install benchmarking suite

cd ./dlbs  &&  source ./scripts/environment.sh                           # Initialize host environment
python ./python/dlbs/experimenter.py help --frameworks                   # List supported DL frameworks
docker pull nvcr.io/nvidia/tensorflow:18.07-py3                          # Pull TensorFlow docker image from NGC

python $experimenter run\                                                # Benchmark ...
       -Pexp.framework='"nvtfcnn"'\                                      #     TensorFlow framework
       -Vexp.model='["resnet50", "alexnet_owt"]'\                        #     with ResNet50 and AlexNetOWT models
       -Vexp.gpus='["0", "0,1", "0,1,2,3"]'\                             #     run on 1, 2 and 4 GPUs
       -Pexp.dtype='"float16"'\                                          #     use mixed-precision training
       -Pexp.log_file='"${HOME}/dlbs/logs/${exp.id}.log"'                #     and write results to these files

python $logparser '${HOME}/dlbs/logs/*.log'\                             # Parse log files and
       --output_file '${HOME}/dlbs/results.json'                         #     print and write summary to this file

python $reporter --summary_file '${HOME}/dlbs/results.json'\             # Parse summary file and build
                 --type 'weak-scaling'\                                  #     weak scaling report
                 --target_variable 'results.time'                        #     using batch time as performance metric

This configuration will run 6 benchmarks (2 models times 3 GPU configurations). DLBS can support multiple benchmark backends for Deep Learning frameworks. In this particular example DLBS uses TensorFlow's nvtfcnn benchmark backend from NVIDIA, which is optimized for single/multi-GPU systems. The introduction section contains more information on what backends actually represent and which ones users should be using.

The introduction contains more examples of what DLBS can do.

Documentation

We host documentation here.

More information

License

Deep Learning Benchmarking Suite is licensed under Apache 2.0 license.

Contributing

All contributors must include acceptance of the DCO (Developer Certificate of Origin). Please read this document for more details.

Contact us

Natalia Vassilieva nvassilieva@hpe.com
Sergey Serebryakov sergey.serebryakov@hpe.com