# Python source code of kitti_object_to

# Copyright 2017 Zehao Shi. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts KITTI Object data to TFRecords file format with Example protos.

The raw KITTI Object data set is expected to reside in PNG files located in the
directory 'image_2'. Similarly, the annotations are expected to be stored as
text files in the directory 'label_2'.

"""
import os
import sys
import random
import re

import tensorflow as tf
import cv2
import numpy as np
from datasets.dataset_utils import int64_feature, float_feature, bytes_feature

# TFRecords conversion parameters.
# Seed applied before shuffling the sample list, so the order is reproducible.
RANDOM_SEED = 4242
# Upper bound on the number of samples written to a single TFRecord file.
SAMPLES_PER_FILES = 200

# KITTI object type name -> integer class label stored in the TFRecords.
# Annotated objects whose type is not listed here are ignored.
CLASSES = dict(Pedestrian=0, Cyclist=1, Car=2)

def _parse_kitti_labels(lines):
    """Extracts class labels and 2D boxes from the lines of a label file.

    Args:
      lines: Iterable of text lines. Each line holds whitespace-separated
        fields; field 0 is the object type and fields 4-7 are the bounding
        box coordinates (x1, y1, x2, y2).

    Returns:
      A tuple (labels, types, x1s, y1s, x2s, y2s) of parallel lists, with one
      entry per object whose type is a key of CLASSES. `types` holds the
      ASCII-encoded type names.
    """
    labels, types = [], []
    x1s, y1s, x2s, y2s = [], [], [], []
    for line in lines:
        fields = line.split()
        # Skip blank lines and object types that are not converted.
        if not fields or fields[0] not in CLASSES:
            continue
        type_txt = fields[0]
        # Look the class up with the text key *before* encoding: CLASSES is
        # keyed by str, and on Python 3 encoded bytes never compare equal to
        # str, so no object would ever match.
        labels.append(CLASSES[type_txt])
        types.append(type_txt.encode('ascii'))

        # Bounding box.
        x1s.append(float(fields[4]))
        y1s.append(float(fields[5]))
        x2s.append(float(fields[6]))
        y2s.append(float(fields[7]))
    return labels, types, x1s, y1s, x2s, y2s


def _process_image(directory, split, name):
    """Builds the tf.train.Example for one image and its annotations.

    Args:
      directory: Directory containing the 'image_2' and 'label_2' folders.
      split: Split name. Annotations are read when it contains 'train' or
        'val'; for any other split (e.g. 'test') the object lists stay empty.
      name: Sample name, i.e. the file name without extension.

    Returns:
      A tf.train.Example holding the encoded PNG bytes, the image shape, and
      the labels and bounding boxes of the objects listed in CLASSES.

    Raises:
      IOError: If OpenCV cannot decode the image file.
    """
    # Read the image file. PNG data is binary, so it must be read in 'rb'
    # mode; the context manager closes the handle right away.
    filename = os.path.join(directory, 'image_2', name + '.png')
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        image_data = image_file.read()

    # Get shape. cv2.imread signals failure by returning None rather than
    # raising, so fail here with a message that names the file.
    img = cv2.imread(filename)
    if img is None:
        raise IOError('Could not decode image file: %s' % filename)
    # A list, like every other multi-valued feature passed on below.
    shape = list(np.shape(img))

    label_list, type_list = [], []
    bbox_x1_list, bbox_y1_list = [], []
    bbox_x2_list, bbox_y2_list = [], []

    # Only the 'test' split comes without annotations. 'val' is matched
    # explicitly because the plain 'train' pattern would skip its labels.
    if re.search(r'train|val', split):
        # Read the txt annotation file.
        label_path = os.path.join(directory, 'label_2', name + '.txt')
        with open(label_path) as anno_file:
            (label_list, type_list,
             bbox_x1_list, bbox_y1_list,
             bbox_x2_list, bbox_y2_list) = _parse_kitti_labels(anno_file)

    # NOTE: no 'image/format' entry is written; readers have to know (or
    # detect) that 'image/encoded' holds PNG data.
    return tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(bbox_x1_list),
        'image/object/bbox/xmax': float_feature(bbox_x2_list),
        'image/object/bbox/ymin': float_feature(bbox_y1_list),
        'image/object/bbox/ymax': float_feature(bbox_y2_list),
        'image/object/bbox/label': int64_feature(label_list),
        'image/object/bbox/label_text': bytes_feature(type_list),
    }))


def _add_to_tfrecord(dataset_dir, split, name, tfrecord_writer):
    """Converts one sample and appends it to an open TFRecord file.

    Args:
      dataset_dir: Dataset directory the image and annotation are read from.
      split: Split name, forwarded unchanged to the image processing step.
      name: Image name.
      tfrecord_writer: Writer that receives the serialized Example.
    """
    serialized = _process_image(dataset_dir, split, name).SerializeToString()
    tfrecord_writer.write(serialized)


def _get_output_filename(output_dir, name, idx):
    return '%s/%s_%03d.tfrecord' % (output_dir, name, idx)


def run(kitti_root, split, output_dir, shuffling=False):
    """Runs the conversion operation.

    The sample names are read from 'ImageSets/<split>.txt' under `kitti_root`
    and written to TFRecord files of at most SAMPLES_PER_FILES samples each.

    Args:
      kitti_root: KITTI dataset root dir.
      split: trainval/train/val. These three are read from the 'training'
        directory; any other split name is read from '<split>ing'.
      output_dir: Output directory. Created if it does not exist.
      shuffling: If True, the sample order is shuffled after seeding the
        `random` module with RANDOM_SEED.
    """
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    # Dataset filenames, and shuffling.
    split_file_path = os.path.join(kitti_root, 'ImageSets', '%s.txt' % split)
    with open(split_file_path) as split_file:
        # Strip once and drop blank lines (e.g. a trailing newline at the end
        # of the file), which would otherwise become empty sample names.
        stripped = (line.strip() for line in split_file)
        filenames = [sample for sample in stripped if sample]

    if shuffling:
        random.seed(RANDOM_SEED)
        random.shuffle(filenames)

    # 'train', 'val' and 'trainval' all live in the 'training' directory;
    # building the name as '<split>ing' would yield 'trainvaling' for
    # 'trainval'.
    if split in ('train', 'val', 'trainval'):
        image_dir = os.path.join(kitti_root, 'training')
    else:
        image_dir = os.path.join(kitti_root, '%sing' % split)

    # Process dataset files, one TFRecord file per chunk of samples.
    num_samples = len(filenames)
    chunk_starts = range(0, num_samples, SAMPLES_PER_FILES)
    for fidx, start in enumerate(chunk_starts):
        chunk = filenames[start:start + SAMPLES_PER_FILES]
        tf_filename = _get_output_filename(output_dir, split, fidx)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            for position, filename in enumerate(chunk, start + 1):
                sys.stdout.write(
                    '\r>> Converting image %d/%d' % (position, num_samples))
                sys.stdout.flush()
                _add_to_tfrecord(image_dir, split, filename, tfrecord_writer)

    print('\nFinished converting the KITTI dataset!')