from __future__ import division

import cv2
import h5py
import numpy as np

from utils import logger


class InsSegDataset(object):
    """Instance-segmentation dataset backed by a single HDF5 file.

    Each example is stored in an HDF5 group keyed by its string ID (see
    `get_str_id`).  Images and label maps are stored as encoded byte strings
    and decoded on demand with `cv2.imdecode`.
    """

    def __init__(self, h5_fname):
        """Open the dataset and read the example index.

        Args:
            h5_fname: Path to the HDF5 dataset file.
        """
        self.log = logger.get()
        self.h5_fname = h5_fname
        self.log.info('Reading image IDs')
        self.img_ids = self._read_ids()

    def _read_ids(self):
        """Read the array of example IDs from the 'index_map' dataset."""
        self.log.info(self.h5_fname)
        with h5py.File(self.h5_fname, 'r') as h5f:
            return h5f['index_map'][:]

    def get_name(self):
        """Dataset name; subclasses are expected to override."""
        return 'unknown'

    def get_str_id(self, idx):
        """Convert a numeric example ID into its HDF5 group key."""
        return str(idx)

    def get_dataset_size(self):
        """Get number of examples."""
        return len(self.img_ids)

    def get_default_timespan(self):
        """Maximum number of instance slots per example.

        Subclasses must override.  NotImplementedError is a subclass of
        Exception, so existing callers catching Exception still work.
        """
        raise NotImplementedError('Not implemented')

    def get_num_semantic_classes(self):
        """Number of semantic classes (1 = foreground/background only)."""
        return 1

    def get_full_size_labels(self, img_ids, timespan=None):
        """Get full sized labels.

        Args:
            img_ids: Iterable of example IDs.
            timespan: Max number of instance slots; defaults to
                `get_default_timespan()`.

        Returns:
            List with one entry per example: a [timespan, H, W] float array
            of per-instance masks, an all-zero array when the example has no
            full-size segmentation, or None when the label group is empty
            (original behavior preserved).
        """
        if timespan is None:
            timespan = self.get_default_timespan()
        with h5py.File(self.h5_fname, 'r') as h5f:
            y_full = []
            for ii in img_ids:
                data_group = h5f[self.get_str_id(ii)]
                if 'label_segmentation_full_size' in data_group:
                    y_gt_group = data_group['label_segmentation_full_size']
                    num_obj = len(y_gt_group.keys())
                    y_full_kk = None
                    # BUGFIX: was `xrange` (Python 2 only); the rest of the
                    # file already uses `range`.
                    for jj in range(min(num_obj, timespan)):
                        y_full_jj_str = y_gt_group['{:02d}'.format(jj)][:]
                        # BUGFIX: cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in
                        # OpenCV 3; cv2.IMREAD_GRAYSCALE is the same flag (0).
                        y_full_jj = cv2.imdecode(
                            y_full_jj_str,
                            cv2.IMREAD_GRAYSCALE).astype('float32')
                        if y_full_kk is None:
                            # Allocate once the decoded size is known.
                            y_full_kk = np.zeros(
                                [timespan, y_full_jj.shape[0],
                                 y_full_jj.shape[1]])
                        y_full_kk[jj] = y_full_jj
                    y_full.append(y_full_kk)
                else:
                    # No full-size labels stored; fall back to zeros at the
                    # original image size.
                    y_full.append(np.zeros(
                        [timespan] + list(data_group['orig_size'][:])))
            return y_full

    def get_batch(self, idx, timespan=None, variables=None):
        """Get a mini-batch.

        Args:
            idx: Index (or index array) into `self.img_ids` selecting the
                examples of the batch.
            timespan: Max number of instance slots; defaults to
                `get_default_timespan()`.
            variables: Set of result keys to assemble; defaults to
                {'x', 'y_gt', 'y_out', 'c_gt', 'd_gt', 'd_out', 's_gt',
                'idx_map'}.

        Returns:
            Dict mapping each requested variable name to a batched array
            (plus 'source' when the HDF5 groups carry a 'source' entry).
        """
        if timespan is None:
            timespan = self.get_default_timespan()
        if variables is None:
            variables = set(
                ['x', 'y_gt', 'y_out', 'c_gt', 'd_gt', 'd_out', 's_gt',
                 'idx_map'])
        with h5py.File(self.h5_fname, 'r') as h5f:
            img_ids = self.img_ids[idx]
            num_ex = len(idx)
            created_arr = False
            results = {}
            for kk, ii in enumerate(img_ids):
                key = self.get_str_id(ii)
                data_group = h5f[key]
                x_str = data_group['input'][:]
                # -1 = IMREAD_UNCHANGED: keep the stored channel count.
                x = cv2.imdecode(x_str, -1)
                height = x.shape[0]
                width = x.shape[1]
                depth = x.shape[2]
                num_ori_classes = 8
                num_sem_classes = self.get_num_semantic_classes()
                # Filled by the 'y_gt' branch; reused by 'c_gt_idx' to keep
                # instance ordering consistent (largest area first).
                area_sort = None
                if num_sem_classes == 1:
                    nc = 1
                else:
                    nc = num_sem_classes + 1  # Including background

                # Allocate output arrays once, using the first example's
                # decoded image size.
                if not created_arr:
                    if 'source' in data_group:
                        results['source'] = []
                    if 'x' in variables:
                        results['x'] = np.zeros(
                            [num_ex, height, width, depth], dtype='float32')
                    if 'y_gt' in variables:
                        results['y_gt'] = np.zeros(
                            [num_ex, timespan, height, width],
                            dtype='float32')
                    if 'x_full' in variables:
                        if len(idx) > 1:
                            raise Exception(('x_full can be only provided in '
                                             'batch_size=1 mode.'))
                        results['x_full'] = None
                    if 'y_gt_full' in variables:
                        if len(idx) > 1:
                            raise Exception(
                                ('y_gt_full can be only provided in '
                                 'batch_size=1 mode.'))
                        results['y_gt_full'] = None
                    if 'y_out_ins' in variables:
                        results['y_out_ins'] = np.zeros(
                            [num_ex, timespan, height, width],
                            dtype='float32')
                    if 'c_gt' in variables:
                        results['c_gt'] = np.zeros(
                            [num_ex, height, width, nc], dtype='float32')
                    if 'c_gt_idx' in variables:
                        results['c_gt_idx'] = np.zeros(
                            [num_ex, timespan, nc], dtype='float32')
                    if 'd_gt' in variables:
                        results['d_gt'] = np.zeros(
                            [num_ex, height, width, num_ori_classes],
                            dtype='float32')
                    if 'y_out' in variables:
                        results['y_out'] = np.zeros(
                            [num_ex, height, width, nc], dtype='float32')
                    if 'd_out' in variables:
                        results['d_out'] = np.zeros(
                            [num_ex, height, width, num_ori_classes],
                            dtype='float32')
                    if 's_out' in variables:
                        results['s_out'] = np.zeros(
                            [num_ex, timespan], dtype='float32')
                    if 's_gt' in variables:
                        results['s_gt'] = np.zeros(
                            [num_ex, timespan], dtype='float32')
                    if 'orig_size' in variables:
                        results['orig_size'] = np.zeros(
                            [num_ex, 2], dtype='int32')
                    created_arr = True

                if 'x' in variables:
                    results['x'][kk] = x.astype('float32') / 255

                if 'x_full' in variables:
                    if 'input_full_size' in data_group:
                        x_full_group = data_group['input_full_size']
                        x_full_str = x_full_group[:]
                        x_full = cv2.imdecode(
                            x_full_str, -1).astype('float32') / 255
                        results['x_full'] = x_full

                if 'y_gt' in variables:
                    if 'label_segmentation' in data_group:
                        y_gt_group = data_group['label_segmentation']
                        num_obj = len(y_gt_group.keys())
                        _y_gt = []
                        # If we cannot fit in all the objects, sort instances
                        # such that the largest will be fed.
                        for jj in range(num_obj):
                            y_gt_str = y_gt_group['{:02d}'.format(jj)][:]
                            _y_gt.append(
                                cv2.imdecode(y_gt_str, -1).astype('float32'))
                        area = np.array([yy.sum() for yy in _y_gt])
                        area_sort = np.argsort(area)[::-1]
                        for jj in range(min(num_obj, timespan)):
                            results['y_gt'][kk, jj] = _y_gt[area_sort[jj]]

                if 'y_gt_full' in variables:
                    if 'label_segmentation_full_size' in data_group:
                        y_gt_full_group = \
                            data_group['label_segmentation_full_size']
                        num_obj = len(y_gt_full_group.keys())
                        _y_gt_full = []
                        for jj in range(num_obj):
                            y_gt_str = y_gt_full_group['{:02d}'.format(jj)][:]
                            _y_gt_full.append(
                                cv2.imdecode(y_gt_str, -1).astype('float32'))
                        # Same largest-first ordering as 'y_gt'.
                        area = np.array([yy.sum() for yy in _y_gt_full])
                        area_sort_full = np.argsort(area)[::-1]
                        results['y_gt_full'] = np.zeros(
                            [timespan, _y_gt_full[0].shape[0],
                             _y_gt_full[0].shape[1]])
                        for jj in range(min(num_obj, timespan)):
                            results['y_gt_full'][jj] = \
                                _y_gt_full[area_sort_full[jj]]
                    else:
                        # No full-size labels: zeros at the original size, or
                        # at the stored full-size input's shape as fallback.
                        if 'orig_size' in data_group:
                            results['y_gt_full'] = \
                                np.zeros([timespan] +
                                         list(data_group['orig_size'][:]))
                        else:
                            results['y_gt_full'] = \
                                np.zeros(
                                    [timespan] +
                                    list(data_group['input_full_size'].shape))

                if 'y_out_ins' in variables:
                    if 'instance_pred' in data_group:
                        y_out_ins_group = data_group['instance_pred']
                        num_obj = len(y_out_ins_group.keys())
                        for jj in range(num_obj):
                            _y_out_jj_str = \
                                y_out_ins_group['{:02d}'.format(jj)][:]
                            _y_out_jj = cv2.imdecode(
                                _y_out_jj_str, -1).astype('float32') / 255
                            results['y_out_ins'][kk, jj] = _y_out_jj
                    else:
                        raise Exception(
                            'Key not found: {}'.format('instance_pred'))

                if 'c_gt' in variables:
                    # NOTE(review): missing 'label_semantic_segmentation' is
                    # deliberately tolerated (leaves zeros) — the original
                    # had the raise commented out.
                    if 'label_semantic_segmentation' in data_group:
                        c_gt_group = \
                            data_group['label_semantic_segmentation']
                        if num_sem_classes > 1:
                            for jj in range(num_sem_classes):
                                if num_sem_classes == 1:
                                    cid = jj
                                else:
                                    cid = jj + 1  # Including background
                                cstr = '{:02d}'.format(jj)
                                if cstr in c_gt_group:
                                    c_gt_str = c_gt_group[cstr][:]
                                    results['c_gt'][kk, :, :, cid] = \
                                        cv2.imdecode(
                                            c_gt_str, -1).astype('float32')
                            # Background class, everything else.
                            results['c_gt'][kk, :, :, 0] = 1 - \
                                results['c_gt'][kk].max(axis=2)
                        else:
                            c_gt_str = c_gt_group['00'][:]
                            results['c_gt'][kk, :, :, 0] = cv2.imdecode(
                                c_gt_str, -1).astype('float32')

                if 'c_gt_idx' in variables:
                    # NOTE(review): missing key tolerated, as above.
                    if 'instance_semantic_classes' in data_group:
                        c_gt_idx = \
                            data_group['instance_semantic_classes'][:]
                        num_obj = len(c_gt_idx)
                        if num_obj > 0:
                            # Reorder to match the largest-first instance
                            # order computed in the 'y_gt' branch.
                            c_gt_idx = c_gt_idx[area_sort]
                        for jj in range(min(num_obj, timespan)):
                            results['c_gt_idx'][kk, :jj,
                                                c_gt_idx[jj] + 1] = 1.0
                        if num_obj < timespan:
                            # Remaining slots marked as background class 0.
                            for jj in range(num_obj, timespan):
                                results['c_gt_idx'][kk, :jj, 0] = 1.0

                if 'd_gt' in variables:
                    # NOTE(review): missing 'orientation' tolerated, as above.
                    if 'orientation' in data_group:
                        d_gt_str = data_group['orientation'][:]
                        d_gt_ = cv2.imdecode(
                            d_gt_str, -1).astype('float32')
                        # One-hot expand the orientation class map.
                        for oo in range(num_ori_classes):
                            results['d_gt'][kk, :, :, oo] = \
                                (d_gt_ == oo).astype('float32')

                if 's_gt' in variables:
                    if 'label_segmentation' in data_group:
                        y_gt_group = data_group['label_segmentation']
                        num_obj = len(y_gt_group.keys())
                        # Confidence 1 for every present instance slot.
                        results['s_gt'][kk, :min(num_obj, timespan)] = 1.0

                if 'd_out' in variables:
                    for oo in range(num_ori_classes):
                        d_out_str = data_group[
                            'orientation_pred/{:02d}'.format(oo)][:]
                        d_out_arr = cv2.imdecode(d_out_str, -1)
                        d_out_arr = d_out_arr.astype('float32') / 255
                        results['d_out'][kk, :, :, oo] = d_out_arr

                if 'y_out' in variables:
                    for cc in range(nc):
                        if nc == 1:
                            # Backward compatibility.
                            if 'foreground_pred/{:02d}'.format(cc) \
                                    not in data_group:
                                y_out_str = data_group['foreground_pred'][:]
                            else:
                                y_out_str = data_group[
                                    'foreground_pred/{:02d}'.format(cc)][:]
                        else:
                            y_out_str = data_group[
                                'foreground_pred/{:02d}'.format(cc)][:]
                        y_out_arr = cv2.imdecode(y_out_str, -1)
                        y_out_arr = y_out_arr.astype('float32') / 255
                        results['y_out'][kk, :, :, cc] = y_out_arr

                if 's_out' in variables:
                    _s = data_group['score_pred'][:]
                    results['s_out'][kk] = _s

                # For combined datasets, the source of the data example.
                if 'source' in data_group:
                    results['source'].append(data_group['source'][0])

                if 'orig_size' in variables:
                    results['orig_size'][kk] = data_group['orig_size'][:]

            if 'idx_map' in variables:
                results['idx_map'] = img_ids

            return results