# Python source code of batchLoader

# @Time    : 2018/3/27 10:59
# @File    : roibatchLoader.py
# @Author  : Sky chen
# @Email   : dzhchxk@126.com
# @Personal homepage  : https://coderskychen.cn
"""The data layer used during training to train a Fast R-CNN network.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch.utils.data as data
from PIL import Image
import torch

import cv2
import numpy as np
import numpy.random as npr
import torchvision.transforms as transforms
from scipy.misc import imread
import random
import time
import pdb


def bbox_overlap(boxe, query_boxe):
    """Return the intersection-over-union (IoU) of two boxes.

    Both arguments are indexed as (x1, y1, x2, y2).  Coordinates are treated
    as inclusive, which is why every width/height gets a ``+ 1``.

    Returns ``0.`` when the boxes do not intersect, otherwise
    ``intersection_area / union_area``.
    """
    query_area = ((query_boxe[2] - query_boxe[0] + 1) *
                  (query_boxe[3] - query_boxe[1] + 1))

    # Width of the intersection; non-positive means no horizontal overlap.
    inter_w = min(boxe[2], query_boxe[2]) - max(boxe[0], query_boxe[0]) + 1
    if not inter_w > 0:
        return 0.

    # Height of the intersection; non-positive means no vertical overlap.
    inter_h = min(boxe[3], query_boxe[3]) - max(boxe[1], query_boxe[1]) + 1
    if not inter_h > 0:
        return 0.

    inter_area = inter_w * inter_h
    own_area = (boxe[2] - boxe[0] + 1) * (boxe[3] - boxe[1] + 1)
    union_area = float(own_area + query_area - inter_area)
    return inter_area / union_area


class BatchLoader(data.Dataset):
    """Dataset yielding one preprocessed image and its ground-truth boxes.

    Every item is built from a single ``roidb`` entry: the image is read with
    OpenCV, optionally flipped horizontally, normalised and rescaled, and the
    entry's boxes are multiplied by the same scale factor.

    The roidb entries are read through the keys ``'image'``, ``'flipped'``,
    ``'boxes'`` and ``'gt_classes'``.  ``args`` must expose ``with_global``,
    ``BOTTLE_SCALE`` and ``caffe``.
    """

    # Upper bound on the number of ground-truth boxes kept per image in the
    # 'train' and 'eval' phases; capping it keeps GPU memory use more stable.
    MAX_NUM_GT_BOXES = 100

    def __init__(self, roidb, args, phase):
        """Set the roidb to be used by this layer during training.

        Args:
            roidb: indexable collection of per-image annotation dicts.
            args: configuration object (see the class docstring).
            phase: ``'train'`` (shuffle and cap the boxes), ``'eval'`` (cap
                only); any other value leaves the boxes untouched.
        """
        self.roidb = roidb
        self.args = args
        self.phase = phase

    def get_one_sample(self, index):
        """Build the blob dictionary for the roidb entry ``index[0]``.

        Args:
            index: sequence whose first element is the roidb index to load.

        Returns:
            dict with the keys ``'data'`` (image blob), ``'gt_boxes'``
            (rows of x1, y1, x2, y2, cls), ``'im_info'`` (height, width,
            scale), ``'memory_size'``, ``'num_gt'``, ``'img'`` (the entry's
            ``'image'`` value) and ``'index'``.
        """
        num_images = 1
        # Only a single training scale is configured; the random draw is kept
        # so the sequence of numbers consumed from the RNG does not change.
        scales = (600,)
        max_scale = 600
        random_scale_inds = npr.randint(0, high=len(scales), size=num_images)

        # Get the input image blob
        im_blob, im_scales = self.get_image_blob(index, random_scale_inds, scales, max_scale)

        entry = self.roidb[index[0]]
        blobs = {'data': im_blob}

        # gt boxes: (x1, y1, x2, y2, cls); class 0 is skipped.
        gt_inds = np.where(entry['gt_classes'] != 0)[0]
        gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
        gt_boxes[:, 0:4] = entry['boxes'][gt_inds, :] * im_scales[0]
        gt_boxes[:, 4] = entry['gt_classes'][gt_inds]
        blobs['gt_boxes'] = gt_boxes

        # height, width, scale
        blobs['im_info'] = np.array([im_blob.shape[1],
                                     im_blob.shape[2],
                                     im_scales[0]], dtype=np.float32)
        if self.args.with_global:
            blobs['memory_size'] = np.ceil(blobs['im_info'][:2] / self.args.BOTTLE_SCALE / 2.).astype(np.int32)  # conv5
        else:
            blobs['memory_size'] = np.ceil(blobs['im_info'][:2] / self.args.BOTTLE_SCALE).astype(np.int32)  # conv4
        blobs['num_gt'] = np.int32(gt_boxes.shape[0])
        blobs['img'] = entry['image']
        blobs['index'] = index[0]

        return blobs

    def get_image_blob(self, index, scale_inds, scales, max_scale):
        """Builds an input blob from the images in the roidb at the specified
        scales.

        Args:
            index: sequence of roidb indices; only the first one is used.
            scale_inds: per-image indices into ``scales``.
            scales: candidate target sizes for the shorter image side.
            max_scale: upper bound for the longer image side.

        Returns:
            ``(blob, im_scales)``: the float32 image blob and the list of
            scale factors applied to each image.

        Raises:
            IOError: if OpenCV cannot read the image file.
        """
        num_images = 1
        processed_ims = []
        im_scales = []
        for i in range(num_images):
            entry = self.roidb[index[i]]
            im = cv2.imread(entry['image'])
            if im is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path rather than with
                # an opaque error further down.
                raise IOError('Failed to read image: %s' % entry['image'])
            if entry['flipped']:
                im = im[:, ::-1, :]  # horizontal flip
            target_size = scales[scale_inds[i]]
            im, im_scale = self.prep_im_for_blob(im, target_size, max_scale)
            im_scales.append(im_scale)
            processed_ims.append(im)

        # Create a blob to hold the input images
        blob = self.im_list_to_blob(processed_ims)

        return blob, im_scales

    def im_list_to_blob(self, ims):
        """Convert a list of images into a network input.

        Assumes images are already prepared (means subtracted, BGR order, ...).

        Returns a float32 array of shape (len(ims), max_height, max_width, 3);
        each image is placed in the top-left corner and the rest is zero.
        """
        max_shape = np.array([im.shape for im in ims]).max(axis=0)
        blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                        dtype=np.float32)
        for i, im in enumerate(ims):
            blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

        return blob

    def prep_im_for_blob(self, im, target_size, max_size):
        """Mean subtract and scale an image for use in a blob.

        Args:
            im: H x W x 3 image array.
            target_size: desired length of the shorter side.
            max_size: maximum allowed length of the longer side.

        Returns:
            ``(im, im_scale)``: the normalised, resized image and the scale
            factor that was applied to both axes.
        """
        im = im.astype(np.float32, copy=False)
        # NOTE(review): this tests for None only, so a boolean ``caffe=False``
        # would still take the first branch - confirm how args.caffe is set.
        if self.args.caffe is not None:
            # Presumably Caffe-style per-channel BGR pixel means - TODO confirm.
            im -= np.array([[[103.939, 116.779, 123.68]]])
        else:
            # NOTE(review): the constants below look like the usual ImageNet
            # statistics in RGB order, but they are applied before the
            # channel flip, i.e. while the image is still BGR when it comes
            # from cv2.imread.  Left unchanged because existing checkpoints
            # may depend on this exact preprocessing - confirm before fixing.
            im /= 255.  # Convert range to [0,1]
            im -= np.array([[[0.485, 0.456, 0.406]]])  # Minus mean
            im /= np.array([[[0.229, 0.224, 0.225]]])  # divide by stddev
            im = im[:, :, ::-1]  # BGR to RGB

        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # Prevent the biggest axis from being more than MAX_SIZE
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)

        return im, im_scale

    # Disabled adjacency-matrix helper, kept for reference.
    # def _get_adjmat_Arr(self, gtboxes):
    #     Arr = np.ones((5, gtboxes.shape[0], gtboxes.shape[0]), dtype=np.float32)  # five types between regions : top bottom left right IOU
    #     Arr = Arr * 1e-14
    #     for i in range(gtboxes.shape[0]):   # top/bottom
    #         for j in range(gtboxes.shape[0]):
    #             if j >= i:
    #                 break
    #             if gtboxes[i][1] > gtboxes[j][1]:
    #                 Arr[0, i, j] = 1.  # top
    #                 Arr[1, j, i] = 1.  # bottom
    #
    #             if gtboxes[i][0] < gtboxes[j][0]:
    #                 Arr[2, i, j] = 1.  # left
    #                 Arr[3, j, i] = 1.  # right
    #
    #             iou = bbox_overlap(gtboxes[i], gtboxes[j])
    #             if iou != 0.:
    #                 Arr[4, i, j] = iou  # left
    #                 Arr[4, j, i] = iou  # right
    #
    #     sums = np.sum(Arr, axis=-1)  # [5, gts]
    #
    #     # normalize each row so that them sum to 1
    #     for n in range(5):
    #         for i in range(gtboxes.shape[0]):
    #             Arr[n, i, :] = Arr[n, i, :] / sums[n, i]
    #
    #     print('Arr mat sum:', Arr.sum(axis=-1))
    #     return Arr

    def _limit_gt_boxes(self, gt_boxes):
        """Shuffle (train only) and cap the boxes (train and eval).

        In the 'train' phase the rows are shuffled in place first, so the cap
        keeps a random subset.  In the 'eval' phase the first
        ``MAX_NUM_GT_BOXES`` rows are kept.  Any other phase returns the
        array unchanged.
        """
        if self.phase not in ('train', 'eval'):
            return gt_boxes

        if self.phase == 'train':
            np.random.shuffle(gt_boxes)

        limit = self.MAX_NUM_GT_BOXES
        if gt_boxes.shape[0] > limit:
            print('sampling regions from %d to %d' % (gt_boxes.shape[0], limit))
            gt_boxes = gt_boxes[:limit]
        return gt_boxes

    def __getitem__(self, index):
        """Return the sample for roidb entry ``index``.

        Returns:
            tuple ``(data, im_info, gt_boxes, mem_size, raw_data,
            raw_gt_boxes)``: the first four are torch tensors (``data`` is
            laid out as 3 x H x W, ``im_info`` has three elements); the last
            two are the numpy arrays the image and box tensors were built
            from.
        """
        blobs = self.get_one_sample([index])
        # Named im_data so the imported ``data`` module is not shadowed.
        im_data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])
        mem_size = torch.from_numpy(blobs['memory_size'])
        data_height, data_width = im_data.size(1), im_data.size(2)

        blobs['gt_boxes'] = self._limit_gt_boxes(blobs['gt_boxes'])

        # if self.args.with_global:
        #     Arr_ = self._get_adjmat_Arr(blobs['gt_boxes'])  # 5*r*r
        #     Arr = torch.from_numpy(Arr_)
        # else:
        #     Arr = 0.

        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        # The blob is (1, H, W, 3); move channels first and drop the leading
        # single-image axis to adapt to downstream processing.
        im_data = im_data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)
        return im_data, im_info, gt_boxes, mem_size, blobs['data'], blobs['gt_boxes']

    def __len__(self):
        """Return the number of roidb entries."""
        return len(self.roidb)