python source code of fast_rcnn

# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
"""Construct minibatches for Fast R-CNN training. Handles the minibatch blobs
that are specific to Fast R-CNN. Other blobs that are generic to RPN, etc.
are handled by their respecitive roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import numpy.random as npr
import logging

from core.config import cfg
import utils.boxes as box_utils
import utils.blob as blob_utils
import utils.fpn as fpn_utils


logger = logging.getLogger(__name__)


def add_rel_blobs(blobs, im_scales, roidb):
    """Add blobs needed for training Fast R-CNN style models."""
    # Sample training RoIs from each image and append them to the blob lists
    for im_i, entry in enumerate(roidb):
        frcn_blobs = _sample_pairs(entry, im_scales[im_i], im_i)
        for k, v in frcn_blobs.items():
            blobs[k].append(v)
    # Concat the training blob lists into tensors
    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)
            
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_rel_multilevel_rois(blobs)

    return True


def _sample_pairs(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    fg_pairs_per_image = cfg.TRAIN.FG_REL_SIZE_PER_IM
    pairs_per_image = int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION)  # need much more pairs since it's quadratic
    max_pair_overlaps = roidb['max_pair_overlaps']

    gt_pair_inds = np.where(max_pair_overlaps > 1.0 - 1e-4)[0]
    fg_pair_inds = np.where((max_pair_overlaps >= cfg.TRAIN.FG_THRESH) &
                            (max_pair_overlaps <= 1.0 - 1e-4))[0]
    
    fg_pairs_per_this_image = np.minimum(fg_pairs_per_image, gt_pair_inds.size + fg_pair_inds.size)
    # Sample foreground regions without replacement
    if fg_pair_inds.size > 0:
        fg_pair_inds = npr.choice(
            fg_pair_inds, size=(fg_pairs_per_this_image - gt_pair_inds.size), replace=False)
    fg_pair_inds = np.append(fg_pair_inds, gt_pair_inds)

    # Label is the class each RoI has max overlap with
    fg_prd_labels = roidb['max_prd_classes'][fg_pair_inds]
    blob_dict = dict(
        fg_prd_labels_int32=fg_prd_labels.astype(np.int32, copy=False))
    
    bg_pair_inds = np.where((max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0]
        
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_pairs_per_this_image = pairs_per_image - fg_pairs_per_this_image
    bg_pairs_per_this_image = np.minimum(bg_pairs_per_this_image, bg_pair_inds.size)
    # Sample foreground regions without replacement
    if bg_pair_inds.size > 0:
        bg_pair_inds = npr.choice(
            bg_pair_inds, size=bg_pairs_per_this_image, replace=False)
    keep_pair_inds = np.append(fg_pair_inds, bg_pair_inds)
    all_prd_labels = np.zeros(keep_pair_inds.size, dtype=np.int32)
    all_prd_labels[:fg_pair_inds.size] = fg_prd_labels + 1  # class should start from 1

    blob_dict['all_prd_labels_int32'] = all_prd_labels.astype(np.int32, copy=False)
    blob_dict['fg_size'] = np.array([fg_pair_inds.size], dtype=np.int32)  # this is used to check if there is at least one fg to learn

    sampled_sbj_boxes = roidb['sbj_boxes'][keep_pair_inds]
    sampled_obj_boxes = roidb['obj_boxes'][keep_pair_inds]
    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_sbj_rois = sampled_sbj_boxes * im_scale
    sampled_obj_rois = sampled_obj_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((keep_pair_inds.shape[0], 1))
    sampled_sbj_rois = np.hstack((repeated_batch_idx, sampled_sbj_rois))
    sampled_obj_rois = np.hstack((repeated_batch_idx, sampled_obj_rois))
    blob_dict['sbj_rois'] = sampled_sbj_rois
    blob_dict['obj_rois'] = sampled_obj_rois
    sampled_rel_rois = box_utils.rois_union(sampled_sbj_rois, sampled_obj_rois)
    blob_dict['rel_rois'] = sampled_rel_rois
    if cfg.MODEL.USE_FREQ_BIAS or cfg.MODEL.USE_SEPARATE_SO_SCORES:
        sbj_labels = roidb['max_sbj_classes'][keep_pair_inds]
        obj_labels = roidb['max_obj_classes'][keep_pair_inds]
        blob_dict['all_sbj_labels_int32'] = sbj_labels.astype(np.int32, copy=False)
        blob_dict['all_obj_labels_int32'] = obj_labels.astype(np.int32, copy=False)

    return blob_dict


def _add_rel_multilevel_rois(blobs):
    """By default training RoIs are added for a single feature map level only.
    When using FPN, the RoIs must be distributed over different FPN levels
    according the level assignment heuristic (see: modeling.FPN.
    map_rois_to_fpn_levels).
    """
    lvl_min = cfg.FPN.ROI_MIN_LEVEL
    lvl_max = cfg.FPN.ROI_MAX_LEVEL

    def _distribute_rois_over_fpn_levels(rois_blob_names):
        """Distribute rois over the different FPN levels."""
        # Get target level for each roi
        # Recall blob rois are in (batch_idx, x1, y1, x2, y2) format, hence take
        # the box coordinates from columns 1:5
        lowest_target_lvls = None
        for rois_blob_name in rois_blob_names:
            target_lvls = fpn_utils.map_rois_to_fpn_levels(
                blobs[rois_blob_name][:, 1:5], lvl_min, lvl_max)
            if lowest_target_lvls is None:
                lowest_target_lvls = target_lvls
            else:
                lowest_target_lvls = np.minimum(lowest_target_lvls, target_lvls)
        for rois_blob_name in rois_blob_names:
            # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
            fpn_utils.add_multilevel_roi_blobs(
                blobs, rois_blob_name, blobs[rois_blob_name], lowest_target_lvls, lvl_min,
                lvl_max)

    _distribute_rois_over_fpn_levels(['sbj_rois'])
    _distribute_rois_over_fpn_levels(['obj_rois'])
    _distribute_rois_over_fpn_levels(['rel_rois'])