python source code of mask

# Modified by Minghui Liao and Pengyuan Lyu
###############################################################################
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

"""Construct minibatches for Mask R-CNN training. Handles the minibatch blobs
that are specific to Mask R-CNN. Other blobs that are generic to RPN or
Fast/er R-CNN are handled by their respecitive roi_data modules.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import numpy.random as npr
from core.config import cfg
import utils.blob as blob_utils
import utils.boxes as box_utils
import utils.segms as segm_utils
from utils.char_mask import generate_char_maps, generate_char_maps_and_polygon_map

from PIL import Image, ImageDraw

logger = logging.getLogger(__name__)

DEBUG = False

def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, given it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks

def add_charmask_rcnn_blobs(blobs, sampled_boxes, gt_boxes, gt_inds, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    is_e2e = cfg.MRCNN.IS_E2E
    M_HEIGHT = cfg.MRCNN.RESOLUTION_H
    M_WIDTH = cfg.MRCNN.RESOLUTION_W
    mask_rois_per_this_image = cfg.MRCNN.MASK_BATCH_SIZE_PER_IM
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    chars_gts = roidb['charboxes']
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    if DEBUG:
        img_path = roidb['image']
        img = Image.open(img_path)
        # img = blobs['data'][0]
        # img = img.transpose((1,2,0))
        # img  += cfg.PIXEL_MEANS
        # img = img.astype(np.int8)
        # img = Image.fromarray(img)

    if is_e2e:
        fg_inds = np.where(blobs['labels_int32'] > 0)[0]
        if fg_inds.size > mask_rois_per_this_image:
            fg_inds = npr.choice(
                fg_inds, size=mask_rois_per_this_image, replace=False
            )
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)

        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = blobs['labels_int32'][fg_inds]
            masks = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH), dtype=np.float32)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_inside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)

            # Find overlap between all foreground rois and the bounding boxes
            # enclosing each segmentation
            rois_fg = sampled_boxes[fg_inds]
            overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
                rois_fg.astype(np.float32, copy=False),
                boxes_from_polys.astype(np.float32, copy=False)
            )
            # Map from each fg rois to the index of the mask with highest overlap
            # (measured by bbox overlap)
            fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M_HEIGHT x M_WIDTH binary image
                mask, mask_weight, char_box, char_box_inside_weight = segm_utils.polys_to_mask_wrt_box_rec(chars_gt.copy(), poly_gt, roi_fg.copy(), M_HEIGHT, M_WIDTH, weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    draw = ImageDraw.Draw(img)
                    draw.rectangle([(roi_fg[0],roi_fg[1]), (roi_fg[2],roi_fg[3])])
                    img.save('./tests/image.jpg')
                    _visu_global_map(mask[0,:,:].copy(), './tests/proposals_visu_global.jpg')
                    _visu_char_map(mask[1,:,:].copy(), './tests/proposals_visu_char.jpg')
                    _visu_char_box(char_box, char_box_inside_weight, './tests/char_box.jpg', M_HEIGHT, M_WIDTH)
                masks[i, 0, :] = np.reshape(mask[0,:,:], M_HEIGHT*M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1,:,:], M_HEIGHT*M_WIDTH)
                mask_weights[i, :] = np.reshape(mask_weight, M_HEIGHT*M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box, (M_HEIGHT*M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(char_box_inside_weight, (M_HEIGHT*M_WIDTH, 4))
        else:  # If there are no fg masks (it does happen)
            # The network cannot handle empty blobs, so we must provide a mask
            # We simply take the first bg roi, given it an all -1's mask (ignore
            # label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because ...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it an -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes_inside_weight = np.zeros(1, M_HEIGHT*M_WIDTH, 4, dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1
    else:
        fg_inds = gt_inds
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)

        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = np.ones((fg_inds.shape[0], ), dtype=np.int32)
            masks = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_inside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            # mask_weights = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)

            rois_fg = gt_boxes
            # print(gt_boxes.shape[0])
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M_HEIGHT x M_WIDTH binary image
                mask, char_box, char_box_inside_weight = segm_utils.polys_to_mask_wrt_box_rec(chars_gt, poly_gt, roi_fg, M_HEIGHT, M_WIDTH, weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    _visu_char_box(char_box, char_box_inside_weight, './tests/char_box.jpg', M_HEIGHT, M_WIDTH)
                mask = np.array(mask, dtype=np.int32)  # Ensure it's binary
                # mask_weight = np.array(mask_weight, dtype=np.int32)  # Ensure it's binary
                masks[i, 0, :] = np.reshape(mask[0,:,:], M_HEIGHT*M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1,:,:], M_HEIGHT*M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box, (M_HEIGHT*M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(char_box_inside_weight, (M_HEIGHT*M_WIDTH, 4))
        else:  # If there are no fg masks (it does happen)
            # The network cannot handle empty blobs, so we must provide a mask
            # We simply take the first bg roi, given it an all -1's mask (ignore
            # label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because ...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it an -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes = -np.ones(1, M_HEIGHT*M_WIDTH, 4, dtype=np.int32)
            char_boxes_inside_weight = -np.zeros(1, M_HEIGHT*M_WIDTH, 4, dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1


    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    char_boxes_outside_weight = np.array(
        char_boxes_inside_weight > 0, dtype=char_boxes_inside_weight.dtype
    )

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_global_int32'] = masks[:, 0, :]
    blobs['masks_char_int32'] = masks[:, 1, :].reshape((-1, M_HEIGHT, M_WIDTH))
    blobs['masks_char_weight'] = mask_weights
    blobs['char_bbox_targets'] = char_boxes.reshape((-1,4))
    blobs['char_bbox_inside_weights'] = char_boxes_inside_weight.reshape((-1,4))
    blobs['char_bbox_outside_weights'] = char_boxes_outside_weight.reshape((-1,4))



def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    """Expand masks from shape (#masks, M ** 2) to (#masks, #classes * M ** 2)
    to encode class specific mask targets.
    """
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.MRCNN.RESOLUTION

    # Target values of -1 are "don't care" / ignore labels
    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True
    )

    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start = M**2 * cls
        end = start + M**2
        # Ignore background instance
        # (only happens when there is no fg samples in an image)
        if cls > 0:
            mask_targets[i, start:end] = masks[i, :]

    return mask_targets

def _visu_global_map(mask, save_path):
    mask = mask*225
    mask = mask.astype('uint8')
    im = Image.fromarray(mask)
    im.save(save_path)

def _visu_char_map(char_mask, save_path):
    char_mask = 255 - char_mask*5
    char_mask = char_mask.astype('uint8')
    im = Image.fromarray(char_mask)
    im.save(save_path)

def _visu_char_box(char_box, char_box_weight, save_path, height, width):
    im=np.ones((height, width, 3))
    im = im.astype('uint8')
    im = Image.fromarray(im)
    img_draw = ImageDraw.Draw(im)
    for i in range(char_box.shape[0]):
        for j in range(char_box.shape[1]):
            if char_box[i,j,0]>0:
                # assert(char_box_weight[i,j,0]==1 and char_box_weight[i,j,1]==1 and char_box_weight[i,j,2]==1 and char_box_weight[i,j,3]==1)
                ymin = int((i - char_box[i,j,0]*height))
                xmax = int((j + char_box[i,j,1]*height))
                ymax = int((i + char_box[i,j,2]*height))
                xmin = int((j - char_box[i,j,3]*height))
                box = [xmin, ymin, xmax, ymax]
                img_draw.rectangle(box, outline=(255, 0, 0))

    im.save(save_path)