""" Keras RFCN Copyright (c) 2018 Licensed under the MIT License (see LICENSE for details) Written by parap1uie-s@github.com """ import tensorflow as tf import keras.backend as K ############################################################ # Loss Functions ############################################################ def smooth_l1_loss(y_true, y_pred): """Implements Smooth-L1 loss. y_true and y_pred are typicallly: [N, 4], but could be any shape. """ diff = K.abs(y_true - y_pred) less_than_one = K.cast(K.less(diff, 1.0), "float32") loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) return loss def rpn_class_loss_graph(rpn_match, rpn_class_logits): """RPN anchor classifier loss. rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, -1=negative, 0=neutral anchor. rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG. """ # Squeeze last dim to simplify rpn_match = tf.squeeze(rpn_match, -1) # Get anchor classes. Convert the -1/+1 match to 0/1 values. anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) # Positive and Negative anchors contribute to the loss, # but neutral anchors (match value = 0) don't. indices = tf.where(K.not_equal(rpn_match, 0)) # Pick rows that contribute to the loss and filter out the rest. rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) anchor_class = tf.gather_nd(anchor_class, indices) # Crossentropy loss loss = K.sparse_categorical_crossentropy(target=anchor_class, output=rpn_class_logits, from_logits=True) loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) return loss def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): """Return the RPN bounding box loss graph. config: the model config object. target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. Uses 0 padding to fill in unsed bbox deltas. rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, -1=negative, 0=neutral anchor. rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] """ # Positive anchors contribute to the loss, but negative and # neutral anchors (match value of 0 or -1) don't. rpn_match = K.squeeze(rpn_match, -1) indices = tf.where(K.equal(rpn_match, 1)) # Pick bbox deltas that contribute to the loss rpn_bbox = tf.gather_nd(rpn_bbox, indices) # Trim target bounding box deltas to the same length as rpn_bbox. batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) target_bbox = batch_pack_graph(target_bbox, batch_counts, config.IMAGES_PER_GPU) # TODO: use smooth_l1_loss() rather than reimplementing here # to reduce code duplication diff = K.abs(target_bbox - rpn_bbox) less_than_one = K.cast(K.less(diff, 1.0), "float32") loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) return loss def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, active_class_ids): """Loss for the classifier head of Mask RCNN. target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero padding to fill in the array. pred_class_logits: [batch, num_rois, num_classes] active_class_ids: [batch, num_classes]. Has a value of 1 for classes that are in the dataset of the image, and 0 for classes that are not in the dataset. """ target_class_ids = tf.cast(target_class_ids, 'int64') # Find predictions of classes that are not in the dataset. pred_class_ids = tf.argmax(pred_class_logits, axis=2) # TODO: Update this line to work with batch > 1. Right now it assumes all # images in a batch have the same active_class_ids pred_active = tf.gather(active_class_ids[0], pred_class_ids) # Loss loss = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=target_class_ids, logits=pred_class_logits) # Erase losses of predictions of classes that are not in the active # classes of the image. loss = loss * pred_active # Computer loss mean. Use only predictions that contribute # to the loss to get a correct mean. loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) return loss def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): """Loss for Mask R-CNN bounding box refinement. target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] target_class_ids: [batch, num_rois]. Integer class IDs. pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] """ # Reshape to merge batch and roi dimensions for simplicity. target_class_ids = K.reshape(target_class_ids, (-1,)) target_bbox = K.reshape(target_bbox, (-1, 4)) pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) # Only positive ROIs contribute to the loss. And only # the right class_id of each ROI. Get their indicies. positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] positive_roi_class_ids = tf.cast( tf.gather(target_class_ids, positive_roi_ix), tf.int64) indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) # Gather the deltas (predicted and true) that contribute to loss target_bbox = tf.gather(target_bbox, positive_roi_ix) pred_bbox = tf.gather_nd(pred_bbox, indices) # Smooth-L1 Loss loss = K.switch(tf.size(target_bbox) > 0, smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), tf.constant(0.0)) loss = K.mean(loss) loss = K.reshape(loss, [1, 1]) return loss def batch_pack_graph(x, counts, num_rows): """Picks different number of values from each row in x depending on the values in counts. """ outputs = [] for i in range(num_rows): outputs.append(x[i, :counts[i]]) return tf.concat(outputs, axis=0)