# Python source code of the YOLOv3 postprocess module

#!/usr/bin/python3
# -*- coding=utf-8 -*-

import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K

def yolo3_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode the raw output of one YOLOv3 prediction layer.

    Args:
        feats: output tensor of a prediction layer. Axes 1 and 2 are read as
            the grid (height, width); the tensor is reshaped to
            [-1, height, width, num_anchors, num_classes + 5].
        anchors: anchors used by this layer; reshaped to
            [1, 1, 1, num_anchors, 2] (presumably (width, height) pairs).
        num_classes: number of object classes.
        input_shape: model input size, indexed with ``[..., ::-1]`` so it
            must be a tensor/array (assumed to be (height, width)).
        calc_loss: when equal to ``True`` the loss-oriented tuple is returned.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` else
        ``(box_xy, box_wh, box_confidence, box_class_probs)``. ``box_xy`` is
        divided by the reversed grid shape and ``box_wh`` by the reversed
        ``input_shape``.
    """
    anchor_count = len(anchors)
    # Broadcastable layout: batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]

    # Per-cell row / column indices, tiled over the whole grid.
    row_index = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    grid_y = K.tile(row_index, [1, grid_w, 1, 1])
    col_index = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_x = K.tile(col_index, [grid_h, 1, 1, 1])
    # Last axis is ordered (x, y) to line up with the predicted box centers.
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])
    float_dtype = K.dtype(feats)

    # Adjust predictions to each spatial grid point and anchor size.
    grid_wh = K.cast(grid_shape[..., ::-1], float_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
    input_wh = K.cast(input_shape[..., ::-1], float_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / input_wh
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs


def yolo3_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map normalized boxes from model-input space back to image pixels.

    The arithmetic undoes an aspect-ratio-preserving resize whose result is
    centered inside the model input: the padding offset is removed, the
    remaining area is rescaled to [0, 1] and finally multiplied by the
    original image size.

    Args:
        box_xy: box centers in (x, y) order with a trailing axis of size 2,
            as produced by ``yolo3_head``.
        box_wh: box sizes in (w, h) order, same layout as ``box_xy``.
        input_shape: model input size (assumed (height, width); it is
            reversed below to match the (w, h) order of the boxes).
        image_shape: original image size per image, reshaped here to
            [-1, 1, 1, 1, 2] (assumed (height, width)).

    Returns:
        Tensor whose last axis is (x_min, y_min, x_max, y_max) in pixels of
        the original image.
    """
    input_shape = K.cast(input_shape, K.dtype(box_xy))
    image_shape = K.cast(image_shape, K.dtype(box_xy))

    # Reshape the image_shape tensor to align with the boxes dimensions.
    image_shape = K.reshape(image_shape, [-1, 1, 1, 1, 2])

    # Resize ratio must be taken per image: reduce over the (h, w) axis only.
    # Reducing over the whole tensor would apply the smallest ratio found in
    # the batch to every image. For a single image the result is unchanged.
    resize_ratio = K.min(input_shape / image_shape, axis=-1, keepdims=True)
    new_shape = K.round(image_shape * resize_ratio)
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape
    # Reverse offset/scale to match the (w, h) order of the boxes.
    offset = offset[..., ::-1]
    scale = scale[..., ::-1]

    box_xy = (box_xy - offset) * scale
    box_wh = box_wh * scale

    half_wh = box_wh / 2.
    box_mins = box_xy - half_wh
    box_maxes = box_xy + half_wh
    boxes = K.concatenate([
        box_mins[..., 0:1],  # x_min
        box_mins[..., 1:2],  # y_min
        box_maxes[..., 0:1],  # x_max
        box_maxes[..., 1:2]  # y_max
    ])

    # Scale boxes back to original image shape.
    image_wh = image_shape[..., ::-1]
    boxes *= K.concatenate([image_wh, image_wh])
    return boxes


def yolo3_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Turn one prediction layer's output into flat boxes and class scores.

    Decodes ``feats`` with ``yolo3_head``, maps the boxes to the original
    image with ``yolo3_correct_boxes`` and flattens everything.

    Returns:
        ``(boxes, box_scores)`` reshaped to [-1, 4] and [-1, num_classes];
        the score is objectness multiplied by the class probability.
    """
    xy, wh, objectness, class_probs = yolo3_head(
        feats, anchors, num_classes, input_shape)
    corrected = yolo3_correct_boxes(xy, wh, input_shape, image_shape)
    flat_boxes = K.reshape(corrected, [-1, 4])
    flat_scores = K.reshape(objectness * class_probs, [-1, num_classes])
    return flat_boxes, flat_scores


def get_anchorset(anchors, num_layers, l):
    """Return the slice of ``anchors`` that belongs to output layer ``l``.

    Args:
        anchors: sliceable sequence of anchors.
        num_layers: 3 for the YOLOv3 arch, 2 for the Tiny YOLOv3 arch.
        l: index of the output layer (0 .. num_layers - 1).

    Returns:
        ``anchors[6:]``, ``anchors[3:6]`` or ``anchors[:3]`` for layers 0, 1
        and 2 of a 3-layer model; ``anchors[3:]`` or ``anchors[:3]`` for
        layers 0 and 1 of a 2-layer model.

    Raises:
        ValueError: if ``num_layers`` is neither 2 nor 3, or if ``l`` is not
            a valid layer index for that architecture.
    """
    if num_layers == 3:  # YOLOv3 arch
        layer_slices = {0: slice(6, None), 1: slice(3, 6), 2: slice(None, 3)}
    elif num_layers == 2:  # Tiny YOLOv3 arch
        layer_slices = {0: slice(3, None), 1: slice(None, 3)}
    else:
        raise ValueError('Invalid layer number')

    # An unknown layer index used to fall through every branch and fail with
    # an UnboundLocalError on return; report it explicitly instead.
    try:
        anchor_slice = layer_slices[l]
    except KeyError:
        raise ValueError(
            'Invalid layer index {} for {} layers'.format(l, num_layers)) from None

    return anchors[anchor_slice]


def yolo3_postprocess(args,
              anchors,
              num_classes,
              max_boxes=100,
              confidence=0.1,
              iou_threshold=0.4):
    """Decode YOLOv3 outputs and keep the boxes that survive per-class NMS.

    Args:
        args: sequence holding the prediction-layer outputs followed by the
            image shape tensor (``args[num_layers]``).
        anchors: anchors for all layers; indexed with a list of indices, so
            it must support that kind of indexing (e.g. a numpy array).
        num_classes: number of object classes.
        max_boxes: maximum number of boxes kept per class by NMS.
        confidence: minimum score for a box to be considered.
        iou_threshold: IoU threshold passed to non-max suppression.

    Returns:
        ``(boxes, scores, classes)`` concatenated over all classes. The
        predictions of every layer are flattened together before filtering.
    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    # default setting
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
    # The first output layer is taken to be 32x smaller than the model input.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_results = [
        yolo3_boxes_and_scores(yolo_outputs[idx], anchors[anchor_mask[idx]],
                               num_classes, input_shape, image_shape)
        for idx in range(num_layers)
    ]
    boxes = K.concatenate(
        [layer_boxes for layer_boxes, _ in layer_results], axis=0)
    box_scores = K.concatenate(
        [layer_scores for _, layer_scores in layer_results], axis=0)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_mask = mask[:, class_id]
        candidate_boxes = tf.boolean_mask(boxes, class_mask)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], class_mask)
        selected = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        selected_boxes = K.gather(candidate_boxes, selected)
        selected_scores = K.gather(candidate_scores, selected)
        kept_boxes.append(selected_boxes)
        kept_scores.append(selected_scores)
        # One class id per surviving box.
        kept_classes.append(K.ones_like(selected_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)

    return boxes_, scores_, classes_


def batched_yolo3_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Decode one prediction layer while keeping the batch axis.

    Same processing as ``yolo3_boxes_and_scores`` but the results are
    reshaped to [-1, total_anchor_num, 4] and
    [-1, total_anchor_num, num_classes], where ``total_anchor_num`` is
    grid_height * grid_width * number of anchors of this layer.
    """
    xy, wh, objectness, class_probs = yolo3_head(
        feats, anchors, num_classes, input_shape)

    anchor_count = len(anchors)
    grid_hw = K.shape(feats)[1:3]  # height, width
    # Number of predictions this layer emits for a single image.
    per_image_count = grid_hw[0] * grid_hw[1] * anchor_count

    corrected = yolo3_correct_boxes(xy, wh, input_shape, image_shape)
    batched_boxes = K.reshape(corrected, [-1, per_image_count, 4])
    batched_scores = K.reshape(
        objectness * class_probs, [-1, per_image_count, num_classes])
    return batched_boxes, batched_scores


def batched_yolo3_postprocess(args,
              anchors,
              num_classes,
              max_boxes=100,
              confidence=0.1,
              iou_threshold=0.4):
    """Batched postprocess for YOLOv3: decode outputs and run per-image, per-class NMS.

    Args:
        args: sequence holding the prediction-layer outputs followed by the
            image shape tensor (``args[num_layers]``); the first dimension
            of that tensor is used as the batch size.
        anchors: anchors for all layers; indexed with a list of indices, so
            it must support that kind of indexing (e.g. a numpy array).
        num_classes: number of object classes.
        max_boxes: maximum number of boxes kept per class by NMS.
        confidence: minimum score for a box to be considered.
        iou_threshold: IoU threshold passed to non-max suppression.

    Returns:
        ``(batch_boxes, batch_scores, batch_classes)``: the per-image
        results stacked along a new leading axis.
    """

    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
    # The first output layer is taken to be 32x smaller than the model input.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    batch_size = K.shape(image_shape)[0] # batch size, tensor
    # print("yolo_outputs",yolo_outputs)
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = batched_yolo3_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    # Merge the predictions of all layers along the per-image anchor axis.
    boxes = K.concatenate(boxes, axis=1)
    box_scores = K.concatenate(box_scores, axis=1)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Body of the tf.while_loop below: processes image `b` of the batch and
    # writes its filtered results into the three TensorArrays.
    def single_image_nms(b, batch_boxes, batch_scores, batch_classes):
        boxes_ = []
        scores_ = []
        classes_ = []
        for c in range(num_classes):
            # TODO: use keras backend instead of tf.
            class_boxes = tf.boolean_mask(boxes[b], mask[b, :, c])
            class_box_scores = tf.boolean_mask(box_scores[b, :, c], mask[b, :, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            # One class id per surviving box.
            classes = K.ones_like(class_box_scores, 'int32') * c
            boxes_.append(class_boxes)
            scores_.append(class_box_scores)
            classes_.append(classes)

        boxes_ = K.concatenate(boxes_, axis=0)
        scores_ = K.concatenate(scores_, axis=0)
        classes_ = K.concatenate(classes_, axis=0)

        batch_boxes = batch_boxes.write(b, boxes_)
        batch_scores = batch_scores.write(b, scores_)
        batch_classes = batch_classes.write(b, classes_)

        return b+1, batch_boxes, batch_scores, batch_classes

    # Arrays start with one slot and grow as images are written.
    batch_boxes = tf.TensorArray(K.dtype(boxes), size=1, dynamic_size=True)
    batch_scores = tf.TensorArray(K.dtype(box_scores), size=1, dynamic_size=True)
    batch_classes = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)
    _, batch_boxes, batch_scores, batch_classes = tf.while_loop(lambda b,*args: b<batch_size, single_image_nms, [0, batch_boxes, batch_scores, batch_classes])

    # NOTE(review): TensorArray.stack() requires all written elements to have
    # the same shape, while the number of boxes kept per image is data
    # dependent -- confirm the behaviour for batches where images keep a
    # different number of boxes.
    batch_boxes = batch_boxes.stack()
    batch_scores = batch_scores.stack()
    batch_classes = batch_classes.stack()

    return batch_boxes, batch_scores, batch_classes



def batched_yolo3_prenms(args,
              anchors,
              num_classes,
              input_shape,
              max_boxes=100,
              confidence=0.1,
              iou_threshold=0.4):
    """Postprocess part for YOLOv3 model except NMS.

    Decodes every prediction layer into boxes and class scores and merges
    the layers, leaving score filtering and NMS to the caller.

    Args:
        args: sequence holding the prediction-layer outputs followed by the
            image shape tensor (``args[num_layers]``).
        anchors: sliceable sequence of anchors for all layers.
        num_classes: number of object classes.
        input_shape: model input size; element 0 sizes the grid rows and
            element 1 the grid columns.
        max_boxes: unused here; kept for signature compatibility.
        confidence: unused here; kept for signature compatibility.
        iou_threshold: unused here; kept for signature compatibility.

    Returns:
        ``(boxes, box_scores)`` shaped [-1, total_anchor_num, 4] and
        [-1, total_anchor_num, num_classes], concatenated over all layers.

    Raises:
        ValueError: if the anchors do not describe a 2- or 3-layer model.
    """
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    # (anchor slice, downsampling stride) of each output layer.
    if num_layers == 3: # YOLOv3 arch
        layer_specs = ((slice(6, None), 32), (slice(3, 6), 16), (slice(None, 3), 8))
    elif num_layers == 2: # Tiny YOLOv3 arch
        layer_specs = ((slice(3, None), 32), (slice(None, 3), 16))
    else:
        raise ValueError('Invalid layer number')

    boxes = []
    box_scores = []
    for l, (anchor_slice, stride) in enumerate(layer_specs):
        # get anchor set and grid size for each feature layer
        anchorset = anchors[anchor_slice]
        grid_shape = [input_shape[0]//stride, input_shape[1]//stride]

        feats = yolo_outputs[l]
        float_dtype = K.dtype(feats)
        # Convert final layer features to bounding box parameters
        num_anchors = len(anchorset)
        # Reshape to batch, height, width, num_anchors, box_params.
        anchors_tensor = K.reshape(K.constant(anchorset), [1, 1, 1, num_anchors, 2])

        # get total anchor number for each feature layer
        total_anchor_num = grid_shape[0] * grid_shape[1] * num_anchors
        grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
            [1, grid_shape[1], 1, 1])
        grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
            [grid_shape[0], 1, 1, 1])
        grid = K.concatenate([grid_x, grid_y])
        grid = K.cast(grid, float_dtype)

        reshape_feats = K.reshape(
            feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

        # Adjust predictions to each spatial grid point and anchor size.
        box_xy = (K.sigmoid(reshape_feats[..., :2]) + grid) / K.cast(grid_shape[::-1], float_dtype)
        box_wh = K.exp(reshape_feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], float_dtype)
        box_confidence = K.sigmoid(reshape_feats[..., 4:5])
        box_class_probs = K.sigmoid(reshape_feats[..., 5:])

        # correct boxes to the original image shape
        # The float copies get their own names: rebinding `input_shape` here
        # would make the next iteration derive its grid size from a float
        # tensor instead of the caller's value.
        input_shape_f = K.cast(input_shape, K.dtype(box_xy))
        image_shape_f = K.cast(image_shape, K.dtype(box_xy))
        # Truncate (not round) the resized image size, then go back to float.
        new_shape = K.cast(image_shape_f * K.min(input_shape_f/image_shape_f), dtype='int32')
        new_shape = K.cast(new_shape, dtype='float32')
        # NOTE(review): unlike yolo3_correct_boxes, image_shape is neither
        # reshaped for broadcasting nor reversed to (w, h) here -- confirm
        # the layout and order callers are expected to pass.
        offset = (input_shape_f-new_shape)/2./input_shape_f
        scale = input_shape_f/new_shape
        box_xy = (box_xy - offset) * scale
        box_wh = box_wh * scale

        box_mins = box_xy - (box_wh / 2.)
        box_maxes = box_xy + (box_wh / 2.)
        _boxes = K.concatenate([
            box_mins[..., 0:1],  # x_min
            box_mins[..., 1:2],  # y_min
            box_maxes[..., 0:1],  # x_max
            box_maxes[..., 1:2]  # y_max
        ])

        # Scale boxes back to original image shape.
        _boxes *= K.concatenate([image_shape_f, image_shape_f])

        # Reshape boxes to flatten the boxes
        _boxes = K.reshape(_boxes, [-1, total_anchor_num, 4])
        _box_scores = box_confidence * box_class_probs
        _box_scores = K.reshape(_box_scores, [-1, total_anchor_num, num_classes])

        boxes.append(_boxes)
        box_scores.append(_box_scores)

    # Merge boxes for all feature layers, for further NMS option
    boxes = K.concatenate(boxes, axis=1)
    box_scores = K.concatenate(box_scores, axis=1)

    return boxes, box_scores



class Yolo3PostProcessLayer(Layer):
    """Keras layer doing the YOLOv3 postprocess except score filtering and NMS.

    Args:
        anchors: sliceable sequence of anchors for all output layers;
            ``len(anchors)//3`` must be 3 (YOLOv3) or 2 (Tiny YOLOv3).
        num_classes: number of object classes.
        input_dim: model input size; element 0 sizes the grid rows and
            element 1 the grid columns.

    Raises:
        ValueError: if the anchors do not describe a 2- or 3-layer model.
    """

    def __init__(self, anchors, num_classes, input_dim, **kwargs):
        self.anchors = anchors
        self.num_classes = num_classes
        self.input_dim = input_dim

        self.num_layers = len(self.anchors)//3 # default setting

        if self.num_layers == 3: #YOLOv3 arch
            strides = (32, 16, 8)
        elif self.num_layers == 2: # Tiny YOLOv3 arch
            strides = (32, 16)
        else:
            raise ValueError('Invalid layer number')

        # Total predictions per image: grid_h * grid_w cells per layer with
        # 3 anchors each. Each dimension is floor-divided on its own so the
        # count matches the per-layer grid sizes used in call();
        # `h//s * w//s` would parse as `((h//s) * w) // s`.
        self.total_anchor_num = sum(
            (input_dim[0]//stride) * (input_dim[1]//stride)
            for stride in strides) * 3

        super(Yolo3PostProcessLayer, self).__init__(**kwargs)

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = {
            'anchors': self.anchors,
            'num_classes': self.num_classes,
            'input_dim': self.input_dim,
        }
        base_config = super(Yolo3PostProcessLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


    def call(self, x):
        """Postprocess part for YOLOv3 model except NMS.

        Args:
            x: list that unpacks into ``(yolo_outputs, image_shape)``, where
                ``yolo_outputs[l]`` is the output of prediction layer ``l``.

        Returns:
            ``(boxes, box_scores)`` shaped [-1, total_anchor_num, 4] and
            [-1, total_anchor_num, num_classes], concatenated over layers.
        """
        assert isinstance(x, list)

        yolo_outputs, image_shape = x

        boxes = []
        box_scores = []
        for l in range(self.num_layers):
            # get anchor set for each feature layer
            if self.num_layers == 3: #YOLOv3 arch
                if l == 0:
                    anchorset = self.anchors[6:]
                    grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
                elif l == 1:
                    anchorset = self.anchors[3:6]
                    grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
                elif l == 2:
                    anchorset = self.anchors[:3]
                    grid_shape = [self.input_dim[0]//8, self.input_dim[1]//8]
            elif self.num_layers == 2: # Tiny YOLOv3 arch
                if l == 0:
                    anchorset = self.anchors[3:]
                    grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
                elif l == 1:
                    anchorset = self.anchors[:3]
                    grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
            else:
                raise ValueError('Invalid layer number')

            feats = yolo_outputs[l]
            # Convert final layer features to bounding box parameters
            num_anchors = len(anchorset)
            # Reshape to batch, height, width, num_anchors, box_params.
            anchors_tensor = K.reshape(K.constant(anchorset), [1, 1, 1, num_anchors, 2])

            # get total anchor number for each feature layer
            total_anchor_num = grid_shape[0] * grid_shape[1] * num_anchors
            grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                [1, grid_shape[1], 1, 1])
            grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                [grid_shape[0], 1, 1, 1])
            # Last axis is ordered (x, y) to line up with the box centers.
            grid = K.concatenate([grid_x, grid_y])
            grid = K.cast(grid, K.dtype(feats))

            reshape_feats = K.reshape(
                feats, [-1, grid_shape[0], grid_shape[1], num_anchors, self.num_classes + 5])

            # Adjust predictions to each spatial grid point and anchor size.
            box_xy = (K.sigmoid(reshape_feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(reshape_feats))
            box_wh = K.exp(reshape_feats[..., 2:4]) * anchors_tensor / K.cast(self.input_dim[::-1], K.dtype(reshape_feats))
            box_confidence = K.sigmoid(reshape_feats[..., 4:5])
            box_class_probs = K.sigmoid(reshape_feats[..., 5:])

            # correct boxes to the original image shape
            input_shape = K.cast(self.input_dim, K.dtype(box_xy))
            image_shape = K.cast(image_shape, K.dtype(box_xy))
            # Truncate (not round) the resized image size, then go back to float.
            new_shape = K.cast(image_shape * K.min(input_shape/image_shape), dtype='int32')
            new_shape = K.cast(new_shape, dtype='float32')
            # NOTE(review): image_shape is neither reshaped for broadcasting
            # nor reversed to (w, h) here -- confirm the layout and order
            # callers are expected to pass.
            offset = (input_shape-new_shape)/2./input_shape
            scale = input_shape/new_shape
            box_xy = (box_xy - offset) * scale
            box_wh *= scale

            box_mins = box_xy - (box_wh / 2.)
            box_maxes = box_xy + (box_wh / 2.)
            _boxes =  K.concatenate([
                box_mins[..., 0:1],  # x_min
                box_mins[..., 1:2],  # y_min
                box_maxes[..., 0:1],  # x_max
                box_maxes[..., 1:2]  # y_max
            ])

            # Scale boxes back to original image shape.
            _boxes *= K.concatenate([image_shape, image_shape])

            # Reshape boxes to flatten the boxes
            _boxes = K.reshape(_boxes, [-1, total_anchor_num, 4])
            _box_scores = box_confidence * box_class_probs
            _box_scores = K.reshape(_box_scores, [-1, total_anchor_num, self.num_classes])

            boxes.append(_boxes)
            box_scores.append(_box_scores)

        # Merge boxes for all feature layers, for further NMS option
        boxes = K.concatenate(boxes, axis=1)
        box_scores = K.concatenate(box_scores, axis=1)

        return boxes, box_scores

    def compute_output_shape(self, input_shape):
        """Return the shapes of the ``(boxes, box_scores)`` outputs."""
        assert isinstance(input_shape, list)
        shape_yolo, shape_image = input_shape
        return [(shape_yolo[0], self.total_anchor_num, 4), (shape_image[0], self.total_anchor_num, self.num_classes)]