# Python source code of the YOLOv3 model definitions.

from absl import flags
from absl.flags import FLAGS
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
import cv2
from tensorflow.keras.layers import Add, Concatenate, Conv2D, Input, Lambda, LeakyReLU, UpSampling2D, \
    ZeroPadding2D, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import binary_crossentropy, sparse_categorical_crossentropy
from .utils import broadcast_iou

# Anchor box sizes, written as integer units over a 9-step scale and stored as
# float32 fractions. yolo_boxes multiplies them into the predicted box sizes.
yolo_anchors = np.array(
    [(w / 9., h / 9.) for w, h in ((1, 3), (2, 2), (3, 1), (2, 6), (4, 4), (6, 2), (3, 9), (6, 6), (9, 3))],
    np.float32,
)
# Row i lists the anchor indices used by output head i of YoloV3.
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

# Same layout for the tiny model, on a 6-step scale with six anchors.
yolo_tiny_anchors = np.array(
    [(w / 6., h / 6.) for w, h in ((1, 3), (2, 2), (3, 1), (2, 6), (4, 4), (6, 2))],
    np.float32,
)
# Row i lists the anchor indices used by output head i of YoloV3Tiny.
yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])


def DarknetConv(x, filters, size, strides=1, padding='same', batch_norm=True):
    """Apply a 2-D convolution, optionally followed by batch norm and LeakyReLU.

    Args:
        x: Input tensor fed to the convolution.
        filters: Number of convolution filters.
        size: Convolution kernel size.
        strides: Convolution stride.
        padding: Padding mode passed to ``Conv2D``.
        batch_norm: When true, the convolution has no bias and is followed by
            ``BatchNormalization`` and ``LeakyReLU(alpha=0.1)``; when false the
            biased convolution output is returned directly (no activation).

    Returns:
        The resulting tensor.
    """
    conv = Conv2D(
        filters=filters,
        kernel_size=size,
        strides=strides,
        padding=padding,
        use_bias=not batch_norm,  # the bias is redundant when batch norm follows
        kernel_regularizer=l2(0.0005),
    )
    out = conv(x)
    if not batch_norm:
        return out
    out = BatchNormalization()(out)
    return LeakyReLU(alpha=0.1)(out)


def DarknetResidual(x, filters):
    """Build a residual unit: 1x1 conv (filters // 2), 3x3 conv (filters), then add the input.

    Args:
        x: Input tensor; it is added element-wise to the output of the 3x3 convolution.
        filters: Filter count of the 3x3 convolution.

    Returns:
        The sum of the input and the convolved branch.
    """
    shortcut = x
    squeezed = DarknetConv(x, filters // 2, 1)
    expanded = DarknetConv(squeezed, filters, 3)
    return Add()([shortcut, expanded])


def DarknetBlock(x, filters, blocks):
    """Downsample with a stride-2 3x3 convolution, then stack residual units.

    Args:
        x: Input tensor.
        filters: Filter count used by the strided convolution and every residual unit.
        blocks: Number of residual units to append.

    Returns:
        The output tensor of the last residual unit.
    """
    out = DarknetConv(x, filters, 3, strides=2)
    for _unit in range(blocks):
        out = DarknetResidual(out, filters)
    return out


def Darknet(name=None, num_channels=3):
    """Build the backbone model that yields three feature maps.

    Args:
        name: Optional Keras model name.
        num_channels: Number of channels of the input image.

    Returns:
        A ``tf.keras.Model`` mapping an image of arbitrary height/width to the
        tuple ``(x_36, x_61, x)``: the outputs of the 128-, 256- and 512-filter
        stages. The first two are consumed as skip connections by ``YoloV3``.
    """
    inputs = Input([None, None, num_channels])
    stem = DarknetConv(inputs, 16, 3)
    stage_32 = DarknetBlock(stem, 32, 1)
    stage_64 = DarknetBlock(stage_32, 64, 2)
    x_36 = DarknetBlock(stage_64, 128, 8)  # skip connection
    x_61 = DarknetBlock(x_36, 256, 8)  # skip connection
    top = DarknetBlock(x_61, 512, 4)
    return tf.keras.Model(inputs, (x_36, x_61, top), name=name)


def DarknetTiny(name=None, num_channels=3):
    """Build the small backbone model that yields two feature maps.

    Args:
        name: Optional Keras model name.
        num_channels: Number of channels of the input image.

    Returns:
        A ``tf.keras.Model`` mapping an image of arbitrary height/width to the
        tuple ``(x_8, x)``: the output of the third strided convolution (used
        as a skip connection) and the final feature map.
    """
    inputs = Input([None, None, num_channels])
    feat = inputs
    # Two stride-2 convolutions before the skip-connection tap.
    for width in (8, 16):
        feat = DarknetConv(feat, width, 3, 2)
    x_8 = DarknetConv(feat, 24, 3, 2)
    feat = DarknetConv(x_8, 48, 3, 2)
    feat = DarknetConv(feat, 64, 3)
    return tf.keras.Model(inputs, (x_8, feat), name=name)


def YoloConv(filters, name=None):
    """Return a callable that applies the five-convolution neck as a named sub-model.

    Args:
        filters: Base filter count; the convolutions alternate between
            ``filters`` (1x1) and ``filters * 2`` (3x3).
        name: Optional name of the wrapped Keras sub-model.

    Returns:
        A function taking either a single tensor or a tuple ``(x, x_skip)``.
        For a tuple, ``x`` goes through a 1x1 convolution, is upsampled by 2
        and concatenated with ``x_skip`` before the five convolutions.
    """
    def yolo_conv(x_in):
        if not isinstance(x_in, tuple):
            inputs = Input(x_in.shape[1:])
            x = inputs
        else:
            deep_in = Input(x_in[0].shape[1:])
            skip_in = Input(x_in[1].shape[1:])
            inputs = (deep_in, skip_in)

            # merge the upsampled deep features with the skip connection
            x = DarknetConv(deep_in, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, skip_in])

        neck = ((filters, 1), (filters * 2, 3), (filters, 1), (filters * 2, 3), (filters, 1))
        for width, kernel in neck:
            x = DarknetConv(x, width, kernel)
        return Model(inputs, x, name=name)(x_in)

    return yolo_conv


def YoloConvTiny(filters, name=None):
    """Return a callable that applies the tiny-model neck as a named sub-model.

    Args:
        filters: Filter count of the 1x1 convolution.
        name: Optional name of the wrapped Keras sub-model.

    Returns:
        A function taking either a single tensor or a tuple ``(x, x_skip)``.
        A single tensor just goes through a 1x1 convolution. For a tuple,
        ``x`` goes through the 1x1 convolution, is upsampled by 2 and
        concatenated with ``x_skip``.
    """
    def yolo_conv(x_in):
        if not isinstance(x_in, tuple):
            inputs = Input(x_in.shape[1:])
            out = DarknetConv(inputs, filters, 1)
        else:
            deep_in = Input(x_in[0].shape[1:])
            skip_in = Input(x_in[1].shape[1:])
            inputs = (deep_in, skip_in)

            # merge the upsampled deep features with the skip connection
            out = DarknetConv(deep_in, filters, 1)
            out = UpSampling2D(2)(out)
            out = Concatenate()([out, skip_in])

        return Model(inputs, out, name=name)(x_in)

    return yolo_conv


def YoloOutput(filters, anchors, classes, name=None):
    """Return a callable that applies a detection head as a named sub-model.

    Args:
        filters: Filter count of the 3x3 convolution preceding the prediction layer.
        anchors: Number of anchors predicted per grid cell.
        classes: Number of classes; each anchor predicts ``classes + 5`` values.
        name: Optional name of the wrapped Keras sub-model.

    Returns:
        A function mapping a feature map to a tensor reshaped to
        ``(batch, rows, cols, anchors, classes + 5)``.
    """
    def yolo_output(x_in):
        inputs = Input(x_in.shape[1:])
        features = DarknetConv(inputs, filters, 3)
        # Linear prediction layer: no batch norm, hence no activation either.
        raw = DarknetConv(features, anchors * (classes + 5), 1, batch_norm=False)
        # Split the channel axis into (anchors, classes + 5).
        split = Lambda(lambda t: tf.reshape(t, (-1, tf.shape(t)[1], tf.shape(t)[2], anchors, classes + 5)))(raw)
        return tf.keras.Model(inputs, split, name=name)(x_in)

    return yolo_output


def yolo_boxes(pred, anchors, num_classes, training=True):
    """Decode a raw head output into corner boxes, objectness and class probabilities.

    Args:
        pred: Raw head output, ``(batch, grid_y, grid_x, anchors, 5 + num_classes)``
            with the last axis laid out as ``(x, y, w, h, obj, ...classes)``.
        anchors: Anchor sizes multiplied into ``exp(w, h)``.
        num_classes: Number of class channels in ``pred``.
        training: Unused by this function.

    Returns:
        A tuple ``(bbox, objectness, class_probs, pred_box)`` where ``bbox`` is
        ``(x1, y1, x2, y2)`` with centres divided by the grid size,
        ``objectness`` is sigmoid-activated, ``class_probs`` is softmax-activated
        and ``pred_box`` holds the sigmoid xy offsets with the raw wh values
        (the form the loss compares against).
    """
    pred_shape = tf.shape(pred)
    grid_y, grid_x = pred_shape[1], pred_shape[2]
    grid_size = pred_shape[1:3][::-1]  # (grid_x, grid_y), matching the (x, y) channel order

    xy_raw, wh_raw, obj_raw, cls_raw = tf.split(pred, (2, 2, 1, num_classes), axis=-1)
    xy_in_cell = tf.sigmoid(xy_raw)
    objectness = tf.sigmoid(obj_raw)
    class_probs = tf.nn.softmax(cls_raw)
    pred_box = tf.concat((xy_in_cell, wh_raw), axis=-1)  # original xywh for loss

    # Per-cell (x_index, y_index) offsets; indexed as cells[y][x] == (x, y).
    cells = tf.meshgrid(tf.range(grid_x), tf.range(grid_y))
    cells = tf.expand_dims(tf.stack(cells, axis=-1), axis=2)  # [gy, gx, 1, 2]

    centers = (xy_in_cell + tf.cast(cells, tf.float32)) / tf.cast(grid_size, tf.float32)
    sizes = tf.exp(wh_raw) * anchors

    half_sizes = sizes / 2
    bbox = tf.concat([centers - half_sizes, centers + half_sizes], axis=-1)

    return bbox, objectness, class_probs, pred_box


def nms(bboxes, scores, num_classes, iou_threshold, coord_offset=1.0):
    """Greedy, class-agnostic non-maximum suppression for one image.

    Boxes are visited from the highest to the lowest best-class score; a box is
    kept and every remaining box whose IoU with it is ``>= iou_threshold`` is
    discarded.

    Args:
        bboxes: ``(N, 4)`` array-like of ``(x1, y1, x2, y2)`` boxes.
        scores: ``(N, num_classes)`` array-like of per-class scores.
        num_classes: Number of classes; only used to shape the empty result.
        iou_threshold: Boxes overlapping a kept box by at least this IoU are dropped.
        coord_offset: Value added to every width/height when computing areas.
            The default ``1.0`` keeps the historical inclusive-pixel convention
            (``width = x2 - x1 + 1``). Pass ``0`` for normalized/continuous
            coordinates, where the extra unit would heavily inflate the IoU.

    Returns:
        ``(picked_boxes, picked_scores)`` with shapes ``(K, 4)`` and
        ``(K, num_classes)``, ordered by decreasing best-class score. For empty
        input both arrays are float32 with ``K == 0``.
    """
    if len(bboxes) == 0:
        # Scores are floating point on the non-empty path, so keep the empty
        # result float as well (it used to be int32).
        return np.zeros((0, 4), np.float32), np.zeros((0, num_classes), np.float32)

    boxes = np.asarray(bboxes)
    score = np.asarray(scores)
    start_x, start_y, end_x, end_y = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (end_x - start_x + coord_offset) * (end_y - start_y + coord_offset)

    # Ascending by best-class score, so the strongest candidate is always last.
    order = np.argsort(np.amax(score, axis=-1))
    picked = []
    while order.size > 0:
        index, rest = order[-1], order[:-1]
        picked.append(index)

        # Intersection rectangle between the picked box and every remaining box.
        x1 = np.maximum(start_x[index], start_x[rest])
        y1 = np.maximum(start_y[index], start_y[rest])
        x2 = np.minimum(end_x[index], end_x[rest])
        y2 = np.minimum(end_y[index], end_y[rest])
        intersection = np.maximum(0.0, x2 - x1 + coord_offset) * np.maximum(0.0, y2 - y1 + coord_offset)

        # Degenerate boxes can give a zero union; the resulting nan/inf compares
        # False below, so such boxes are dropped without emitting a warning.
        with np.errstate(divide='ignore', invalid='ignore'):
            iou = intersection / (areas[index] + areas[rest] - intersection)

        order = rest[iou < iou_threshold]

    return boxes[picked], score[picked]


def batched_nms(bboxes, scores, num_classes, iou_threshold):
    """Run ``nms`` on every image of a batch and return zero-padded results.

    All arguments must expose ``.numpy()`` (e.g. eager tensors).

    Args:
        bboxes: ``(batch, N, 4)`` boxes.
        scores: ``(batch, N, num_classes)`` per-class scores.
        num_classes: Scalar number of classes.
        iou_threshold: Scalar IoU threshold forwarded to ``nms``.

    Returns:
        ``(picked_boxes, picked_scores)`` NumPy arrays of shape
        ``(batch, K, 4)`` and ``(batch, K, num_classes)``, where ``K`` is the
        largest number of boxes kept for any image. Images that keep fewer
        boxes are padded with all-zero rows; previously differing counts made
        ``np.stack`` raise. Output dtypes follow the inputs.
    """
    bboxes, scores = bboxes.numpy(), scores.numpy()
    num_classes, iou_threshold = num_classes.numpy(), iou_threshold.numpy()

    per_image = [nms(bboxes[i, ...], scores[i, ...], num_classes, iou_threshold) for i in range(bboxes.shape[0])]

    # Images generally keep different numbers of boxes, so pad to the longest.
    max_kept = max((len(kept_boxes) for kept_boxes, _ in per_image), default=0)
    picked_boxes = np.zeros((len(per_image), max_kept, bboxes.shape[-1]), bboxes.dtype)
    picked_scores = np.zeros((len(per_image), max_kept, scores.shape[-1]), scores.dtype)
    for i, (kept_boxes, kept_scores) in enumerate(per_image):
        count = len(kept_boxes)
        if count:
            picked_boxes[i, :count] = kept_boxes
            picked_scores[i, :count] = kept_scores
    return picked_boxes, picked_scores


def yolo_nms(outputs, anchors, masks, num_classes, iou_threshold=0.6, score_threshold=0.15):
    """Merge all head outputs, drop low-score boxes and run NMS through ``tf.py_function``.

    Args:
        outputs: Iterable of per-head sequences whose first three items are
            ``(boxes, objectness, class_probs)``.
        anchors: Unused by this function.
        masks: Unused by this function.
        num_classes: Number of classes (last axis of the score tensor).
        iou_threshold: IoU threshold forwarded to ``batched_nms``.
        score_threshold: A box is kept as a candidate when any of its
            ``objectness * class_prob`` scores reaches this value.

    Returns:
        ``(class_boxes, class_box_scores, classes)``: the boxes and per-class
        scores returned by ``batched_nms`` and the arg-max class of each box.

    NOTE(review): ``tf.boolean_mask`` with the 2-D mask merges the batch and
    box axes, so the reshape back to ``(batch, -1, ...)`` appears to keep boxes
    with their own image only when the batch size is 1 -- confirm before using
    larger batches.
    """
    flat_boxes, flat_confs, flat_probs = [], [], []
    for head in outputs:
        head_boxes, head_confs, head_probs = head[0], head[1], head[2]
        batch = tf.shape(head_boxes)[0]
        # Collapse (grid_y, grid_x, anchors) into a single per-image box axis.
        flat_boxes.append(tf.reshape(head_boxes, (batch, -1, tf.shape(head_boxes)[-1])))
        flat_confs.append(tf.reshape(head_confs, (batch, -1, tf.shape(head_confs)[-1])))
        flat_probs.append(tf.reshape(head_probs, (batch, -1, tf.shape(head_probs)[-1])))

    boxes = tf.concat(flat_boxes, axis=1)
    box_scores = tf.concat(flat_confs, axis=1) * tf.concat(flat_probs, axis=1)

    # Candidate boxes: at least one class score reaches the threshold.
    keep = tf.reduce_any(box_scores >= score_threshold, axis=-1)
    batch = tf.shape(boxes)[0]
    class_boxes = tf.reshape(tf.boolean_mask(boxes, keep), (batch, -1, 4))
    class_box_scores = tf.reshape(tf.boolean_mask(box_scores, keep), (batch, -1, num_classes))

    class_boxes, class_box_scores = tf.py_function(
        func=batched_nms,
        inp=[class_boxes, class_box_scores, num_classes, iou_threshold],
        Tout=[tf.float32, tf.float32],
    )
    classes = tf.argmax(class_box_scores, axis=-1)

    return class_boxes, class_box_scores, classes


def YoloV3(size=None, num_channels=3, anchors=yolo_anchors, masks=yolo_anchor_masks, num_classes=10, training=False):
    """Build the three-head YOLOv3 Keras model.

    Args:
        size: Spatial input size as an iterable of dimensions (as before), a
            single int for a square input, or ``None`` for unspecified
            height/width. ``None`` is the default and used to raise a
            ``TypeError`` when unpacked.
        num_channels: Number of input image channels.
        anchors: Anchor sizes, indexed by ``masks``.
        masks: One row of anchor indices per output head.
        num_classes: Number of object classes.
        training: When true the model returns the three raw head outputs;
            otherwise the outputs are decoded with ``yolo_boxes`` and filtered
            with ``yolo_nms``.

    Returns:
        A ``Model`` named ``'yolov3'``.
    """
    # Normalise `size` so the default (None) and a bare int are both usable.
    if size is None:
        size = (None, None)
    elif isinstance(size, int):
        size = (size, size)
    x = inputs = Input([*size, num_channels])

    x_36, x_61, x = Darknet(name='yolo_darknet', num_channels=num_channels)(x)

    x = YoloConv(512, name='yolo_conv_0')(x)
    output_0 = YoloOutput(512, len(masks[0]), num_classes, name='yolo_output_0')(x)

    # Each following head reuses the previous neck output plus a backbone skip connection.
    x = YoloConv(256, name='yolo_conv_1')((x, x_61))
    output_1 = YoloOutput(256, len(masks[1]), num_classes, name='yolo_output_1')(x)

    x = YoloConv(128, name='yolo_conv_2')((x, x_36))
    output_2 = YoloOutput(128, len(masks[2]), num_classes, name='yolo_output_2')(x)

    if training:
        return Model(inputs, (output_0, output_1, output_2), name='yolov3')

    boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], num_classes), name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], num_classes), name='yolo_boxes_1')(output_1)
    boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], num_classes), name='yolo_boxes_2')(output_2)

    # Only (bbox, objectness, class_probs) are passed on; the fourth item is for the loss.
    outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, num_classes),
                     name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))

    return Model(inputs, outputs, name='yolov3')


def YoloV3Tiny(size=None,
               num_channels=3,
               anchors=yolo_tiny_anchors,
               masks=yolo_tiny_anchor_masks,
               num_classes=10,
               training=False):
    """Build the two-head tiny YOLOv3 Keras model.

    Args:
        size: Spatial input size as an iterable of dimensions (as before), a
            single int for a square input, or ``None`` for unspecified
            height/width. ``None`` is the default and used to raise a
            ``TypeError`` when unpacked.
        num_channels: Number of input image channels.
        anchors: Anchor sizes, indexed by ``masks``.
        masks: One row of anchor indices per output head.
        num_classes: Number of object classes.
        training: When true the model returns the two raw head outputs;
            otherwise the outputs are decoded with ``yolo_boxes`` and filtered
            with ``yolo_nms``.

    Returns:
        A ``Model`` named ``'yolov3'`` when ``training`` is true and
        ``'yolov3_tiny'`` otherwise.
    """
    # Normalise `size` so the default (None) and a bare int are both usable.
    if size is None:
        size = (None, None)
    elif isinstance(size, int):
        size = (size, size)
    x = inputs = Input([*size, num_channels])

    x_8, x = DarknetTiny(name='yolo_darknet', num_channels=num_channels)(x)

    x = YoloConvTiny(128, name='yolo_conv_0')(x)
    output_0 = YoloOutput(128, len(masks[0]), num_classes, name='yolo_output_0')(x)

    x = YoloConvTiny(64, name='yolo_conv_1')((x, x_8))
    output_1 = YoloOutput(64, len(masks[1]), num_classes, name='yolo_output_1')(x)
    if training:
        # NOTE(review): this name differs from the inference model's
        # 'yolov3_tiny'; left unchanged in case callers depend on it.
        return Model(inputs, (output_0, output_1), name='yolov3')
    boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], num_classes), name='yolo_boxes_0')(output_0)

    boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], num_classes), name='yolo_boxes_1')(output_1)
    # Only (bbox, objectness, class_probs) are passed on; the fourth item is for the loss.
    outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, num_classes), name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
    return Model(inputs, outputs, name='yolov3_tiny')


def YoloLoss(anchors, num_classes=10, ignore_thresh=0.5):
    """Create the loss function for one YOLO output head.

    Args:
        anchors: Anchor sizes of this head; passed to ``yolo_boxes`` and used
            to convert the true box sizes into the predicted (log) form.
        num_classes: Number of class channels in the prediction.
        ignore_thresh: Predictions in cells without an object whose best IoU
            with a true box reaches this value are excluded from the
            objectness loss.

    Returns:
        A function ``yolo_loss(y_true, y_pred)`` returning one loss value per
        batch item.
    """
    def yolo_loss(y_true, y_pred):
        """Return the summed xy, wh, objectness and class losses per batch item."""
        # 1. transform all pred outputs
        # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(y_pred, anchors, num_classes)
        pred_xy = pred_xywh[..., 0:2]
        pred_wh = pred_xywh[..., 2:4]

        # 2. transform all true outputs
        # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
        true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1)
        # corner form -> centre and size
        true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2.
        true_wh = true_box[..., 2:4] - true_box[..., 0:2]

        # give higher weights to small boxes
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # 3. inverting the pred box equations
        # (grid_x, grid_y), matching the (x, y) channel order
        grid_size = tf.shape(y_pred)[1:3][::-1]

        grid_y, grid_x = tf.shape(y_pred)[1], tf.shape(y_pred)[2]
        grid = tf.meshgrid(tf.range(grid_x), tf.range(grid_y))

        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        # express the true centre as an offset inside its grid cell, like pred_xy
        true_xy = true_xy * tf.cast(grid_size, tf.float32) - tf.cast(grid, tf.float32)
        true_wh = tf.math.log(true_wh / anchors)
        # log of a zero size is -inf (presumably cells without a box -- verify
        # against the y_true encoder); replace infinities with 0
        true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh), true_wh)
        # 4. calculate all masks
        obj_mask = tf.squeeze(true_obj, -1)
        # ignore false positive when iou is over threshold
        # NOTE(review): the true boxes are gathered across the whole batch here,
        # so each prediction is compared with boxes of every image -- confirm
        # this is intended. broadcast_iou is defined in .utils (not shown);
        # presumably it returns pairwise IoU.
        true_box_flat = tf.boolean_mask(true_box, tf.cast(obj_mask, tf.bool))
        best_iou = tf.reduce_max(broadcast_iou(pred_box, true_box_flat), axis=-1)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)
        # 5. calculate all losses
        xy_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        obj_loss = binary_crossentropy(true_obj, pred_obj)
        # object cells always count; empty cells count only where ignore_mask is 1
        obj_loss = obj_mask * obj_loss + (1 - obj_mask) * ignore_mask * obj_loss
        class_loss = obj_mask * sparse_categorical_crossentropy(true_class_idx, pred_class)
        # 6. sum over (gridy, gridx, anchors) => (batch,)
        xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
        wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
        obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
        class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))
        loss = xy_loss + wh_loss + obj_loss + class_loss
        return loss

    return yolo_loss