import sys

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Lambda
from keras.layers.merge import concatenate
from keras.models import Model

from utils.yolo_utils import scale_boxes


def yolo_head(feats, anchors, num_classes, n):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Output tensor of one detection layer, channels last. It is
            reshaped to (batch, height, width, num_anchors, num_classes + 5).
        anchors: Sequence of ``len(anchors)`` anchor pairs used by this layer;
            reshaped to (1, 1, 1, num_anchors, 2) and multiplied into the
            predicted box sizes.
        num_classes: Number of object classes predicted per anchor.
        n: Index of the output scale. Must be 0, 1 or 2; it selects the
            divisor 32, 16 or 8 applied to the box sizes. Any other value
            raises ``KeyError``.

    Returns:
        A list ``[box_xy, box_wh, box_confidence, box_class_probs]`` of
        tensors shaped (batch, height, width, num_anchors, k) with k equal to
        2, 2, 1 and ``num_classes`` respectively.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    conv_dims = K.shape(feats)[1:3]  # assuming channels last

    # Build a per-cell grid index.
    # In YOLO the height index is the inner most iteration.
    # NOTE(review): the index pairs are generated with the height index
    # varying fastest and are then reshaped row-major to (height, width);
    # together with dividing by (height, width) below this appears to assume
    # a square grid -- confirm before using non-square inputs.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    # TODO: It works with +1, don't know why.
    box_xy = (box_xy + conv_index + 1) / conv_dims
    # TODO: Input layer size
    box_wh = box_wh * anchors_tensor / conv_dims / {0: 32, 1: 16, 2: 8}[n]

    return [box_xy, box_wh, box_confidence, box_class_probs]


def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert YOLO box predictions to bounding box corners.

    Args:
        box_xy: Tensor of box centres whose last axis is (x, y).
        box_wh: Tensor of box sizes whose last axis is (width, height).

    Returns:
        Tensor whose last axis is (y_min, x_min, y_max, x_max).
    """
    half_wh = box_wh / 2.
    box_mins = box_xy - half_wh
    box_maxes = box_xy + half_wh

    return K.concatenate([
        box_mins[..., 1:2],  # y_min
        box_mins[..., 0:1],  # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1],  # x_max
    ])


def yolo_boxes_and_scores(feats, anchors, num_classes, n):
    """Process one Conv layer output into flat boxes and per-class scores.

    Args:
        feats: Output tensor of one detection layer (see ``yolo_head``).
        anchors: Anchor pairs used by this layer.
        num_classes: Number of object classes.
        n: Index of the output scale (0, 1 or 2), forwarded to ``yolo_head``.

    Returns:
        A tuple ``(boxes, box_scores)`` reshaped to
        (-1, len(anchors), 4) and (-1, len(anchors), num_classes).
    """
    # Derived from the anchors instead of hard-coding 3 so the reshapes stay
    # consistent with the anchor axis produced by yolo_head.
    num_anchors = len(anchors)

    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(
        feats, anchors, num_classes, n)

    # Convert boxes to be ready for filtering functions
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes = K.reshape(boxes, [-1, num_anchors, 4])

    # Compute box scores
    box_scores = box_confidence * box_class_probs
    box_scores = K.reshape(box_scores, [-1, num_anchors, num_classes])

    return boxes, box_scores


def yolo_filter_boxes(boxes, box_scores, box_class_probs, threshold=.6):
    """Keep only the boxes whose best class score reaches ``threshold``.

    Args:
        boxes: Tensor of box corners, last axis of size 4.
        box_scores: Tensor of per-class scores, last axis of size
            ``num_classes``; leading axes match those of ``boxes``.
        box_class_probs: Unused. Kept so existing positional callers keep
            working.
        threshold: Minimum best-class score for a box to be kept.

    Returns:
        A tuple ``(scores, boxes, classes)`` holding, for every kept box, its
        best class score, its corners and the index of its best class.
    """
    # Find the box_classes thanks to the max box_scores, keep track of the
    # corresponding score
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1, keepdims=False)

    # Mask has the same dimensions as box_class_scores and is True for the
    # boxes to keep (score >= threshold), e.g. (3549, 3).
    filtering_mask = box_class_scores >= threshold

    # Apply the mask to scores, boxes and classes
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes


def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10,
                             iou_threshold=0.5):
    """Apply non-max suppression to a set of scored boxes.

    Args:
        scores: Tensor with one score per box.
        boxes: Tensor of box corners, one row of 4 values per box.
        classes: Tensor with one class index per box.
        max_boxes: Maximum number of boxes to keep.
        iou_threshold: IoU threshold passed to
            ``tf.image.non_max_suppression``.

    Returns:
        A tuple ``(scores, boxes, classes)`` restricted to the selected boxes.
    """
    # max_boxes is handed to TensorFlow directly. A separate int32 variable
    # used to be created and initialised here but was never read, which only
    # added a variable to the graph and forced a session run on every call.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes, iou_threshold)

    # Use K.gather() to select only nms_indices from scores, boxes and classes
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes


def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape=(720., 1280.),
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Turn the three YOLO output layers into final detections.

    Args:
        yolo_outputs: Indexable holding the three detection layer outputs;
            elements 0, 1 and 2 are read.
        anchors: Indexable of at least 9 anchor pairs. Output ``i`` uses
            ``anchors[6 - 3 * i:9 - 3 * i]``.
        num_classes: Number of object classes.
        image_shape: Shape of the original image, forwarded to
            ``scale_boxes``.
        max_boxes: Maximum number of boxes kept by non-max suppression.
        score_threshold: Minimum best-class score for a box to be kept.
        iou_threshold: IoU threshold used by non-max suppression.

    Returns:
        A tuple ``(scores, boxes, classes)`` for the retained detections.
    """
    # Get three scales outputs of the YOLO model
    per_scale_boxes = []
    per_scale_scores = []
    for i in range(3):
        _boxes, _box_scores = yolo_boxes_and_scores(
            yolo_outputs[i], anchors[6 - 3 * i:9 - 3 * i], num_classes, i)
        per_scale_boxes.append(_boxes)
        per_scale_scores.append(_box_scores)

    boxes = K.concatenate(per_scale_boxes, axis=0)
    box_scores = K.concatenate(per_scale_scores, axis=0)

    # Score-filtering. The threshold must be passed by keyword: the third
    # positional parameter of yolo_filter_boxes is the unused
    # box_class_probs, so a positional score_threshold would be ignored.
    scores, boxes, classes = yolo_filter_boxes(
        boxes, box_scores, None, threshold=score_threshold)

    # Scale boxes back to original image shape.
    boxes = scale_boxes(boxes, image_shape)

    # Non-max suppression with a threshold of iou_threshold
    scores, boxes, classes = yolo_non_max_suppression(
        scores, boxes, classes, max_boxes, iou_threshold)

    return scores, boxes, classes