from __future__ import print_function import numpy as np import os, sys import cv2 from cntk import load_model, Axis, input_variable from cntk.core import Value from cntk.io import MinibatchData from cntk.layers import Constant from utils.annotations.annotations_helper import parse_class_map_file from config import cfg from plot_helpers import visualizeResultsFaster, imsave, apply_nms_to_single_image_results from cntk_helpers import regress_rois ############################################################### # Variables ############################################################### image_width = cfg["CNTK"].IMAGE_WIDTH image_height = cfg["CNTK"].IMAGE_HEIGHT num_channels = cfg["CNTK"].NUM_CHANNELS # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) dims_input_const = MinibatchData(Value(batch=np.asarray( [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False) # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170]) img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114] normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]]) globalvars = {} map_file_path = cfg["CNTK"].MODEL_DIRECTORY globalvars['class_map_file'] = os.path.join(map_file_path, cfg["CNTK"].CLASS_MAP_FILE) globalvars['classes'] = parse_class_map_file(globalvars['class_map_file']) globalvars['num_classes'] = len(globalvars['classes']) globalvars['temppath'] = cfg["CNTK"].TEMP_PATH feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME) # helper function def load_resize_and_pad(image_path, width, height, pad_value=114): if "@" in image_path: print("WARNING: zipped image archives are not supported for visualizing results.") exit(0) img = cv2.imread(image_path) img_width = len(img[0]) img_height = len(img) scale_w = img_width > img_height target_w = width target_h = height if scale_w: target_h = int(np.round(img_height * float(width) / float(img_width))) else: target_w = int(np.round(img_width * float(height) / float(img_height))) resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST) top = int(max(0, np.round((height - target_h) / 2))) left = int(max(0, np.round((width - target_w) / 2))) bottom = height - top - target_h right = width - left - target_w resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value]) # transpose(2,0,1) converts the image to the HWC format which CNTK accepts model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1)) dims = (width, height, target_w, target_h, img_width, img_height) return resized_with_pad, model_arg_rep, dims # mode="returnimage" or "returntags" def eval_faster_rcnn(eval_model, imgPath, img_shape, results_base_path, feature_node_name, classes, mode, drawUnregressedRois=False, drawNegativeRois=False, nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8): # prepare model image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') frcn_eval = eval_model(image_input, dims_input) #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6)) print("Plotting results from Faster R-CNN model for image.") # evaluate single image _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1]) dims_input = np.array(dims, dtype=np.float32) dims_input.shape = (1,) + dims_input.shape output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input}) out_dict = dict([(k.name, k) for k in output]) out_cls_pred = output[out_dict['cls_pred']][0] out_rpn_rois = output[out_dict['rpn_rois']][0] out_bbox_regr = output[out_dict['bbox_regr']][0] labels = out_cls_pred.argmax(axis=1) scores = out_cls_pred.max(axis=1).tolist() if mode=="returntags": class Tag(object): def __init__(self, label, score, bbox): self.label = label self.score = score self.bbox = bbox def serialize(self): return { 'label': self.label, 'score': self.score, 'bbox': self.bbox, } results = [] for i in range(len(out_rpn_rois)): if labels[i] != 0: x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i])) results.append(x) return results elif mode=="returnimage": evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath)) if drawUnregressedRois: # plot results without final regression imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1], classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois, decisionThreshold=bgrPlotThreshold) imsave(evaluated_image_path, imgDebug) else: # apply regression and nms to bbox coordinates regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims) nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores, nms_threshold=nmsThreshold, conf_threshold=nmsConfThreshold) img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1], classes, nmsKeepIndices=nmsKeepIndices, boDrawNegativeRois=drawNegativeRois, decisionThreshold=bgrPlotThreshold) imsave(evaluated_image_path, img) return evaluated_image_path else: raise ValueError("Unsupported value found in 'mode' parameter") # mode="returnimage" or "returntags" def evaluateimage(file_path, mode, eval_model=None): #from plot_helpers import eval_and_plot_faster_rcnn if eval_model==None: print("Loading existing model from %s" % model_path) eval_model = load_model(model_path) img_shape = (num_channels, image_height, image_width) results_folder = globalvars['temppath'] results=eval_faster_rcnn(eval_model, file_path, img_shape, results_folder, feature_node_name, globalvars['classes'], mode, drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS, drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS, nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD, nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD, bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD) return results