# Modified by Minghui Liao and Pengyuan Lyu
###############################################################################
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""Inference functionality for most Detectron models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import defaultdict
import cv2
import logging
import numpy as np

from caffe2.python import core
from caffe2.python import workspace
import pycocotools.mask as mask_util

from core.config import cfg
from utils.timer import Timer
import modeling.FPN as fpn
import utils.blob as blob_utils
import utils.boxes as box_utils
import utils.image as image_utils
import utils.keypoints as keypoint_utils
import lanms
from PIL import Image, ImageDraw, ImageFont
import os

logger = logging.getLogger(__name__)


def im_detect_all(model, im, image_name, box_proposals, timers=None, vis=False):
    """Detect text boxes in one image, recognise their text and write results.

    Runs box detection (optionally with test-time augmentation), applies
    NMS, and - when `cfg.MODEL.MASK_ON` is set and at least one box survives -
    predicts a global text mask and character masks per box. Each detection
    is turned into either a polygon (`cfg.TEST.OUTPUT_POLYGON`) or a tight
    rectangle plus the string decoded by `getstr_grid`, and everything is
    written to disk through `format_output`.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        image_name (str): file name of the image; used for output file names
        box_proposals (ndarray): R x 4 proposals, or None if using RPN
        timers (dict): optional mapping of timer name -> Timer
        vis (bool): when True, also save a blended visualisation image

    Returns:
        None. Results are written below the directory of `cfg.TEST.WEIGHTS`.
    """
    print(image_name)
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scales = im_detect_bbox_aug(model, im, box_proposals)
    else:
        scores, boxes, im_scales = im_detect_bbox(model, im, box_proposals)
    timers['im_detect_bbox'].toc()

    # score and boxes are from the whole image after score thresholding and nms
    # (they are not separated by class)
    # cls_boxes boxes and scores are separated by class and in the format used
    # for evaluating results
    timers['misc_bbox'].tic()
    scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
    timers['misc_bbox'].toc()

    result_logs = []
    # Results are stored next to the weights file:
    #   <weights dir>/<first test dataset>/<weights file name>_results
    model_path = cfg.TEST.WEIGHTS
    model_name = model_path.split('/')[-1]
    model_dir = model_path[0:len(model_path) - len(model_name)]
    save_dir_res = os.path.join(
        model_dir, cfg.TEST.DATASETS[0], model_name + '_results'
    )
    if not os.path.isdir(save_dir_res):
        os.makedirs(save_dir_res)

    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            global_masks, char_masks, char_boxes = im_detect_mask_aug(
                model, im, boxes
            )
        else:
            global_masks, char_masks, char_boxes = im_detect_mask(
                model, im_scales, boxes
            )
        timers['im_detect_mask'].toc()

        if vis:
            img_char = np.zeros((im.shape[0], im.shape[1]))
            img_poly = np.zeros((im.shape[0], im.shape[1]))
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # The same 3x3 kernel cleans up every mask, so build it once.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

        for index in range(global_masks.shape[0]):
            # A list (not `map`) so that it can be indexed on Python 3 too.
            box = [int(v) for v in boxes[index]]
            box_w = box[2] - box[0]
            box_h = box[3] - box[1]
            if box_w <= 0 or box_h <= 0:
                # A degenerate box cannot be resized to; skip it.
                continue

            # Resize the soft text mask to the box size and binarise it.
            cls_polys = (global_masks[index, 0, :, :] * 255).astype(np.uint8)
            poly_map = np.array(
                Image.fromarray(cls_polys).resize((box_w, box_h))
            )
            poly_map = poly_map.astype(np.float32) / 255
            poly_map = cv2.GaussianBlur(poly_map, (3, 3), sigmaX=3)
            _, poly_map = cv2.threshold(poly_map, 0.5, 1, cv2.THRESH_BINARY)

            # Remove speckles (erode + dilate) and then fill small holes.
            poly_map = cv2.erode(poly_map, kernel)
            poly_map = cv2.dilate(poly_map, kernel)
            poly_map = cv2.morphologyEx(poly_map, cv2.MORPH_CLOSE, kernel)

            if cfg.TEST.OUTPUT_POLYGON:
                # `[-2]` selects the contour list for both the 3-value
                # (OpenCV 3) and the 2-value (OpenCV 4) return signature.
                contours = cv2.findContours(
                    (poly_map * 255).astype(np.uint8),
                    cv2.RETR_TREE,
                    cv2.CHAIN_APPROX_NONE
                )[-2]
                if len(contours) == 0:
                    # Nothing survived thresholding for this detection.
                    continue
                max_cnt = max(contours, key=cv2.contourArea)
                epsilon = 0.01 * cv2.arcLength(max_cnt, True)
                approx = cv2.approxPolyDP(max_cnt, epsilon, True)
                # Shift the polygon from box space into image space.
                pts = approx.reshape((-1, 2))
                pts[:, 0] = pts[:, 0] + box[0]
                pts[:, 1] = pts[:, 1] + box[1]
                segms = [int(v) for v in pts.reshape((-1,))]
                if len(segms) < 6:
                    # Fewer than three points is not a polygon.
                    continue
            else:
                idy, idx = np.where(poly_map == 1)
                if idx.size == 0:
                    # Nothing survived thresholding for this detection.
                    continue
                xy = np.transpose(np.vstack((idx, idy)))
                hull = cv2.convexHull(xy, clockwise=True)
                if hull is None:
                    continue
                # reverse order of points.
                hull = hull[::-1]
                # find minimum area bounding box.
                rect = cv2.minAreaRect(hull)
                corners = cv2.boxPoints(rect)
                corners = np.array(corners, dtype="int")
                pts = get_tight_rect(
                    corners, box[0], box[1], im.shape[0], im.shape[1], 1
                )
                pts_origin = [int(x * 1.0) for x in pts]

            text, rec_score, rec_char_scores = getstr_grid(
                char_masks[index, :, :, :].copy(), box_w, box_h
            )
            if cfg.TEST.OUTPUT_POLYGON:
                # The trailing len(segms) lets format_output split the row.
                result_log = (
                    [int(x * 1.0) for x in box[:4]] + segms + [text] +
                    [scores[index]] + [rec_score] + [rec_char_scores] +
                    [len(segms)]
                )
            else:
                result_log = (
                    [int(x * 1.0) for x in box[:4]] + pts_origin + [text] +
                    [scores[index]] + [rec_score] + [rec_char_scores]
                )
            result_logs.append(result_log)

            if vis:
                if cfg.TEST.OUTPUT_POLYGON:
                    outline = segms
                else:
                    # NOTE(review): assumes get_tight_rect yields a flat
                    # [x1, y1, x2, y2, ...] sequence - confirm.
                    outline = pts_origin
                cv2.polylines(
                    im,
                    [np.array(outline).reshape((-1, 2)).astype(np.int32)],
                    True,
                    color=(0, 255, 0),
                    thickness=5
                )
                poly = np.array(
                    Image.fromarray(cls_polys).resize((box_w, box_h))
                )
                cls_chars = 255 - (
                    char_masks[index, 0, :, :] * 255
                ).astype(np.uint8)
                char = np.array(
                    Image.fromarray(cls_chars).resize((box_w, box_h))
                )
                img_poly[box[1]:box[3], box[0]:box[2]] = poly
                img_char[box[1]:box[3], box[0]:box[2]] = char

        if vis:
            save_dir_visu = os.path.join(model_dir, model_name + '_visu')
            if not os.path.isdir(save_dir_visu):
                os.mkdir(save_dir_visu)
            img_char = Image.fromarray(img_char).convert('RGB')
            img = Image.fromarray(im).convert('RGB')
            Image.blend(img, img_char, 0.5).save(
                os.path.join(save_dir_visu, str(image_name) + '_blend_char.jpg')
            )

    format_output(save_dir_res, result_logs, image_name)


def format_output(out_dir, boxes, img_name):
    """Write the detections of one image to `res_<image stem>.txt`.

    One text line is written per detection. The per-character score array of
    detection `i` is stored with `np.save` under `res_<image stem>_<i>.mat`
    (np.save appends `.npy` to that name) and the `.mat` path is appended to
    the text line.

    Arguments:
        out_dir (str): directory that receives the files
        boxes (list): rows as built by im_detect_all
        img_name (str): image file name; the part before the first '.' is used
    """
    stem = 'res_' + img_name.split('.')[0]
    # char score save dir
    ssur_name = os.path.join(out_dir, stem)
    with open(os.path.join(out_dir, stem + '.txt'), 'w') as res:
        for i, box in enumerate(boxes):
            save_name = ssur_name + '_' + str(i) + '.mat'
            if cfg.TEST.OUTPUT_POLYGON:
                # Row layout: 4 box coords, N polygon coords, text, score,
                # recognition score, per-char scores, N.
                np.save(save_name, box[-2])
                num_coords = int(box[-1])
                line = (
                    ','.join([str(x) for x in box[:4]]) + ';' +
                    ','.join([str(x) for x in box[4:4 + num_coords]]) + ';' +
                    ','.join([str(x) for x in box[4 + num_coords:-2]]) +
                    ',' + save_name
                )
            else:
                np.save(save_name, box[-1])
                line = ','.join([str(x) for x in box[:-1]]) + ',' + save_name
            res.write(line + '\n')


def im_conv_body_only(model, im):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale_factors = _get_image_blob(im)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale_factors


def im_detect_bbox(model, im, boxes=None):
    """Bounding box object detection for an image with given box proposals.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals in 0-indexed
            [x1, y1, x2, y2] format, or None if using RPN

    Returns:
        scores (ndarray): R x K array of object class scores for K classes
            (K includes background as object category 0)
        boxes (ndarray): R x 4*K array of predicted bounding boxes
        im_scales (list): list of image scales used in the input blob (as
            returned by _get_blobs and for use with im_detect_mask, etc.)
    """
    inputs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True
        )
        inputs['rois'] = inputs['rois'][index, :]
        boxes = boxes[index, :]

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN:
        _add_multilevel_rois_for_test(inputs, 'rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    # Read out blobs
    if cfg.MODEL.FASTER_RCNN:
        assert len(im_scales) == 1, \
            'Only single-image / single-scale batch implemented'
        rois = workspace.FetchBlob(core.ScopedName('rois'))
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # Softmax class probabilities
    scores = workspace.FetchBlob(core.ScopedName('cls_prob')).squeeze()
    # In case there is 1 proposal
    scores = scores.reshape([-1, scores.shape[-1]])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = workspace.FetchBlob(core.ScopedName('bbox_pred')).squeeze()
        # In case there is 1 proposal
        box_deltas = box_deltas.reshape([-1, box_deltas.shape[-1]])
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            # Remove predictions for bg class (compat with MSRA code)
            box_deltas = box_deltas[:, -4:]
        pred_boxes = box_utils.bbox_transform(
            boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
        )
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
            pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, im_scales


def im_detect_bbox_aug(model, im, box_proposals=None):
    """Performs bbox detection with test-time augmentations.
    Function signature is the same as for im_detect_bbox.
    """
    assert not cfg.TEST.BBOX_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'
    assert not cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION', \
        'Coord heuristic must be union whenever score heuristic is union'
    assert not cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Score heuristic must be union whenever coord heuristic is union'
    assert not cfg.MODEL.FASTER_RCNN or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Union heuristic must be used to combine Faster RCNN predictions'

    # Collect detections computed under different transformations
    scores_ts = []
    boxes_ts = []

    def add_preds_t(scores_t, boxes_t):
        scores_ts.append(scores_t)
        boxes_ts.append(boxes_t)

    # Perform detection on the horizontally flipped image
    if cfg.TEST.BBOX_AUG.H_FLIP:
        scores_hf, boxes_hf, _im_scales_hf = im_detect_bbox_hflip(
            model, im, box_proposals
        )
        add_preds_t(scores_hf, boxes_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.BBOX_AUG.SCALES:
        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
        scores_scl, boxes_scl = im_detect_bbox_scale(
            model, im, scale, max_size, box_proposals
        )
        add_preds_t(scores_scl, boxes_scl)

        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:
            scores_scl_hf, boxes_scl_hf = im_detect_bbox_scale(
                model, im, scale, max_size, box_proposals, hflip=True
            )
            add_preds_t(scores_scl_hf, boxes_scl_hf)

    # Perform detection at different aspect ratios
    for aspect_ratio in cfg.TEST.BBOX_AUG.ASPECT_RATIOS:
        scores_ar, boxes_ar = im_detect_bbox_aspect_ratio(
            model, im, aspect_ratio, box_proposals
        )
        add_preds_t(scores_ar, boxes_ar)

        if cfg.TEST.BBOX_AUG.ASPECT_RATIO_H_FLIP:
            scores_ar_hf, boxes_ar_hf = im_detect_bbox_aspect_ratio(
                model, im, aspect_ratio, box_proposals, hflip=True
            )
            add_preds_t(scores_ar_hf, boxes_ar_hf)

    # Compute detections for the original image (identity transform) last to
    # ensure that the Caffe2 workspace is populated with blobs corresponding
    # to the original image on return (postcondition of im_detect_bbox)
    scores_i, boxes_i, im_scales_i = im_detect_bbox(model, im, box_proposals)
    add_preds_t(scores_i, boxes_i)

    # Combine the predicted scores
    if cfg.TEST.BBOX_AUG.SCORE_HEUR == 'ID':
        scores_c = scores_i
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'AVG':
        scores_c = np.mean(scores_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION':
        scores_c = np.vstack(scores_ts)
    else:
        raise NotImplementedError(
            'Score heur {} not supported'.format(cfg.TEST.BBOX_AUG.SCORE_HEUR)
        )

    # Combine the predicted boxes
    if cfg.TEST.BBOX_AUG.COORD_HEUR == 'ID':
        boxes_c = boxes_i
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'AVG':
        boxes_c = np.mean(boxes_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION':
        boxes_c = np.vstack(boxes_ts)
    else:
        raise NotImplementedError(
            'Coord heur {} not supported'.format(cfg.TEST.BBOX_AUG.COORD_HEUR)
        )

    return scores_c, boxes_c, im_scales_i


def im_detect_bbox_hflip(model, im, box_proposals=None):
    """Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    # Compute predictions on the flipped image
    im_hf = im[:, ::-1, :]
    im_width = im.shape[1]

    if not cfg.MODEL.FASTER_RCNN:
        box_proposals_hf = box_utils.flip_boxes(box_proposals, im_width)
    else:
        box_proposals_hf = None

    scores_hf, boxes_hf, im_scales = im_detect_bbox(
        model, im_hf, box_proposals_hf
    )

    # Invert the detections computed on the flipped image
    boxes_inv = box_utils.flip_boxes(boxes_hf, im_width)

    return scores_hf, boxes_inv, im_scales


def im_detect_bbox_scale(
    model, im, scale, max_size, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given scale.
    Returns predictions in the original image space.
    """
    # Remember the original scale
    orig_scales = cfg.TEST.SCALES
    orig_max_size = cfg.TEST.MAX_SIZE

    try:
        # Perform detection at the given scale
        cfg.TEST.SCALES = (scale, )
        cfg.TEST.MAX_SIZE = max_size

        if hflip:
            scores_scl, boxes_scl, _ = im_detect_bbox_hflip(
                model, im, box_proposals
            )
        else:
            scores_scl, boxes_scl, _ = im_detect_bbox(model, im, box_proposals)
    finally:
        # Restore the original scale, even if detection raised, so that the
        # global config is never left in the overridden state.
        cfg.TEST.SCALES = orig_scales
        cfg.TEST.MAX_SIZE = orig_max_size

    return scores_scl, boxes_scl


def im_detect_bbox_aspect_ratio(
    model, im, aspect_ratio, box_proposals=None, hflip=False
):
    """Computes bbox detections at the given width-relative aspect ratio.
    Returns predictions in the original image space.
    """
    # Compute predictions on the transformed image
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)

    if not cfg.MODEL.FASTER_RCNN:
        box_proposals_ar = box_utils.aspect_ratio(box_proposals, aspect_ratio)
    else:
        box_proposals_ar = None

    if hflip:
        scores_ar, boxes_ar, _ = im_detect_bbox_hflip(
            model, im_ar, box_proposals_ar
        )
    else:
        scores_ar, boxes_ar, _ = im_detect_bbox(model, im_ar, box_proposals_ar)

    # Invert the detected boxes
    boxes_inv = box_utils.aspect_ratio(boxes_ar, 1.0 / aspect_ratio)

    return scores_ar, boxes_inv


def im_detect_mask(model, im_scales, boxes):
    """Infer the global text mask and the character masks for each box.

    This function must be called after im_detect_bbox as it assumes that the
    Caffe2 workspace is already populated with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scales (list): image blob scales as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_global_masks (ndarray): R x 1 x H x W soft text masks
        pred_char_masks (ndarray): R x 37 x H x W soft character maps
            (channel 0 is treated as background by the decoding code)
        None: placeholder for character boxes, which are not predicted

        H and W are cfg.MRCNN.RESOLUTION_H and cfg.MRCNN.RESOLUTION_W.
    """
    assert len(im_scales) == 1, \
        'Only single-image / single-scale batch implemented'

    M_HEIGHT = cfg.MRCNN.RESOLUTION_H
    M_WIDTH = cfg.MRCNN.RESOLUTION_W
    num_char_channels = 37

    if boxes.shape[0] == 0:
        # Keep the same 3-tuple contract as the non-empty path so callers
        # can always unpack the result.
        empty_global = np.zeros((0, 1, M_HEIGHT, M_WIDTH), np.float32)
        empty_char = np.zeros(
            (0, num_char_channels, M_HEIGHT, M_WIDTH), np.float32
        )
        return empty_global, empty_char, None

    inputs = {'mask_rois': _get_rois_blob(boxes, im_scales)}
    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'mask_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.mask_net.Proto().name)

    # Fetch masks
    pred_global_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_global_probs')
    ).squeeze()
    pred_char_masks = workspace.FetchBlob(
        core.ScopedName('mask_fcn_char_probs')
    ).squeeze()

    # squeeze() drops the batch axis when there is a single box; restore it.
    pred_global_masks = pred_global_masks.reshape([-1, 1, M_HEIGHT, M_WIDTH])
    pred_char_masks = pred_char_masks.reshape(
        [-1, M_HEIGHT, M_WIDTH, num_char_channels]
    )
    # channels-last -> channels-first
    pred_char_masks = pred_char_masks.transpose([0, 3, 1, 2])

    return pred_global_masks, pred_char_masks, None


def im_detect_mask_aug(model, im, boxes):
    """Performs mask detection with test-time augmentations.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        global_masks (ndarray): combined soft text masks
        char_masks (ndarray): combined soft character maps
        None: placeholder for character boxes, which are not predicted
    """
    assert not cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'

    # Collect masks computed under different transformations
    global_masks_ts = []
    char_masks_ts = []

    def add_masks_t(masks_t):
        # masks_t is the (global, char, char_boxes) triple of one transform;
        # the char_boxes entry is always None and is not collected.
        global_masks_ts.append(masks_t[0])
        char_masks_ts.append(masks_t[1])

    # Compute masks for the original image (identity transform)
    im_scales_i = im_conv_body_only(model, im)
    add_masks_t(im_detect_mask(model, im_scales_i, boxes))

    # Perform mask detection on the horizontally flipped image
    if cfg.TEST.MASK_AUG.H_FLIP:
        add_masks_t(im_detect_mask_hflip(model, im, boxes))

    # Compute detections at different scales
    for scale in cfg.TEST.MASK_AUG.SCALES:
        max_size = cfg.TEST.MASK_AUG.MAX_SIZE
        add_masks_t(im_detect_mask_scale(model, im, scale, max_size, boxes))

        if cfg.TEST.MASK_AUG.SCALE_H_FLIP:
            add_masks_t(
                im_detect_mask_scale(
                    model, im, scale, max_size, boxes, hflip=True
                )
            )

    # Compute masks at different aspect ratios
    for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS:
        add_masks_t(im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes))

        if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP:
            add_masks_t(
                im_detect_mask_aspect_ratio(
                    model, im, aspect_ratio, boxes, hflip=True
                )
            )

    # Combine the predicted soft masks
    if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG':
        global_masks_c = np.mean(global_masks_ts, axis=0)
        char_masks_c = np.mean(char_masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX':
        global_masks_c = np.amax(global_masks_ts, axis=0)
        char_masks_c = np.amax(char_masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG':

        def logit(y):
            return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

        # Average in logit space, then map back through the sigmoid.
        global_logit_masks = [logit(y) for y in global_masks_ts]
        global_logit_masks = np.mean(global_logit_masks, axis=0)
        global_masks_c = 1.0 / (1.0 + np.exp(-global_logit_masks))

        char_logit_masks = [logit(y) for y in char_masks_ts]
        char_logit_masks = np.mean(char_logit_masks, axis=0)
        char_masks_c = 1.0 / (1.0 + np.exp(-char_logit_masks))
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(cfg.TEST.MASK_AUG.HEUR)
        )

    return global_masks_c, char_masks_c, None


def im_detect_mask_hflip(model, im, boxes):
    """Performs mask detection on the horizontally flipped image.
    Function signature is the same as for im_detect_mask_aug.
    """
    # Compute the masks for the flipped image
    im_hf = im[:, ::-1, :]
    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])

    im_scales = im_conv_body_only(model, im_hf)
    global_masks_hf, char_masks_hf, _ = im_detect_mask(
        model, im_scales, boxes_hf
    )

    # Invert the predicted soft masks (flip the width axis back)
    global_masks_inv = global_masks_hf[:, :, :, ::-1]
    # NOTE(review): this only undoes the spatial flip; the character classes
    # were still predicted on mirrored glyphs - confirm that is acceptable.
    char_masks_inv = char_masks_hf[:, :, :, ::-1]

    return global_masks_inv, char_masks_inv, None


def im_detect_mask_scale(model, im, scale, max_size, boxes, hflip=False):
    """Computes masks at the given scale."""
    # Remember the original scale
    orig_scales = cfg.TEST.SCALES
    orig_max_size = cfg.TEST.MAX_SIZE

    try:
        # Perform mask detection at the given scale
        cfg.TEST.SCALES = (scale, )
        cfg.TEST.MAX_SIZE = max_size

        if hflip:
            global_masks_scl, char_masks_scl, _ = im_detect_mask_hflip(
                model, im, boxes
            )
        else:
            im_scales = im_conv_body_only(model, im)
            global_masks_scl, char_masks_scl, _ = im_detect_mask(
                model, im_scales, boxes
            )
    finally:
        # Restore the original scale, even if detection raised.
        cfg.TEST.SCALES = orig_scales
        cfg.TEST.MAX_SIZE = orig_max_size

    return global_masks_scl, char_masks_scl, None


def im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, hflip=False):
    """Computes mask detections at the given width-relative aspect ratio."""
    # Perform mask detection on the transformed image; the boxes have to be
    # transformed the same way so that they still line up with the image.
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)
    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        global_masks_ar, char_masks_ar, _ = im_detect_mask_hflip(
            model, im_ar, boxes_ar
        )
    else:
        im_scales = im_conv_body_only(model, im_ar)
        global_masks_ar, char_masks_ar, _ = im_detect_mask(
            model, im_scales, boxes_ar
        )

    return global_masks_ar, char_masks_ar, None


def im_detect_keypoints(model, im_scales, boxes):
    """Infer instance keypoint poses. This function must be called after
    im_detect_bbox as it assumes that the Caffe2 workspace is already populated
    with the necessary blobs.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im_scales (list): image blob scales as returned by im_detect_bbox
        boxes (ndarray): R x 4 array of bounding box detections (e.g., as
            returned by im_detect_bbox)

    Returns:
        pred_heatmaps (ndarray): R x J x M x M array of keypoint location
            logits (softmax inputs) for each of the J keypoint types output
            by the network (must be processed by keypoint_results to convert
            into point predictions in the original image coordinate space)
    """
    assert len(im_scales) == 1, \
        'Only single-image / single-scale batch implemented'

    M = cfg.KRCNN.HEATMAP_SIZE
    if boxes.shape[0] == 0:
        pred_heatmaps = np.zeros((0, cfg.KRCNN.NUM_KEYPOINTS, M, M), np.float32)
        return pred_heatmaps

    inputs = {'keypoint_rois': _get_rois_blob(boxes, im_scales)}

    # Add multi-level rois for FPN
    if cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois_for_test(inputs, 'keypoint_rois')

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.keypoint_net.Proto().name)

    pred_heatmaps = workspace.FetchBlob(core.ScopedName('kps_score')).squeeze()

    # In case of 1
    if pred_heatmaps.ndim == 3:
        pred_heatmaps = np.expand_dims(pred_heatmaps, axis=0)

    return pred_heatmaps


def im_detect_keypoints_aug(model, im, boxes):
    """Computes keypoint predictions with test-time augmentations.

    Arguments:
        model (DetectionModelHelper): the detection model to use
        im (ndarray): BGR image to test
        boxes (ndarray): R x 4 array of bounding boxes

    Returns:
        heatmaps (ndarray): R x J x M x M array of keypoint location logits
    """
    # Collect heatmaps predicted under different transformations
    heatmaps_ts = []
    # Tag predictions computed under downscaling and upscaling transformations
    ds_ts = []
    us_ts = []

    def add_heatmaps_t(heatmaps_t, ds_t=False, us_t=False):
        heatmaps_ts.append(heatmaps_t)
        ds_ts.append(ds_t)
        us_ts.append(us_t)

    # Compute the heatmaps for the original image (identity transform)
    im_scales = im_conv_body_only(model, im)
    heatmaps_i = im_detect_keypoints(model, im_scales, boxes)
    add_heatmaps_t(heatmaps_i)

    # Perform keypoints detection on the horizontally flipped image
    if cfg.TEST.KPS_AUG.H_FLIP:
        heatmaps_hf = im_detect_keypoints_hflip(model, im, boxes)
        add_heatmaps_t(heatmaps_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.KPS_AUG.SCALES:
        ds_scl = scale < cfg.TEST.SCALES[0]
        us_scl = scale > cfg.TEST.SCALES[0]
        heatmaps_scl = im_detect_keypoints_scale(
            model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes
        )
        add_heatmaps_t(heatmaps_scl, ds_scl, us_scl)

        if cfg.TEST.KPS_AUG.SCALE_H_FLIP:
            heatmaps_scl_hf = im_detect_keypoints_scale(
                model, im, scale, cfg.TEST.KPS_AUG.MAX_SIZE, boxes, hflip=True
            )
            add_heatmaps_t(heatmaps_scl_hf, ds_scl, us_scl)

    # Compute keypoints at different aspect ratios
    for aspect_ratio in cfg.TEST.KPS_AUG.ASPECT_RATIOS:
        heatmaps_ar = im_detect_keypoints_aspect_ratio(
            model, im, aspect_ratio, boxes
        )
        add_heatmaps_t(heatmaps_ar)

        if cfg.TEST.KPS_AUG.ASPECT_RATIO_H_FLIP:
            heatmaps_ar_hf = im_detect_keypoints_aspect_ratio(
                model, im, aspect_ratio, boxes, hflip=True
            )
            add_heatmaps_t(heatmaps_ar_hf)

    # Select the heuristic function for combining the heatmaps
    if cfg.TEST.KPS_AUG.HEUR == 'HM_AVG':
        np_f = np.mean
    elif cfg.TEST.KPS_AUG.HEUR == 'HM_MAX':
        np_f = np.amax
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(cfg.TEST.KPS_AUG.HEUR)
        )

    def heur_f(hms_ts):
        return np_f(hms_ts, axis=0)

    # Combine the heatmaps
    if cfg.TEST.KPS_AUG.SCALE_SIZE_DEP:
        heatmaps_c = combine_heatmaps_size_dep(
            heatmaps_ts, ds_ts, us_ts, boxes, heur_f
        )
    else:
        heatmaps_c = heur_f(heatmaps_ts)

    return heatmaps_c


def im_detect_keypoints_hflip(model, im, boxes):
    """Computes keypoint predictions on the horizontally flipped image.
    Function signature is the same as for im_detect_keypoints_aug.
    """
    # Compute keypoints for the flipped image
    im_hf = im[:, ::-1, :]
    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])

    im_scales = im_conv_body_only(model, im_hf)
    heatmaps_hf = im_detect_keypoints(model, im_scales, boxes_hf)

    # Invert the predicted keypoints
    heatmaps_inv = keypoint_utils.flip_heatmaps(heatmaps_hf)

    return heatmaps_inv


def im_detect_keypoints_scale(model, im, scale, max_size, boxes, hflip=False):
    """Computes keypoint predictions at the given scale."""
    # Store the original scale
    orig_scales = cfg.TEST.SCALES
    orig_max_size = cfg.TEST.MAX_SIZE

    try:
        # Perform detection at the given scale
        cfg.TEST.SCALES = (scale, )
        cfg.TEST.MAX_SIZE = max_size

        if hflip:
            heatmaps_scl = im_detect_keypoints_hflip(model, im, boxes)
        else:
            im_scales = im_conv_body_only(model, im)
            heatmaps_scl = im_detect_keypoints(model, im_scales, boxes)
    finally:
        # Restore the original scale, even if detection raised.
        cfg.TEST.SCALES = orig_scales
        cfg.TEST.MAX_SIZE = orig_max_size

    return heatmaps_scl


def im_detect_keypoints_aspect_ratio(
    model, im, aspect_ratio, boxes, hflip=False
):
    """Detects keypoints at the given width-relative aspect ratio."""
    # Perform keypoint detection on the transformed image
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)
    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        heatmaps_ar = im_detect_keypoints_hflip(model, im_ar, boxes_ar)
    else:
        im_scales = im_conv_body_only(model, im_ar)
        heatmaps_ar = im_detect_keypoints(model, im_scales, boxes_ar)

    return heatmaps_ar


def combine_heatmaps_size_dep(hms_ts, ds_ts, us_ts, boxes, heur_f):
    """Combines heatmaps while taking object sizes into account."""
    assert len(hms_ts) == len(ds_ts) and len(ds_ts) == len(us_ts), \
        'All sets of hms must be tagged with downscaling and upscaling flags'

    # Classify objects into small+medium and large based on their box areas
    areas = box_utils.boxes_area(boxes)
    sm_objs = areas < cfg.TEST.KPS_AUG.AREA_TH
    l_objs = areas >= cfg.TEST.KPS_AUG.AREA_TH

    # Combine heatmaps computed under different transformations for each object
    hms_c = np.zeros_like(hms_ts[0])

    for i in range(hms_c.shape[0]):
        hms_to_combine = []
        for hms_t, ds_t, us_t in zip(hms_ts, ds_ts, us_ts):
            # Discard downscaling predictions for small and medium objects
            if sm_objs[i] and ds_t:
                continue
            # Discard upscaling predictions for large objects
            if l_objs[i] and us_t:
                continue
            hms_to_combine.append(hms_t[i])
        hms_c[i] = heur_f(hms_to_combine)

    return hms_c


def box_results_with_nms_and_limit(scores, boxes, thresh=0.0001):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detection, #classes), where each row represents a list
    of object detection confidence scores for each of the object classes in the
    dataset (including the background class). `scores[i, j]` corresponds to the
    box at `boxes[i, j * 4:(j + 1) * 4]`.

    `thresh` is accepted but currently unused: scores are filtered with
    `cfg.TEST.SCORE_THRESH`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes


def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):
    """Paste soft masks into the image and encode them as RLE, per class.

    Arguments:
        cls_boxes (list): per-class detection arrays; only the number of rows
            of each entry is used
        masks (ndarray): soft masks, indexed as masks[i, 0, :, :]
        ref_boxes (ndarray): one box per mask, in image coordinates
        im_h (int): image height
        im_w (int): image width

    Returns:
        cls_segms (list): per-class lists of RLE-encoded binary masks (entry 0,
            the background class, stays empty)
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M_HEIGHT = cfg.MRCNN.RESOLUTION_H
    M_WIDTH = cfg.MRCNN.RESOLUTION_W
    scale_h = (M_HEIGHT + 2.0) / M_HEIGHT
    scale_w = (M_WIDTH + 2.0) / M_WIDTH
    ref_boxes = box_utils.expand_boxes_hw(ref_boxes, scale_h, scale_w)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M_HEIGHT + 2, M_WIDTH + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            # Masks are class-agnostic here: channel 0 is always used.
            padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            # Clip the paste region to the image bounds.
            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]),
                (x_0 - ref_box[0]):(x_1 - ref_box[0])
            ]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F')
            )[0]
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms


# NOTE(review): `getstr` runs past the end of the visible source, so its text
# below is carried through exactly as found (still collapsed onto long lines).
def getstr(seg, charboxes, box_w, box_h, thresh_s=0.15, is_lanms=True, weight_wh=False): bg_map = (1 - seg[0, :, :]) # bg_map = cv2.GaussianBlur(bg_map, (3, 3), sigmaX=3) ret, thresh = cv2.threshold(bg_map, 0.15, 1, cv2.THRESH_BINARY) # cv2.imwrite('./bin.jpg', (thresh*255).astype(np.uint8)) kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3, 3)) eroded = cv2.erode(thresh,kernel) # cv2.imwrite('./eroded.jpg', (eroded*255).astype(np.uint8)) # raw_input() eroded = eroded.reshape((-1, 1)) mask_index = np.argmax(seg, axis=0) mask_index = mask_index.astype(np.uint8).reshape((-1, 1)) charboxes = 
charboxes.transpose([1, 2, 0]) ## 4*h*w -> h*w*4 ## trans charboxes to x1, y1, x2, y2 charboxes = dis2xyxy(charboxes, weight_wh) scores = seg.transpose([1, 2, 0]).reshape((-1, 37)) keep_pixels = np.where(eroded ==1)[0] mask_index = mask_index[keep_pixels] scores = scores[keep_pixels] charboxes = charboxes[keep_pixels] pos_index = np.where(mask_index > 0)[0] mask_index = mask_index[pos_index] ## N*1 scores = scores[pos_index] ## N*37 charboxes = charboxes[pos_index] ## N*4 all_charboxes = [] all_labels = [] for i in range(1, 37): m_idx = np.where(mask_index == i)[0] s_idx = np.where(scores[:, i].copy()[m_idx] > thresh_s)[0] if s_idx.size >= 1: ## nms temp_score = scores[:, i].copy()[m_idx][s_idx] temp_boxes = charboxes[m_idx][s_idx] if is_lanms: dets = np.hstack((box2poly(temp_boxes), temp_score[:, np.newaxis])).astype(np.float32, copy=False) res_boxes = lanms.merge_quadrangle_n9(dets, 0.3) for idx, box in enumerate(res_boxes): mask = np.zeros_like(seg[0, :, :], dtype=np.uint8) box = shrink_single_box(box[:8]) cv2.fillPoly(mask, box.reshape((-1, 4, 2)).astype(np.int32), 1) res_boxes[idx, 8] = cv2.mean(seg[i, :, :], mask)[0] nms_dets = np.hstack((poly2box(res_boxes[:, :8]), res_boxes[:, -1].reshape((-1, 1)))) else: dets = np.hstack((temp_boxes, temp_score[:, np.newaxis])).astype(np.float32, copy=False) keep = box_utils.nms(dets, 0.3) nms_dets = dets[keep, :] all_charboxes.append(nms_dets) all_labels += [i]*(nms_dets.shape[0]) if len(all_charboxes) > 0: all_charboxes = np.vstack(all_charboxes) all_labels = np.array(all_labels).reshape((-1, 1)) ## another nms with high nms thresh to filter out some boxes with high overlap and diferent classes keep = box_utils.nms(all_charboxes, 0.6) all_labels = all_labels[keep] all_charboxes = all_charboxes[keep] chars = [] for i in range(all_charboxes.shape[0]): char = {} char['x'] = (all_charboxes[i][0] + all_charboxes[i][2])/2.0 char['y'] = (all_charboxes[i][1] + all_charboxes[i][3])/2.0 char['s'] = all_charboxes[i][4] char['c'] = 
num2char(all_labels[i]) char['w'] = (all_charboxes[i][2] - all_charboxes[i][0]) char['h'] = (all_charboxes[i][3] - all_charboxes[i][1]) if char['w'] > 3 and char['h'] > 3: ## shrink char box sx1, sy1, sx2, sy2 = shrink_rect_with_ratio([char['x'], char['y'], char['w'], char['h']], 0.25) ## get mean cs = seg[1:, sy1:sy2, sx1:sx2].reshape((36, -1)).mean(axis=1).reshape((-1, 1)) char['cs'] = cs chars.append(char) chars = sorted(chars, key = lambda x: x['x']) string = '' score = 0 scores = [] css = [] for char in chars: string = string + char['c'] score += char['s'] scores.append(char['s']) css.append(char['cs']) if len(chars) > 0: score = score / len(chars) return string, score, scores, all_charboxes, np.hstack((css)) else: return '', 0, [], np.zeros((0, 5)), None def getstr_grid(seg, box_w, box_h): pos = 255 - (seg[0]*255).astype(np.uint8) mask_index = np.argmax(seg, axis=0) mask_index = mask_index.astype(np.uint8) pos = pos.astype(np.uint8) # seg = seg*255 # seg = seg.astype(np.uint8) ## resize pos and mask_index # pos = np.array(Image.fromarray(pos).resize((box_w, box_h))) # seg_resize = np.zeros((seg.shape[0], box_h, box_w)) # for i in range(seg.shape[0]): # seg_resize[i,:,:] = np.array(Image.fromarray(seg[i,:,:]).resize((box_w, box_h))) # mask_index = np.array(Image.fromarray(mask_index).resize((box_w, box_h), Image.NEAREST)) # string, score = seg2text(pos, mask_index, seg_resize) string, score, rec_scores = seg2text(pos, mask_index, seg) return string, score, rec_scores def shrink_rect_with_ratio(rect, ratio): ## rect:[xc, yc, w, h] xc, yc, w, h = rect[0], rect[1], rect[2], rect[3] x1, y1, x2, y2 = int(xc - w*ratio), int(yc - h*ratio), int(xc + w*ratio), int(yc + h*ratio) ## keep the area of the shrinked box no less than 1 if x2 == x1: x2 = x1 + 1 if y2 == y1: y2 = y1 + 1 return x1, y1, x2, y2 def shrink_single_box(poly): xc = (poly[0] + poly[2])/2.0 yc = (poly[1] + poly[7])/2.0 w_ = (poly[2] - poly[0])/4.0 h_ = (poly[7] - poly[1])/4.0 return np.array([xc-w_, 
yc-h_, xc+w_, yc-h_, xc+w_, yc+h_, xc-w_, yc+h_]) def dis2xyxy(boxes, weight_wh): h, w = boxes.shape[0], boxes.shape[1] y_index, x_index = np.where(np.ones((h, w)) > 0) boxes = boxes.reshape((-1, 4)) if weight_wh: top = (y_index - boxes[:, 0]*h).reshape((-1, 1)) right = (x_index + boxes[:, 1]*h).reshape((-1, 1)) bottom = (y_index + boxes[:, 2]*h).reshape((-1, 1)) left = (x_index - boxes[:, 3]*h).reshape((-1, 1)) else: top = (y_index - boxes[:, 0]*h).reshape((-1, 1)) right = (x_index + boxes[:, 1]*w).reshape((-1, 1)) bottom = (y_index + boxes[:, 2]*h).reshape((-1, 1)) left = (x_index - boxes[:, 3]*w).reshape((-1, 1)) return np.hstack((left, top, right, bottom)) def box2poly(boxes): x1 = boxes[:, 0].copy().reshape((-1, 1)) y1 = boxes[:, 1].copy().reshape((-1, 1)) x2 = boxes[:, 2].copy().reshape((-1, 1)) y2 = boxes[:, 3].copy().reshape((-1, 1)) return np.hstack((x1, y1, x2, y1, x2, y2, x1, y2)) def poly2box(polys): x1 = polys[:, :8:2].min(axis=1).reshape((-1, 1)) x2 = polys[:, :8:2].max(axis=1).reshape((-1, 1)) y1 = polys[:, 1:8:2].min(axis=1).reshape((-1, 1)) y2 = polys[:, 1:8:2].max(axis=1).reshape((-1, 1)) return np.hstack((x1, y1, x2, y2)) # def dis2xyxy(boxes): # h, w = boxes.shape[0], boxes.shape[1] # y_index, x_index = np.where(np.ones((h, w)) > 0) # for i in range(h): # for j in range(w): # top, right, bottom, left = boxes[i][j][0], boxes[i][j][1], boxes[i][j][2], boxes[i][j][3] # boxes[i][j] = np.array([j-left*w, i-top*h, j+right*w, i+bottom*h]) # return boxes # def getstr(seg, box_w, box_h): # pos = 255 - (seg[0]*255).astype(np.uint8) # mask_index = np.argmax(seg, axis=0) # mask_index = mask_index.astype(np.uint8) # pos = pos.astype(np.uint8) # seg = seg*255 # seg = seg.astype(np.uint8) # ## resize pos and mask_index # pos = np.array(Image.fromarray(pos).resize((box_w, box_h))) # seg_resize = np.zeros((seg.shape[0], box_h, box_w)) # for i in range(seg.shape[0]): # seg_resize[i,:,:] = np.array(Image.fromarray(seg[i,:,:]).resize((box_w, box_h))) # mask_index = 
np.array(Image.fromarray(mask_index).resize((box_w, box_h), Image.NEAREST)) # string, score = seg2text(pos, mask_index, seg_resize) # return string, score def char2num(char): if char in '0123456789': num = ord(char) - ord('0') + 1 elif char in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': num = ord(char.lower()) - ord('a') + 11 else: print('error symbol') exit() return num def num2char(num): if num >=1 and num <=10: char = chr(ord('0') + num - 1) elif num > 10 and num <= 36: char = chr(ord('a') + num - 11) else: print('error number:%d'%(num)) exit() return char def seg2text(gray, mask, seg): ## input numpy img_h, img_w = gray.shape ret, thresh = cv2.threshold(gray, 192, 255, cv2.THRESH_BINARY) im2, contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) chars = [] scores = [] for i in range(len(contours)): char = {} temp = np.zeros((img_h, img_w)).astype(np.uint8) cv2.drawContours(temp, [contours[i]], 0, (255), -1) x, y, w, h = cv2.boundingRect(contours[i]) c_x, c_y = x + w/2, y + h/2 # tmax = 0 # sym = -1 # score = 0 # pixs = mask[temp == 255] # seg_contour = seg[:, temp == 255] # seg_contour = seg_contour.astype(np.float32) / 255 # for j in range(1, 37): # tnum = (pixs == j).sum() # if tnum > tmax: # tmax = tnum # sym = j # if sym == -1: # continue # contour_score = seg_contour[sym,:].sum() / pixs.size regions = seg[1:, temp ==255].reshape((36, -1)) cs = np.mean(regions, axis=1) sym = num2char(np.argmax(cs.reshape((-1))) + 1) char['x'] = c_x char['y'] = c_y char['s'] = sym char['cs'] = cs.reshape((-1, 1)) scores.append(np.max(char['cs'], axis=0)[0]) chars.append(char) chars = sorted(chars, key = lambda x: x['x']) string = '' css = [] for char in chars: string = string + char['s'] css.append(char['cs']) if len(scores)>0: score = sum(scores) / len(scores) else: score = 0.00 if not css: css=[0.] 
return string, score, np.hstack(css) def get_tight_rect(points, start_x, start_y, image_height, image_width, scale): points = list(points) ps = sorted(points,key = lambda x:x[0]) if ps[1][1] > ps[0][1]: px1 = ps[0][0] * scale + start_x py1 = ps[0][1] * scale + start_y px4 = ps[1][0] * scale + start_x py4 = ps[1][1] * scale + start_y else: px1 = ps[1][0] * scale + start_x py1 = ps[1][1] * scale + start_y px4 = ps[0][0] * scale + start_x py4 = ps[0][1] * scale + start_y if ps[3][1] > ps[2][1]: px2 = ps[2][0] * scale + start_x py2 = ps[2][1] * scale + start_y px3 = ps[3][0] * scale + start_x py3 = ps[3][1] * scale + start_y else: px2 = ps[3][0] * scale + start_x py2 = ps[3][1] * scale + start_y px3 = ps[2][0] * scale + start_x py3 = ps[2][1] * scale + start_y if px1<0: px1=1 if px1>image_width: px1 = image_width - 1 if px2<0: px2=1 if px2>image_width: px2 = image_width - 1 if px3<0: px3=1 if px3>image_width: px3 = image_width - 1 if px4<0: px4=1 if px4>image_width: px4 = image_width - 1 if py1<0: py1=1 if py1>image_height: py1 = image_height - 1 if py2<0: py2=1 if py2>image_height: py2 = image_height - 1 if py3<0: py3=1 if py3>image_height: py3 = image_height - 1 if py4<0: py4=1 if py4>image_height: py4 = image_height - 1 return [px1, py1, px2, py2, px3, py3, px4, py4] def get_polygon(points, start_x, start_y, image_height, image_width, scale): points = list(points) # ps = sorted(points,key = lambda x:x[0]) polygon = [] for i in range(len(points)): point = points[i] x = point[0][0] * scale + start_x y = point[0][1] * scale + start_y polygon.append(x) polygon.append(y) return polygon def segm_char_results(cls_boxes, masks, ref_boxes, im_h, im_w): num_classes = 37 char_strs = [[] for _ in range(cls_boxes[1].shape[0])] char_strs_scores = [[] for _ in range(cls_boxes[1].shape[0])] M_HEIGHT = cfg.MRCNN.RESOLUTION_H M_WIDTH = cfg.MRCNN.RESOLUTION_W for k in range(cls_boxes[1].shape[0]): text, rec_score = getstr(masks[k,:,:,:], M_HEIGHT, M_WIDTH) char_strs.append(text) 
char_strs_scores.append(rec_score) # print(text, rec_score) return char_strs, char_strs_scores def keypoint_results(cls_boxes, pred_heatmaps, ref_boxes): num_classes = cfg.MODEL.NUM_CLASSES cls_keyps = [[] for _ in range(num_classes)] person_idx = keypoint_utils.get_person_class_index() xy_preds = keypoint_utils.heatmaps_to_keypoints(pred_heatmaps, ref_boxes) # NMS OKS if cfg.KRCNN.NMS_OKS: keep = keypoint_utils.nms_oks(xy_preds, ref_boxes, 0.3) xy_preds = xy_preds[keep, :, :] ref_boxes = ref_boxes[keep, :] pred_heatmaps = pred_heatmaps[keep, :, :, :] cls_boxes[person_idx] = cls_boxes[person_idx][keep, :] kps = [xy_preds[i] for i in range(xy_preds.shape[0])] cls_keyps[person_idx] = kps return cls_keyps def _get_image_blob(im): """Converts an image into a network input. Arguments: im (ndarray): a color image in BGR order Returns: blob (ndarray): a data blob holding an image pyramid im_scale_factors (ndarray): array of image scales (relative to im) used in the image pyramid """ processed_ims, im_scale_factors = blob_utils.prep_im_for_blob( im, cfg.PIXEL_MEANS, cfg.TEST.SCALES, cfg.TEST.MAX_SIZE ) blob = blob_utils.im_list_to_blob(processed_ims) return blob, np.array(im_scale_factors) def _get_rois_blob(im_rois, im_scale_factors): """Converts RoIs into network inputs. Arguments: im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates im_scale_factors (list): scale factors as returned by _get_image_blob Returns: blob (ndarray): R x 5 matrix of RoIs in the image pyramid with columns [level, x1, y1, x2, y2] """ rois, levels = _project_im_rois(im_rois, im_scale_factors) rois_blob = np.hstack((levels, rois)) return rois_blob.astype(np.float32, copy=False) def _project_im_rois(im_rois, scales): """Project image RoIs into the image pyramid built by _get_image_blob. 
Arguments: im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates scales (list): scale factors as returned by _get_image_blob Returns: rois (ndarray): R x 4 matrix of projected RoI coordinates levels (ndarray): image pyramid levels used by each projected RoI """ im_rois = im_rois.astype(np.float, copy=False) if len(scales) > 1: widths = im_rois[:, 2] - im_rois[:, 0] + 1 heights = im_rois[:, 3] - im_rois[:, 1] + 1 areas = widths * heights scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :]**2) diff_areas = np.abs(scaled_areas - 224 * 224) levels = diff_areas.argmin(axis=1)[:, np.newaxis] else: levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) rois = im_rois * scales[levels] return rois, levels def _add_multilevel_rois_for_test(blobs, name): """Distributes a set of RoIs across FPN pyramid levels by creating new level specific RoI blobs. Arguments: blobs (dict): dictionary of blobs name (str): a key in 'blobs' identifying the source RoI blob Returns: [by ref] blobs (dict): new keys named by `name + 'fpn' + level` are added to dict each with a value that's an R_level x 5 ndarray of RoIs (see _get_rois_blob for format) """ lvl_min = cfg.FPN.ROI_MIN_LEVEL lvl_max = cfg.FPN.ROI_MAX_LEVEL lvls = fpn.map_rois_to_fpn_levels(blobs[name][:, 1:5], lvl_min, lvl_max) fpn.add_multilevel_roi_blobs( blobs, name, blobs[name], lvls, lvl_min, lvl_max ) def _get_blobs(im, rois): """Convert an image and RoIs within that image into network inputs.""" blobs = {} blobs['data'], im_scale_factors = _get_image_blob(im) if cfg.MODEL.FASTER_RCNN and rois is None: height, width = blobs['data'].shape[2], blobs['data'].shape[3] scale = im_scale_factors[0] blobs['im_info'] = np.array([[height, width, scale]], dtype=np.float32) if rois is not None: blobs['rois'] = _get_rois_blob(rois, im_scale_factors) return blobs, im_scale_factors