import zmMagik_helpers.utils as utils import zmMagik_helpers.globals as g import zmMagik_helpers.log as log import cv2 import numpy as np from shapely.geometry import Polygon import dateparser from datetime import datetime, timedelta import zmMagik_helpers.simpleyolo.simpleYolo as yolo from ctypes import * import re class IMAGE(Structure): _fields_ = [("w", c_int), ("h", c_int), ("c", c_int), ("data", POINTER(c_float))] class DetectYolo: def __init__(self,configPath=None, weightPath=None, labelsPath=None, kernel_fill=3): if g.args['gpu'] and not g.args['use_opencv_dnn_cuda']: utils.success_print('Using Darknet GPU model for YOLO') utils.success_print('If you run out of memory, please tweak yolo.cfg') if not g.args['use_opencv_dnn_cuda']: self.m = yolo.SimpleYolo(configPath=configPath, weightPath=weightPath, darknetLib=g.args['darknet_lib'], labelsPath=labelsPath, useGPU=True) else: utils.success_print('Using OpenCV model for YOLO') utils.success_print('If you run out of memory, please tweak yolo.cfg') self.net = cv2.dnn.readNetFromDarknet(configPath, weightPath) self.labels = open(labelsPath).read().strip().split("\n") np.random.seed(42) self.colors = np.random.randint( 0, 255, size=(len(self.labels), 3), dtype="uint8") self.kernel_fill = np.ones((kernel_fill,kernel_fill),np.uint8) if g.args['use_opencv_dnn_cuda'] and g.args['gpu']: (maj,minor,patch) = cv2.__version__.split('.') min_ver = int (maj+minor) if min_ver < 42: utils.fail_print('Not setting CUDA backend for OpenCV DNN') utils.dim_print ('You are using OpenCV version {} which does not support CUDA for DNNs. A minimum of 4.2 is required. See https://www.pyimagesearch.com/2020/02/03/how-to-use-opencvs-dnn-module-with-nvidia-gpus-cuda-and-cudnn/ on how to compile and install openCV 4.2'.format(cv2.__version__)) else: utils.success_print ('Setting CUDA backend for OpenCV. If you did not set your CUDA_ARCH_BIN correctly during OpenCV compilation, you will get errors during detection related to invalid device/make_policy') self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) utils.success_print('YOLO initialized') def detect(self, frame, frame_b, frame_cnt, orig_fps, starttime, set_frames): relevant = False (H, W) = frame.shape[:2] frame_mask = np.zeros((H, W), dtype=np.uint8) boxes = [] confidences = [] labels = [] boxed_frame = frame.copy() if not g.args['gpu'] or g.args['use_opencv_dnn_cuda']: # we use OpenCV's optimized CPU or GPU code ln = self.net.getLayerNames() ln = [ln[i[0] - 1] for i in self.net.getUnconnectedOutLayers()] blob = cv2.dnn.blobFromImage( frame, 1 / 255.0, (416, 416), swapRB=True, crop=False) self.net.setInput(blob) layerOutputs = self.net.forward(ln) for output in layerOutputs: for detection in output: scores = detection[5:] classID = np.argmax(scores) confidence = scores[classID] label = self.labels[classID] if confidence > g.args['confidence']: r = re.compile(g.args['detectpattern']) if not re.match(r, label): #utils.dim_print('object "{}" does not match "{}"'.format(label, g.args['detectpattern'])) continue box = detection[0:4] * np.array([W, H, W, H]) (centerX, centerY, width, height) = box.astype("int") x = int(centerX - (width / 2)) y = int(centerY - (height / 2)) boxes.append([x, y, int(width), int(height)]) confidences.append(float(confidence)) labels.append(label) idxs = cv2.dnn.NMSBoxes(boxes, confidences, g.args["confidence"], 0.3) if len(idxs) > 0: # loop over the indexes we are keeping for i in idxs.flatten(): # extract the bounding box coordinates (x, y) = (boxes[i][0], boxes[i][1]) (width, height) = (boxes[i][2], boxes[i][3]) label = labels[i] confidence = confidences[i] pts = Polygon([[x,y], [x+width,y], [x+width, y+height], [x,y+height]]) if g.poly_mask is None or g.poly_mask.intersects(pts): relevant = True boxes.append([x, y, int(width), int(height)]) confidences.append(float(confidence)) labels.append(label) color = (255,0,0) cv2.rectangle(boxed_frame, (x, y), (x + width, y + height), color, 2) text = "{}: {:.2f}".format(label, confidence) cv2.putText(boxed_frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,0.5, color, 2) if g.args['drawboxes']: cv2.rectangle(frame_b, (x, y), (x + width, y + height), (255,255,255), 1) obj_info = { 'name': label, 'time':int(frame_cnt/orig_fps), 'frame': frame_cnt, 'location': ((x,y),(x+width, y+height)), 'confidence': '{:.4f}'.format(confidence) } # form text text = '{}: {}s, Frame: {}'.format(label, int(frame_cnt/orig_fps), frame_cnt) if starttime: st = dateparser.parse(starttime) #from_time = to_time - datetime.timedelta(hours = 1) # print (st) dt = st + timedelta(seconds=int(frame_cnt/orig_fps)) text = label + ':' +dt.strftime('%b %d, %I:%M%p') obj_info['time'] = text set_frames['frames'].append (obj_info) # work on displaying text properly text = text.upper() delta = 0 d_x = max (x-delta, 0) d_y = max (y-delta, 0) d_w = min (W, width+delta) d_h = min (H, height+delta) bsx, bsy, bex, bey = utils.write_text(frame=frame_b, text=text, x=d_x, y=d_y, W=W, H=H, adjust=True) # frame mask of text #cv2.rectangle(frame_mask, (bsx, bsy), (bex, bey), (255, 255, 255), cv2.FILLED) # frame mask of object cv2.rectangle(frame_mask, (d_x,d_y), (d_x+d_w, d_y+d_h), (255, 255, 255), cv2.FILLED) else: # darknet GPU code # we use darknet directly # if you haven't conmpiled darknet in gpu mode, you are going # to see terrible performance im = self.m.array_to_image(frame) detections = self.m.detect_image(im) boxes = [] confidences = [] labels =[] for detect in detections: (label, confidence, bbox) = detect if confidence > g.args['confidence']: r = re.compile(g.args['detectpattern']) if not re.match(r, label): # utils.dim_print('object "{}" does not match "{}"'.format(label, g.args['detectpattern'])) continue box = bbox (centerX, centerY, width, height) = box x = int(centerX - (width / 2)) y = int(centerY - (height / 2)) width = int(width) height = int(height) pts = Polygon([[x,y], [x+width,y], [x+width, y+height], [x,y+height]]) if g.poly_mask is None or g.poly_mask.intersects(pts): relevant = True boxes.append([x, y, width, height]) confidences.append(float(confidence)) labels.append(label) color = (255,0,0) cv2.rectangle(boxed_frame, (x, y), (x + width, y + height), color, 2) text = "{}: {:.2f}".format(label, confidence) cv2.putText(boxed_frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,0.5, color, 2) if g.args['drawboxes']: cv2.rectangle(frame_b, (x, y), (x + width, y + height), (255,255,255), 1) obj_info = { 'name': label, 'time':int(frame_cnt/orig_fps), 'frame': frame_cnt, 'location': ((x,y),(x+width, y+height)), 'confidence': '{:.4f}'.format(confidence) } # form text text = '{}: {}s, Frame: {}'.format(label, int(frame_cnt/orig_fps), frame_cnt) if starttime: st = dateparser.parse(starttime) #from_time = to_time - datetime.timedelta(hours = 1) # print (st) dt = st + timedelta(seconds=int(frame_cnt/orig_fps)) text = label + ':' +dt.strftime('%b %d, %I:%M%p') obj_info['time'] = text set_frames['frames'].append (obj_info) # work on displaying text properly text = text.upper() delta = 0 d_x = max (x-delta, 0) d_y = max (y-delta, 0) d_w = min (W, width+delta) d_h = min (H, height+delta) bsx, bsy, bex, bey = utils.write_text(frame=frame_b, text=text, x=d_x, y=d_y, W=W, H=H, adjust=True) # frame mask of text #cv2.rectangle(frame_mask, (bsx, bsy), (bex, bey), (255, 255, 255), cv2.FILLED) # frame mask of object cv2.rectangle(frame_mask, (d_x,d_y), (d_x+d_w, d_y+d_h), (255, 255, 255), cv2.FILLED) foreground_a = cv2.bitwise_and(frame,frame, mask=frame_mask) foreground_b = cv2.bitwise_and(frame_b,frame_b, mask=frame_mask) combined_fg= cv2.addWeighted(foreground_b, 0.5, foreground_a, 0.5,0) frame_mask_inv = cv2.bitwise_not(frame_mask) # blend frame with foreground a missing modified_frame_b = cv2.bitwise_and(frame_b, frame_b, mask=frame_mask_inv) merged_frame = cv2.add(modified_frame_b, combined_fg) # draw mask on blend frame cv2.polylines(merged_frame, [g.raw_poly_mask], True, (0,0,255), thickness=1) #return merged_frame, foreground_a, frame_mask, relevant return merged_frame, foreground_a, frame_mask, relevant, boxed_frame