"""Provides a basic hand detector in depth images. HandDetector provides interface for detecting hands in depth image, by using the center of mass. Copyright 2015 Markus Oberweger, ICG, Graz University of Technology <oberweger@icg.tugraz.at> This file is part of DeepPrior. DeepPrior is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. DeepPrior is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DeepPrior. If not, see <http://www.gnu.org/licenses/>. """ import numpy import cv2 from scipy import stats, ndimage import pylab import matplotlib.pyplot as plt from data.transformations import rotatePoint2D, rotatePoints2D, rotatePoints3D, transformPoints2D, transformPoint2D __author__ = "Markus Oberweger <oberweger@icg.tugraz.at>" __copyright__ = "Copyright 2015, ICG, Graz University of Technology, Austria" __credits__ = ["Markus Oberweger"] __license__ = "GPL" __version__ = "1.0" __maintainer__ = "Markus Oberweger" __email__ = "oberweger@icg.tugraz.at" __status__ = "Development" class HandDetector(object): """ Detect hand based on simple heuristic, centered at Center of Mass """ RESIZE_BILINEAR = 0 RESIZE_CV2_NN = 1 RESIZE_CV2_LINEAR = 2 def __init__(self, dpt, fx, fy, importer=None, refineNet=None): """ Constructor :param dpt: depth image :param fx: camera focal lenght :param fy: camera focal lenght """ self.dpt = dpt self.maxDepth = min(6500, dpt.max()) self.minDepth = max(10, dpt.min()) # set values out of range to 0 self.dpt[self.dpt > self.maxDepth] = 0. self.dpt[self.dpt < self.minDepth] = 0. # camera settings self.fx = fx self.fy = fy # Optional refinement of CoM self.refineNet = refineNet self.importer = importer # depth resize method self.resizeMethod = self.RESIZE_CV2_NN @staticmethod def detectionModeToString(com, refineNet): """ Get string for detection method :param com: center of mass :param refineNet: CoM refinement :return: string """ if com is False and refineNet is False: cfg = 'gt' elif com is True and refineNet is False: cfg = 'com' elif com is True and refineNet is True: cfg = 'comref' else: raise NotImplementedError("com {}, refineNet {}".format(com, refineNet)) return cfg def calculateCoM(self, dpt): """ Calculate the center of mass :param dpt: depth image :return: (x,y,z) center of mass """ dc = dpt.copy() dc[dc < self.minDepth] = 0 dc[dc > self.maxDepth] = 0 cc = ndimage.measurements.center_of_mass(dc > 0) num = numpy.count_nonzero(dc) com = numpy.array((cc[1]*num, cc[0]*num, dc.sum()), numpy.float) if num == 0: return numpy.array((0, 0, 0), numpy.float) else: return com/num def checkImage(self, tol): """ Check if there is some content in the image :param tol: tolerance :return:True if image is contentful, otherwise false """ # print numpy.std(self.dpt) if numpy.std(self.dpt) < tol: return False else: return True def getNDValue(self): """ Get value of not defined depth value distances :return:value of not defined depth value """ if self.dpt[self.dpt < self.minDepth].shape[0] > self.dpt[self.dpt > self.maxDepth].shape[0]: return stats.mode(self.dpt[self.dpt < self.minDepth])[0][0] else: return stats.mode(self.dpt[self.dpt > self.maxDepth])[0][0] @staticmethod def bilinearResize(src, dsize, ndValue): """ Bilinear resizing with sparing out not defined parts of the depth map :param src: source depth map :param dsize: new size of resized depth map :param ndValue: value of not defined depth :return:resized depth map """ dst = numpy.zeros((dsize[1], dsize[0]), dtype=numpy.float32) x_ratio = float(src.shape[1] - 1) / dst.shape[1] y_ratio = float(src.shape[0] - 1) / dst.shape[0] for row in range(dst.shape[0]): y = int(row * y_ratio) y_diff = (row * y_ratio) - y # distance of the nearest pixel(y axis) y_diff_2 = 1 - y_diff for col in range(dst.shape[1]): x = int(col * x_ratio) x_diff = (col * x_ratio) - x # distance of the nearest pixel(x axis) x_diff_2 = 1 - x_diff y2_cross_x2 = y_diff_2 * x_diff_2 y2_cross_x = y_diff_2 * x_diff y_cross_x2 = y_diff * x_diff_2 y_cross_x = y_diff * x_diff # mathematically impossible, but just to be sure... if(x+1 >= src.shape[1]) | (y+1 >= src.shape[0]): raise UserWarning("Shape mismatch") # set value to ND if there are more than two values ND numND = int(src[y, x] == ndValue) + int(src[y, x + 1] == ndValue) + int(src[y + 1, x] == ndValue) + int( src[y + 1, x + 1] == ndValue) if numND > 2: dst[row, col] = ndValue continue # print y2_cross_x2, y2_cross_x, y_cross_x2, y_cross_x # interpolate only over known values, switch to linear interpolation if src[y, x] == ndValue: y2_cross_x2 = 0. y2_cross_x = 1. - y_cross_x - y_cross_x2 if src[y, x + 1] == ndValue: y2_cross_x = 0. if y2_cross_x2 != 0.: y2_cross_x2 = 1. - y_cross_x - y_cross_x2 if src[y + 1, x] == ndValue: y_cross_x2 = 0. y_cross_x = 1. - y2_cross_x - y2_cross_x2 if src[y + 1, x + 1] == ndValue: y_cross_x = 0. if y_cross_x2 != 0.: y_cross_x2 = 1. - y2_cross_x - y2_cross_x2 # print src[y, x], src[y, x+1],src[y+1, x],src[y+1, x+1] # normalize weights if not ((y2_cross_x2 == 0.) & (y2_cross_x == 0.) & (y_cross_x2 == 0.) & (y_cross_x == 0.)): sc = 1. / (y_cross_x + y_cross_x2 + y2_cross_x + y2_cross_x2) y2_cross_x2 *= sc y2_cross_x *= sc y_cross_x2 *= sc y_cross_x *= sc # print y2_cross_x2, y2_cross_x, y_cross_x2, y_cross_x if (y2_cross_x2 == 0.) & (y2_cross_x == 0.) & (y_cross_x2 == 0.) & (y_cross_x == 0.): dst[row, col] = ndValue else: dst[row, col] = y2_cross_x2 * src[y, x] + y2_cross_x * src[y, x + 1] + y_cross_x2 * src[ y + 1, x] + y_cross_x * src[y + 1, x + 1] return dst def comToBounds(self, com, size): """ Calculate boundaries, project to 3D, then add offset and backproject to 2D (ux, uy are canceled) :param com: center of mass, in image coordinates (x,y,z), z in mm :param size: (x,y,z) extent of the source crop volume in mm :return: xstart, xend, ystart, yend, zstart, zend """ if numpy.isclose(com[2], 0.): print "Warning: CoM ill-defined!" xstart = self.dpt.shape[0]//4 xend = xstart + self.dpt.shape[0]//2 ystart = self.dpt.shape[1]//4 yend = ystart + self.dpt.shape[1]//2 zstart = self.minDepth zend = self.maxDepth else: zstart = com[2] - size[2] / 2. zend = com[2] + size[2] / 2. xstart = int(numpy.floor((com[0] * com[2] / self.fx - size[0] / 2.) / com[2]*self.fx+0.5)) xend = int(numpy.floor((com[0] * com[2] / self.fx + size[0] / 2.) / com[2]*self.fx+0.5)) ystart = int(numpy.floor((com[1] * com[2] / self.fy - size[1] / 2.) / com[2]*self.fy+0.5)) yend = int(numpy.floor((com[1] * com[2] / self.fy + size[1] / 2.) / com[2]*self.fy+0.5)) return xstart, xend, ystart, yend, zstart, zend def comToTransform(self, com, size, dsize=(128, 128)): """ Calculate affine transform from crop :param com: center of mass, in image coordinates (x,y,z), z in mm :param size: (x,y,z) extent of the source crop volume in mm :return: affine transform """ xstart, xend, ystart, yend, _, _ = self.comToBounds(com, size) trans = numpy.eye(3) trans[0, 2] = -xstart trans[1, 2] = -ystart wb = (xend - xstart) hb = (yend - ystart) if wb > hb: scale = numpy.eye(3) * dsize[0] / float(wb) sz = (dsize[0], hb * dsize[0] / wb) else: scale = numpy.eye(3) * dsize[1] / float(hb) sz = (wb * dsize[1] / hb, dsize[1]) scale[2, 2] = 1 xstart = int(numpy.floor(dsize[0] / 2. - sz[1] / 2.)) ystart = int(numpy.floor(dsize[1] / 2. - sz[0] / 2.)) off = numpy.eye(3) off[0, 2] = xstart off[1, 2] = ystart return numpy.dot(off, numpy.dot(scale, trans)) def getCrop(self, dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z=True, background=0): """ Crop patch from image :param dpt: depth image to crop from :param xstart: start x :param xend: end x :param ystart: start y :param yend: end y :param zstart: start z :param zend: end z :param thresh_z: threshold z values :return: cropped image """ if len(dpt.shape) == 2: cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1])].copy() # add pixels that are out of the image in order to keep aspect ratio cropped = numpy.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, dpt.shape[0])), (abs(xstart)-max(xstart, 0), abs(xend)-min(xend, dpt.shape[1]))), mode='constant', constant_values=background) elif len(dpt.shape) == 3: cropped = dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1]), :].copy() # add pixels that are out of the image in order to keep aspect ratio cropped = numpy.pad(cropped, ((abs(ystart)-max(ystart, 0), abs(yend)-min(yend, dpt.shape[0])), (abs(xstart)-max(xstart, 0), abs(xend)-min(xend, dpt.shape[1])), (0, 0)), mode='constant', constant_values=background) else: raise NotImplementedError() if thresh_z is True: msk1 = numpy.logical_and(cropped < zstart, cropped != 0) msk2 = numpy.logical_and(cropped > zend, cropped != 0) cropped[msk1] = zstart cropped[msk2] = 0. # backface is at 0, it is set later return cropped def getInverseCrop(self, crop, sz, xstart, xend, ystart, yend, zstart, zend, thresh_z=True, background=0): """ Crop patch from image :param crop: cropped depth image :param xstart: start x :param xend: end x :param ystart: start y :param yend: end y :param zstart: start z :param zend: end z :param thresh_z: threshold z values :return: depth image with crop put on position """ dpt = numpy.ones(sz, dtype=crop.dtype)*background if (xend < 0 and xstart < 0) or (yend < 0 and ystart < 0): return dpt if (xend > dpt.shape[1] and xstart > dpt.shape[1]) or (yend > dpt.shape[0] and ystart > dpt.shape[0]): return dpt if xend == xstart or yend == ystart: return dpt cropped = self.resizeCrop(crop, (xend-xstart, yend-ystart)) if len(dpt.shape) == 2: dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1])] = cropped[max(-ystart, 0):cropped.shape[0]-max(yend-dpt.shape[0], 0), max(-xstart, 0):cropped.shape[1]-max(xend-dpt.shape[1], 0)] elif len(dpt.shape) == 3: dpt[max(ystart, 0):min(yend, dpt.shape[0]), max(xstart, 0):min(xend, dpt.shape[1]), :] = cropped[max(-ystart, 0):cropped.shape[0]-max(yend-dpt.shape[0], 0), max(-xstart, 0):cropped.shape[1]-max(xend-dpt.shape[1], 0)] else: raise NotImplementedError() if thresh_z is True: msk1 = numpy.logical_and(dpt < zstart, dpt != 0) msk2 = numpy.logical_and(dpt > zend, dpt != 0) dpt[msk1] = zstart dpt[msk2] = 0. # backface is at 0, it is set later return dpt def resizeCrop(self, crop, sz): """ Resize cropped image :param crop: crop :param sz: size :return: resized image """ if self.resizeMethod == self.RESIZE_CV2_NN: rz = cv2.resize(crop, sz, interpolation=cv2.INTER_NEAREST) elif self.resizeMethod == self.RESIZE_BILINEAR: rz = self.bilinearResize(crop, sz, self.getNDValue()) elif self.resizeMethod == self.RESIZE_CV2_LINEAR: rz = cv2.resize(crop, sz, interpolation=cv2.INTER_LINEAR) else: raise NotImplementedError("Unknown resize method!") return rz def applyCrop3D(self, dpt, com, size, dsize, thresh_z=True, background=None): # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop patch from source cropped = self.getCrop(dpt, xstart, xend, ystart, yend, zstart, zend, thresh_z, background) wb = (xend - xstart) hb = (yend - ystart) if wb > hb: sz = (dsize[0], hb * dsize[0] / wb) else: sz = (wb * dsize[1] / hb, dsize[1]) # depth resize rz = self.resizeCrop(cropped, sz) if background is None: background = self.getNDValue() # use background as filler ret = numpy.ones(dsize, numpy.float32) * background xstart = int(numpy.floor(dsize[0] / 2. - rz.shape[1] / 2.)) xend = int(xstart + rz.shape[1]) ystart = int(numpy.floor(dsize[1] / 2. - rz.shape[0] / 2.)) yend = int(ystart + rz.shape[0]) ret[ystart:yend, xstart:xend] = rz return ret def cropArea3D(self, com=None, size=(250, 250, 250), dsize=(128, 128), docom=False): """ Crop area of hand in 3D volumina, scales inverse to the distance of hand to camera :param com: center of mass, in image coordinates (x,y,z), z in mm :param size: (x,y,z) extent of the source crop volume in mm :param dsize: (x,y) extent of the destination size :return: cropped hand image, transformation matrix for joints, CoM in image coordinates """ #print com, self.importer.jointImgTo3D(com) #import matplotlib.pyplot as plt #import matplotlib #fig = plt.figure() #ax = fig.add_subplot(111) #ax.imshow(self.dpt, cmap=matplotlib.cm.jet) if len(size) != 3 or len(dsize) != 2: raise ValueError("Size must be 3D and dsize 2D bounding box") if com is None: com = self.calculateCoM(self.dpt) # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop patch from source cropped = self.getCrop(self.dpt, xstart, xend, ystart, yend, zstart, zend) #ax.plot(com[0],com[1],marker='.') ############# # for simulating COM within cube if docom is True: com = self.calculateCoM(cropped) if numpy.allclose(com, 0.): com[2] = cropped[cropped.shape[0]//2, cropped.shape[1]//2] if numpy.isclose(com[2], 0): com[2] = 300 com[0] += xstart com[1] += ystart # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop patch from source cropped = self.getCrop(self.dpt, xstart, xend, ystart, yend, zstart, zend) # ax.plot(com[0],com[1],marker='x') ############## if docom is True and self.refineNet is not None and self.importer is not None: rz = self.resizeCrop(cropped, dsize) newCom3D = self.refineCoM(rz, size, com) + self.importer.jointImgTo3D(com) com = self.importer.joint3DToImg(newCom3D) if numpy.allclose(com, 0.): com[2] = cropped[cropped.shape[0]//2, cropped.shape[1]//2] # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop patch from source cropped = self.getCrop(self.dpt, xstart, xend, ystart, yend, zstart, zend) # ax.plot(com[0],com[1],marker='o') #plt.show(block=True) ############# wb = (xend - xstart) hb = (yend - ystart) if wb > hb: sz = (dsize[0], hb * dsize[0] / wb) else: sz = (wb * dsize[1] / hb, dsize[1]) # print com, sz, cropped.shape, xstart, xend, ystart, yend, hb, wb, zstart, zend trans = numpy.eye(3) trans[0, 2] = -xstart trans[1, 2] = -ystart if cropped.shape[0] > cropped.shape[1]: scale = numpy.eye(3) * sz[1] / float(cropped.shape[0]) else: scale = numpy.eye(3) * sz[0] / float(cropped.shape[1]) scale[2, 2] = 1 # depth resize rz = self.resizeCrop(cropped, sz) #pylab.imshow(rz); pylab.gray();t=transformPoint2D(com,scale*trans);pylab.scatter(t[0],t[1]); pylab.show() ret = numpy.ones(dsize, numpy.float32) * self.getNDValue() # use background as filler xstart = int(numpy.floor(dsize[0] / 2. - rz.shape[1] / 2.)) xend = int(xstart + rz.shape[1]) ystart = int(numpy.floor(dsize[1] / 2. - rz.shape[0] / 2.)) yend = int(ystart + rz.shape[0]) ret[ystart:yend, xstart:xend] = rz # print rz.shape, xstart, ystart off = numpy.eye(3) off[0, 2] = xstart off[1, 2] = ystart # import matplotlib.pyplot as plt # fig = plt.figure() # ax = fig.add_subplot(131) # ax.imshow(cropped, cmap='jet') # ax = fig.add_subplot(132) # ax.imshow(rz, cmap='jet') # ax = fig.add_subplot(133) # ax.imshow(ret, cmap='jet') # plt.show(block=True) # print trans,scale,off,numpy.dot(off, numpy.dot(scale, trans)) return ret, numpy.dot(off, numpy.dot(scale, trans)), com def checkPose(self, joints): """ Check if pose is anatomically possible @see Serre: Kinematic model of the hand using computer vision :param joints: joint locations R^16x3 :return: true if pose is possible """ # check dip, pip of fingers return True def track(self, com, size=(250, 250, 250), dsize=(128, 128), doHandSize=True): """ Detect the hand as closest object to camera :param size: bounding box size :return: center of mass of hand """ # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop patch from source cropped = self.getCrop(self.dpt, xstart, xend, ystart, yend, zstart, zend) # predict movement of CoM if self.refineNet is not None and self.importer is not None: rz = self.resizeCrop(cropped, dsize) newCom3D = self.refineCoM(rz, size, com) + self.importer.jointImgTo3D(com) com = self.importer.joint3DToImg(newCom3D) if numpy.allclose(com, 0.): com[2] = cropped[cropped.shape[0]//2, cropped.shape[1]//2] else: raise RuntimeError("Need refineNet for this") if doHandSize is True: # refined contour for size estimation zstart = com[2] - size[2] / 2. zend = com[2] + size[2] / 2. part_ref = self.dpt.copy() part_ref[part_ref < zstart] = 0 part_ref[part_ref > zend] = 0 part_ref[part_ref != 0] = 10 # set to something ret, thresh_ref = cv2.threshold(part_ref, 1, 255, cv2.THRESH_BINARY) contours_ref, _ = cv2.findContours(thresh_ref.astype(dtype=numpy.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # find the largest contour areas = [cv2.contourArea(cc) for cc in contours_ref] c_max = numpy.argmax(areas) # final result return com, self.estimateHandsize(contours_ref[c_max], com, size) else: return com, size def refineCoMIterative(self, com, num_iter, size=(250, 250, 250)): """ Refine com iteratively :param com: center of mass, in image coordinates (x,y,z), z in mm :param num_iter: number of iterations :param size: (x,y,z) extent of the source crop volume in mm :return: refined com """ for k in xrange(num_iter): # calculate boundaries xstart, xend, ystart, yend, zstart, zend = self.comToBounds(com, size) # crop cropped = self.getCrop(self.dpt, xstart, xend, ystart, yend, zstart, zend) com = self.calculateCoM(cropped) if numpy.allclose(com, 0.): com[2] = cropped[cropped.shape[0]//2, cropped.shape[1]//2] com[0] += max(xstart, 0) com[1] += max(ystart, 0) return com def detect(self, size=(250, 250, 250), doHandSize=True): """ Detect the hand as closest object to camera :param size: bounding box size :return: center of mass of hand """ steps = 65 dz = (self.maxDepth - self.minDepth)/float(steps) for i in range(5,steps): part = self.dpt.copy() part[part < i*dz + self.minDepth] = 0 part[part > (i+1)*dz + self.minDepth] = 0 part[part != 0] = 10 # set to something #plt.imshow(part); plt.show() ret, thresh = cv2.threshold(part, 1, 255, cv2.THRESH_BINARY) thresh = thresh.astype(dtype=numpy.uint8) im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for c in range(len(contours)): if cv2.contourArea(contours[c]) > 200: # centroid M = cv2.moments(contours[c]) cx = int(numpy.rint(M['m10']/M['m00'])) cy = int(numpy.rint(M['m01']/M['m00'])) # crop xstart = int(max(cx-100, 0)) xend = int(min(cx+100, self.dpt.shape[1]-1)) ystart = int(max(cy-100, 0)) yend = int(min(cy+100, self.dpt.shape[0]-1)) cropped = self.dpt[ystart:yend, xstart:xend].copy() cropped[cropped < i*dz + self.minDepth] = 0. cropped[cropped > (i+1)*dz + self.minDepth] = 0. com = self.calculateCoM(cropped) if numpy.allclose(com, 0.): com[2] = cropped[cropped.shape[0]//2, cropped.shape[1]//2] com[0] += xstart com[1] += ystart # refine iteratively com = self.refineCoMIterative(com, 5, size) zstart = com[2] - size[2] / 2. zend = com[2] + size[2] / 2. if doHandSize is True: # refined contour for size estimation part_ref = self.dpt.copy() part_ref[part_ref < zstart] = 0 part_ref[part_ref > zend] = 0 part_ref[part_ref != 0] = 10 # set to something ret, thresh_ref = cv2.threshold(part_ref, 1, 255, cv2.THRESH_BINARY) contours_ref, _ = cv2.findContours(thresh_ref.astype(dtype=numpy.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # find the largest contour areas = [cv2.contourArea(cc) for cc in contours_ref] c_max = numpy.argmax(areas) # final result return com, self.estimateHandsize(contours_ref[c_max], com, size) else: return com, size # no appropriate hand detected return numpy.array((0, 0, 0), numpy.float), size def refineCoM(self, cropped, size, com): """ Refines the detection result of the hand :return: center of hand """ imgD = numpy.asarray(cropped.copy(), 'float32') imgD[imgD == 0] = com[2] + (size[2] / 2.) imgD[imgD >= com[2] + (size[2] / 2.)] = com[2] + (size[2] / 2.) imgD[imgD <= com[2] - (size[2] / 2.)] = com[2] - (size[2] / 2.) imgD -= com[2] imgD /= (size[2] / 2.) test_data = numpy.zeros((1, 1, cropped.shape[0], cropped.shape[1]), dtype='float32') test_data[0, 0] = imgD # test_data2 = numpy.zeros((test_data.shape[0], test_data.shape[1], test_data.shape[2]//2, test_data.shape[3]//2), dtype='float32') # test_data4 = numpy.zeros((test_data2.shape[0], test_data2.shape[1], test_data2.shape[2]//2, test_data2.shape[3]//2), dtype='float32') # for j in range(test_data.shape[0]): # for i in range(test_data.shape[1]): # test_data2[j, i, :, :] = cv2.resize(test_data[j, i, :, :], (test_data2.shape[3], test_data2.shape[2])) # test_data4[j, i, :, :] = cv2.resize(test_data2[j, i, :, :], (test_data4.shape[3], test_data4.shape[2])) dsize = (int(test_data.shape[2]//2), int(test_data.shape[3]//2)) xstart = int(test_data.shape[2]/2-dsize[0]/2) xend = xstart + dsize[0] ystart = int(test_data.shape[3]/2-dsize[1]/2) yend = ystart + dsize[1] test_data2 = test_data[:, :, ystart:yend, xstart:xend] dsize = (int(test_data.shape[2]//4), int(test_data.shape[3]//4)) xstart = int(test_data.shape[2]/2-dsize[0]/2) xend = xstart + dsize[0] ystart = int(test_data.shape[3]/2-dsize[1]/2) yend = ystart + dsize[1] test_data4 = test_data[:, :, ystart:yend, xstart:xend] if self.refineNet.cfgParams.numInputs == 1: jts = self.refineNet.computeOutput(test_data) elif self.refineNet.cfgParams.numInputs == 3: jts = self.refineNet.computeOutput([test_data, test_data2, test_data4]) else: raise NotImplementedError("Number of inputs is {}".format(self.refineNet.cfgParams.numInputs)) return jts[0]*(size[2]/2.) def moveCoM(self, dpt, cube, com, off, joints3D, M, pad_value=0): """ Adjust already cropped image such that a moving CoM normalization is simulated :param dpt: cropped depth image with different CoM :param cube: metric cube of size (sx,sy,sz) :param com: original center of mass, in image coordinates (x,y,z) :param off: offset to center of mass (dx,dy,dz) in 3D coordinates :param joints3D: 3D joint coordinates, cropped to old CoM :param pad_value: value of padding :return: adjusted image, new 3D joint coordinates, new center of mass in image coordinates """ # if offset is 0, nothing to do if numpy.allclose(off, 0.): return dpt, joints3D, com, M # add offset to com new_com = self.importer.joint3DToImg(self.importer.jointImgTo3D(com) + off) # check for 1/0. if not (numpy.allclose(com[2], 0.) or numpy.allclose(new_com[2], 0.)): # scale to original size Mnew = self.comToTransform(new_com, cube, dpt.shape) new_dpt = self.recropHand(dpt, Mnew, numpy.linalg.inv(M), dpt.shape, background_value=pad_value, nv_val=32000., thresh_z=True, com=new_com, size=cube) else: Mnew = M new_dpt = dpt # adjust joint positions to new CoM new_joints3D = joints3D + self.importer.jointImgTo3D(com) - self.importer.jointImgTo3D(new_com) return new_dpt, new_joints3D, new_com, Mnew def rotateHand(self, dpt, cube, com, rot, joints3D, pad_value=0): """ Rotate hand virtually in the image plane by a given angle :param dpt: cropped depth image with different CoM :param cube: metric cube of size (sx,sy,sz) :param com: original center of mass, in image coordinates (x,y,z) :param rot: rotation angle in deg :param joints3D: original joint coordinates, in 3D coordinates (x,y,z) :param pad_value: value of padding :return: adjusted image, new 3D joint coordinates, rotation angle in XXX """ # if rot is 0, nothing to do if numpy.allclose(rot, 0.): return dpt, joints3D, rot rot = numpy.mod(rot, 360) M = cv2.getRotationMatrix2D((dpt.shape[1]//2, dpt.shape[0]//2), -rot, 1) if self.resizeMethod == self.RESIZE_CV2_NN: flags = cv2.INTER_NEAREST elif self.resizeMethod == self.RESIZE_CV2_LINEAR: flags = cv2.INTER_LINEAR else: raise NotImplementedError new_dpt = cv2.warpAffine(dpt, M, (dpt.shape[1], dpt.shape[0]), flags=flags, borderMode=cv2.BORDER_CONSTANT, borderValue=pad_value) com3D = self.importer.jointImgTo3D(com) joint_2D = self.importer.joints3DToImg(joints3D + com3D) data_2D = numpy.zeros_like(joint_2D) for k in xrange(data_2D.shape[0]): data_2D[k] = rotatePoint2D(joint_2D[k], com[0:2], rot) new_joints3D = (self.importer.jointsImgTo3D(data_2D) - com3D) return new_dpt, new_joints3D, rot def scaleHand(self, dpt, cube, com, sc, joints3D, M, pad_value=0): """ Virtually scale the hand by applying different cube :param dpt: cropped depth image with different CoM :param cube: metric cube of size (sx,sy,sz) :param com: original center of mass, in image coordinates (x,y,z) :param sc: scale factor for cube :param joints3D: 3D joint coordinates, cropped to old CoM :param pad_value: value of padding :return: adjusted image, new 3D joint coordinates, new center of mass in image coordinates """ # if scale is 1, nothing to do if numpy.allclose(sc, 1.): return dpt, joints3D, cube, M new_cube = [s*sc for s in cube] # check for 1/0. if not numpy.allclose(com[2], 0.): # scale to original size Mnew = self.comToTransform(com, new_cube, dpt.shape) new_dpt = self.recropHand(dpt, Mnew, numpy.linalg.inv(M), dpt.shape, background_value=pad_value, nv_val=32000., thresh_z=True, com=com, size=cube) else: Mnew = M new_dpt = dpt new_joints3D = joints3D return new_dpt, new_joints3D, new_cube, Mnew def recropHand(self, crop, M, Mnew, target_size, background_value=0., nv_val=0., thresh_z=True, com=None, size=(250, 250, 250)): if self.resizeMethod == self.RESIZE_CV2_NN: flags = cv2.INTER_NEAREST elif self.resizeMethod == self.RESIZE_CV2_LINEAR: flags = cv2.INTER_LINEAR else: raise NotImplementedError warped = cv2.warpPerspective(crop, numpy.dot(M, Mnew), target_size, flags=flags, borderMode=cv2.BORDER_CONSTANT, borderValue=float(background_value)) warped[numpy.isclose(warped, nv_val)] = background_value if thresh_z is True: assert com is not None _, _, _, _, zstart, zend = self.comToBounds(com, size) msk1 = numpy.logical_and(warped < zstart, warped != 0) msk2 = numpy.logical_and(warped > zend, warped != 0) warped[msk1] = zstart warped[msk2] = 0. # backface is at 0, it is set later return warped @staticmethod def sampleRandomPoses(importer, rng, base_poses, base_com, base_cube, num_poses, nmax, aug_modes, retall=False, rot3D=False, sigma_com=None, sigma_sc=None, rot_range=None): """ Sample random poses such that we can estimate the subspace more robustly :param importer: importer :param rng: RandomState :param base_poses: set of base 3D poses :param base_com: corresponding 3D crop locations :param base_cube: corresponding crop cubes :param num_poses: number of poses to sample :param aug_modes: augmentation modes (comb, com, rot, sc, none) :param retall: return all random parameters :param rot3D: augment rotation in 3D, which is only possible with poses not images :return: random poses """ if sigma_com is None: sigma_com = 10. if sigma_sc is None: sigma_sc = 0.05 if rot_range is None: rot_range = 180. all_modes = ['none', 'rot', 'sc', 'com', 'rot+com', 'com+rot', 'rot+com+sc', 'rot+sc+com', 'sc+rot+com', 'sc+com+rot', 'com+sc+rot', 'com+rot+sc'] assert all([aug_modes[i] in all_modes for i in xrange(len(aug_modes))]) p2use = numpy.minimum(base_poses.shape[0], nmax) print('poses to use %d' % p2use) new_poses = numpy.zeros((int(num_poses), base_poses.shape[1], base_poses.shape[2]), dtype=base_poses.dtype) new_com = numpy.zeros((int(num_poses), 3), dtype=base_poses.dtype) new_cube = numpy.zeros((int(num_poses), 3), dtype=base_poses.dtype) modes = rng.randint(0, len(aug_modes), int(num_poses)) ridxs = rng.randint(0, p2use, int(num_poses)) off = rng.randn(int(num_poses), 3) * sigma_com sc = numpy.fabs(rng.randn(int(num_poses)) * sigma_sc + 1.) rot = rng.uniform(-rot_range, rot_range, size=(int(num_poses), 3)) if aug_modes == ['none']: if retall is True: return base_poses / (base_cube[:, 2]/2.)[:, None, None], base_com, base_cube else: return base_poses / (base_cube[:, 2]/2.)[:, None, None] for i in xrange(int(num_poses)): if i % 1000 == 0: print(i) mode = modes[i] ridx = ridxs[i] cube = base_cube[ridx] com3D = base_com[ridx] pose = base_poses[ridx] if aug_modes[mode] == 'com': # augment com new_com[i] = com3D + off[i] new_cube[i] = cube new_poses[i] = (pose + com3D - new_com[i]) / (new_cube[i][2]/2.) elif aug_modes[mode] == 'rot': # augment rotation new_com[i] = com3D new_cube[i] = cube if rot3D is False: joint_2D = importer.joints3DToImg(pose + new_com[i]) data_2D = rotatePoints2D(joint_2D, importer.joint3DToImg(com3D)[0:2], rot[i, 0]) new_poses[i] = (importer.jointsImgTo3D(data_2D) - new_com[i]) / (new_cube[i][2]/2.) else: new_poses[i] = (rotatePoints3D(pose + new_com[i], new_com[i], rot[i, 0], rot[i, 1], rot[i, 2]) - new_com[i]) / (new_cube[i][2]/2.) elif aug_modes[mode] == 'sc': # augment cube new_com[i] = com3D new_cube[i] = cube*sc[i] new_poses[i] = pose / (new_cube[i][2]/2.) elif aug_modes[mode] == 'none': # no augmentation new_com[i] = com3D new_cube[i] = cube new_poses[i] = pose / (new_cube[i][2]/2.) elif aug_modes[mode] == 'rot+com' or aug_modes[mode] == 'com+rot': # augment com+rot new_com[i] = com3D + off[i] new_cube[i] = cube pose = (pose + com3D - new_com[i]) if rot3D is False: joint_2D = importer.joints3DToImg(pose + com3D) data_2D = rotatePoints2D(joint_2D, importer.joint3DToImg(new_com[i])[0:2], rot[i, 0]) new_poses[i] = (importer.jointsImgTo3D(data_2D) - com3D) / (new_cube[i][2] / 2.) else: new_poses[i] = (rotatePoints3D(pose + new_com[i], new_com[i], rot[i, 0], rot[i, 1], rot[i, 2]) - new_com[i]) / (new_cube[i][2] / 2.) elif aug_modes[mode] == 'rot+com+sc' or aug_modes[mode] == 'rot+sc+com' or aug_modes == 'sc+rot+com' or aug_modes == 'sc+com+rot' or aug_modes == 'com+sc+rot' or aug_modes == 'com+rot+sc': # augment com+scale+rot new_com[i] = com3D + off[i] new_cube[i] = cube pose = (pose + com3D - new_com[i]) pose = pose * sc[i] if rot3D is False: joint_2D = importer.joints3DToImg(pose + com3D) data_2D = rotatePoints2D(joint_2D, importer.joint3DToImg(new_com[i])[0:2], rot[i, 0]) new_poses[i] = (importer.jointsImgTo3D(data_2D) - com3D) / (new_cube[i][2] / 2.) else: new_poses[i] = (rotatePoints3D(pose + new_com[i], new_com[i], rot[i, 0], rot[i, 1], rot[i, 2]) - new_com[i]) / (new_cube[i][2] / 2.) else: raise NotImplementedError() if retall is True: return new_poses, new_com, new_cube, rot else: return new_poses def estimateHandsize(self, contours, com, cube=(250, 250, 250), tol=0.): """ Estimate hand size from contours :param contours: contours of hand :param com: center of mass :param cube: default cube :param tol: tolerance to be added to all sides :return: metric cube for cropping (x, y, z) """ x, y, w, h = cv2.boundingRect(contours) # drawing = numpy.zeros((480, 640), dtype=float) # cv2.drawContours(drawing, [contours], 0, (255, 0, 244), 1, 8) # cv2.rectangle(drawing, (x, y), (x+w, y+h), (244, 0, 233), 2, 8, 0) # cv2.imshow("contour", drawing) # convert to cube xstart = (com[0] - w / 2.) * com[2] / self.fx xend = (com[0] + w / 2.) * com[2] / self.fx ystart = (com[1] - h / 2.) * com[2] / self.fy yend = (com[1] + h / 2.) * com[2] / self.fy szx = xend - xstart szy = yend - ystart sz = (szx + szy) / 2. cube = (sz + tol, sz + tol, sz + tol) return cube