from __future__ import absolute_import, division import numpy as np import cv2 from . import Tracker from ..utils import dict2tuple from ..utils.complex import real, fft2, ifft2, complex_add, complex_mul, complex_div, fftshift from ..descriptors.fhog import fast_hog class TrackerKCF(Tracker): def __init__(self, **kargs): super(TrackerKCF, self).__init__('KCF') self.parse_args(**kargs) self._correlation = self.setup_kernel(self.cfg.kernel_type) def parse_args(self, **kargs): self.cfg = { 'lambda_': 1e-4, 'padding': 1.5, 'output_sigma_factor': 0.125, 'interp_factor': 0.012, 'sigma': 0.6, 'poly_a': 1, 'poly_b': 7, 'cell_size': 4, 'kernel_type': 'gaussian'} for key, val in kargs.items(): self.cfg.update({key: val}) self.cfg = dict2tuple(self.cfg) def setup_kernel(self, kernel_type): assert kernel_type in ['linear', 'polynomial', 'gaussian'] if kernel_type == 'linear': return lambda x1, x2: self._linear_correlation(x1, x2) elif kernel_type == 'polynomial': return lambda x1, x2: self._polynomial_correlation( x1, x2, self.cfg.poly_a, self.cfg.poly_b) elif kernel_type == 'gaussian': return lambda x1, x2: self._gaussian_correlation( x1, x2, self.cfg.sigma) def init(self, image, init_rect): # initialize parameters self.resize_image = False if np.sqrt(init_rect[2:].prod()) > 100: self.resize_image = True init_rect = init_rect / 2 self.t_center = init_rect[:2] + init_rect[2:] / 2 self.t_sz = init_rect[2:] mod = self.cfg.cell_size * 2 self.padded_sz = self.t_sz * (1 + self.cfg.padding) self.padded_sz = self.padded_sz.astype(int) // mod * mod + mod # get feature size and initialize hanning window if image.ndim == 2: image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) if self.resize_image: size = (int(image.shape[1] / 2), int(image.shape[0] / 2)) image = cv2.resize(image, size) self.z = self._crop(image, self.t_center, self.padded_sz) self.z = fast_hog(np.float32(self.z), self.cfg.cell_size) self.feat_sz = self.z.shape self.hann_window = np.outer( np.hanning(self.feat_sz[0]), np.hanning(self.feat_sz[1])).astype(np.float32) self.hann_window = self.hann_window[:, :, np.newaxis] self.z *= self.hann_window # create gaussian labels output_sigma = self.cfg.output_sigma_factor * \ np.sqrt(np.prod(self.feat_sz[:2])) / (1 + self.cfg.padding) rs, cs = np.ogrid[:self.feat_sz[0], :self.feat_sz[1]] rs, cs = rs - self.feat_sz[0] // 2, cs - self.feat_sz[1] // 2 y = np.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2)) self.yf = fft2(y) # train classifier k = self._correlation(self.z, self.z) self.alphaf = complex_div(self.yf, complex_add(fft2(k), self.cfg.lambda_)) def update(self, image): if image.ndim == 2: image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) if self.resize_image: size = (int(image.shape[1] / 2), int(image.shape[0] / 2)) image = cv2.resize(image, size) # locate target x = self._crop(image, self.t_center, self.padded_sz) x = self.hann_window * fast_hog(np.float32(x), self.cfg.cell_size) k = self._correlation(x, self.z) score = real(ifft2(complex_mul(self.alphaf, fft2(k)))) offset = self._locate_target(score) self.t_center += offset * self.cfg.cell_size # limit the estimated bounding box to be overlapped with the image self.t_center = np.clip( self.t_center, -self.t_sz / 2 + 2, image.shape[1::-1] + self.t_sz / 2 - 1) # update model new_z = self._crop(image, self.t_center, self.padded_sz) new_z = self.hann_window * fast_hog(np.float32(new_z), self.cfg.cell_size) k = self._correlation(new_z, new_z) new_alphaf = complex_div(self.yf, complex_add(fft2(k), self.cfg.lambda_)) self.alphaf = (1 - self.cfg.interp_factor) * self.alphaf + \ self.cfg.interp_factor * new_alphaf self.z = (1 - self.cfg.interp_factor) * self.z + \ self.cfg.interp_factor * new_z bndbox = np.concatenate([ self.t_center - self.t_sz / 2, self.t_sz]) if self.resize_image: bndbox = bndbox * 2 return bndbox def _crop(self, image, center, size): corners = np.zeros(4, dtype=int) corners[:2] = np.floor(center - size / 2).astype(int) corners[2:] = corners[:2] + size pads = np.concatenate( (-corners[:2], corners[2:] - image.shape[1::-1])) pads = np.maximum(0, pads) if np.any(pads > 0): corners = np.concatenate(( corners[:2] + pads[:2], corners[2:] - pads[2:])).astype(int) patch = image[corners[1]:corners[3], corners[0]:corners[2]] if np.any(pads > 0): patch = cv2.copyMakeBorder( patch, pads[1], pads[3], pads[0], pads[2], borderType=cv2.BORDER_REPLICATE) return patch def _linear_correlation(self, x1, x2): xcorr = np.zeros((self.feat_sz[0], self.feat_sz[1]), np.float32) for i in range(self.feat_sz[2]): xcorr_ = cv2.mulSpectrums( fft2(x1[:, :, i]), fft2(x2[:, :, i]), 0, conjB=True) xcorr_ = real(ifft2(xcorr_)) xcorr += xcorr_ xcorr = fftshift(xcorr) return xcorr / x1.size def _polynomial_correlation(self, x1, x2, a, b): xcorr = np.zeros((self.feat_sz[0], self.feat_sz[1]), np.float32) for i in range(self.feat_sz[2]): xcorr_ = cv2.mulSpectrums( fft2(x1[:, :, i]), fft2(x2[:, :, i]), 0, conjB=True) xcorr_ = real(ifft2(xcorr_)) xcorr += xcorr_ xcorr = fftshift(xcorr) out = (xcorr / x1.size + a) ** b return out def _gaussian_correlation(self, x1, x2, sigma): xcorr = np.zeros((self.feat_sz[0], self.feat_sz[1]), np.float32) for i in range(self.feat_sz[2]): xcorr_ = cv2.mulSpectrums( fft2(x1[:, :, i]), fft2(x2[:, :, i]), 0, conjB=True) xcorr_ = real(ifft2(xcorr_)) xcorr += xcorr_ xcorr = fftshift(xcorr) out = (np.sum(x1 * x1) + np.sum(x2 * x2) - 2.0 * xcorr) / x1.size out[out < 0] = 0 out = np.exp(-out / self.cfg.sigma ** 2) return out def _locate_target(self, score): def subpixel_peak(left, center, right): divisor = 2 * center - left - right if abs(divisor) < 1e-3: return 0 return 0.5 * (right - left) / divisor _, _, _, max_loc = cv2.minMaxLoc(score) loc = np.float32(max_loc) if max_loc[0] in range(1, score.shape[1] - 1): loc[0] += subpixel_peak( score[max_loc[1], max_loc[0] - 1], score[max_loc[1], max_loc[0]], score[max_loc[1], max_loc[0] + 1]) if max_loc[1] in range(1, score.shape[0] - 1): loc[1] += subpixel_peak( score[max_loc[1] - 1, max_loc[0]], score[max_loc[1], max_loc[0]], score[max_loc[1] + 1, max_loc[0]]) offset = loc - np.float32(score.shape[1::-1]) / 2 return offset class TrackerDCF(TrackerKCF): def __init__(self, **kargs): kargs.update({'kernel_type': 'linear'}) super(TrackerDCF, self).__init__(**kargs)