# Python source code of the tracker.

from collections import deque

import numpy as np
import torch
import torch.nn.functional as F
from torch.autograd import Variable
from scipy.optimize import linear_sum_assignment
import cv2

from .utils import bbox_overlaps, warp_pos, get_center, get_height, get_width, make_pos

from torchvision.ops.boxes import clip_boxes_to_image, nms


class Tracker:
	"""Multi-object tracker that keeps a list of active and a list of inactive tracks.

	Every call to ``step`` regresses the active tracks with the object detector,
	starts new tracks from detections that no active track already covers and,
	when ``do_reid`` is set, first tries to match those detections to inactive tracks.
	"""
	# only track pedestrian
	# NOTE(review): presumably the detector's class index for "person"; `cl` is not read anywhere in this file.
	cl = 1

	def __init__(self, obj_detect, reid_network, tracker_cfg):
		self.obj_detect = obj_detect
		self.reid_network = reid_network
		self.detection_person_thresh = tracker_cfg['detection_person_thresh']
		self.regression_person_thresh = tracker_cfg['regression_person_thresh']
		self.detection_nms_thresh = tracker_cfg['detection_nms_thresh']
		self.regression_nms_thresh = tracker_cfg['regression_nms_thresh']
		self.public_detections = tracker_cfg['public_detections']
		self.inactive_patience = tracker_cfg['inactive_patience']
		self.do_reid = tracker_cfg['do_reid']
		self.max_features_num = tracker_cfg['max_features_num']
		self.reid_sim_threshold = tracker_cfg['reid_sim_threshold']
		self.reid_iou_threshold = tracker_cfg['reid_iou_threshold']
		self.do_align = tracker_cfg['do_align']
		self.motion_model_cfg = tracker_cfg['motion_model']

		self.warp_mode = eval(tracker_cfg['warp_mode'])
		self.number_of_iterations = tracker_cfg['number_of_iterations']
		self.termination_eps = tracker_cfg['termination_eps']

		self.tracks = []
		self.inactive_tracks = []
		self.track_num = 0
		self.im_index = 0
		self.results = {}

	def reset(self, hard=True):
		self.tracks = []
		self.inactive_tracks = []

		if hard:
			self.track_num = 0
			self.results = {}
			self.im_index = 0

	def tracks_to_inactive(self, tracks):
		self.tracks = [t for t in self.tracks if t not in tracks]
		for t in tracks:
			t.pos = t.last_pos[-1]
		self.inactive_tracks += tracks

	def add(self, new_det_pos, new_det_scores, new_det_features):
		"""Initializes new Track objects and saves them."""
		num_new = new_det_pos.size(0)
		for i in range(num_new):
			self.tracks.append(Track(
				new_det_pos[i].view(1, -1),
				new_det_scores[i],
				self.track_num + i,
				new_det_features[i].view(1, -1),
				self.inactive_patience,
				self.max_features_num,
				self.motion_model_cfg['n_steps'] if self.motion_model_cfg['n_steps'] > 0 else 1
			))
		self.track_num += num_new

	def regress_tracks(self, blob):
		"""Regress the position of the tracks and also checks their scores."""
		pos = self.get_pos()

		# regress
		boxes, scores = self.obj_detect.predict_boxes(pos)
		pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

		s = []
		for i in range(len(self.tracks) - 1, -1, -1):
			t = self.tracks[i]
			t.score = scores[i]
			if scores[i] <= self.regression_person_thresh:
				self.tracks_to_inactive([t])
			else:
				s.append(scores[i])
				# t.prev_pos = t.pos
				t.pos = pos[i].view(1, -1)

		return torch.Tensor(s[::-1]).cuda()

	def get_pos(self):
		"""Get the positions of all active tracks."""
		if len(self.tracks) == 1:
			pos = self.tracks[0].pos
		elif len(self.tracks) > 1:
			pos = torch.cat([t.pos for t in self.tracks], 0)
		else:
			pos = torch.zeros(0).cuda()
		return pos

	def get_features(self):
		"""Get the features of all active tracks."""
		if len(self.tracks) == 1:
			features = self.tracks[0].features
		elif len(self.tracks) > 1:
			features = torch.cat([t.features for t in self.tracks], 0)
		else:
			features = torch.zeros(0).cuda()
		return features

	def get_inactive_features(self):
		"""Get the features of all inactive tracks."""
		if len(self.inactive_tracks) == 1:
			features = self.inactive_tracks[0].features
		elif len(self.inactive_tracks) > 1:
			features = torch.cat([t.features for t in self.inactive_tracks], 0)
		else:
			features = torch.zeros(0).cuda()
		return features

	def reid(self, blob, new_det_pos, new_det_scores):
		"""Tries to ReID inactive tracks with provided detections."""
		new_det_features = [torch.zeros(0).cuda() for _ in range(len(new_det_pos))]

		if self.do_reid:
			new_det_features = self.reid_network.test_rois(
				blob['img'], new_det_pos).data

			if len(self.inactive_tracks) >= 1:
				# calculate appearance distances
				dist_mat, pos = [], []
				for t in self.inactive_tracks:
					dist_mat.append(torch.cat([t.test_features(feat.view(1, -1))
					                           for feat in new_det_features], dim=1))
					pos.append(t.pos)
				if len(dist_mat) > 1:
					dist_mat = torch.cat(dist_mat, 0)
					pos = torch.cat(pos, 0)
				else:
					dist_mat = dist_mat[0]
					pos = pos[0]

				# calculate IoU distances
				iou = bbox_overlaps(pos, new_det_pos)
				iou_mask = torch.ge(iou, self.reid_iou_threshold)
				iou_neg_mask = ~iou_mask
				# make all impossible assignments to the same add big value
				dist_mat = dist_mat * iou_mask.float() + iou_neg_mask.float() * 1000
				dist_mat = dist_mat.cpu().numpy()

				row_ind, col_ind = linear_sum_assignment(dist_mat)

				assigned = []
				remove_inactive = []
				for r, c in zip(row_ind, col_ind):
					if dist_mat[r, c] <= self.reid_sim_threshold:
						t = self.inactive_tracks[r]
						self.tracks.append(t)
						t.count_inactive = 0
						t.pos = new_det_pos[c].view(1, -1)
						t.reset_last_pos()
						t.add_features(new_det_features[c].view(1, -1))
						assigned.append(c)
						remove_inactive.append(t)

				for t in remove_inactive:
					self.inactive_tracks.remove(t)

				keep = torch.Tensor([i for i in range(new_det_pos.size(0)) if i not in assigned]).long().cuda()
				if keep.nelement() > 0:
					new_det_pos = new_det_pos[keep]
					new_det_scores = new_det_scores[keep]
					new_det_features = new_det_features[keep]
				else:
					new_det_pos = torch.zeros(0).cuda()
					new_det_scores = torch.zeros(0).cuda()
					new_det_features = torch.zeros(0).cuda()

		return new_det_pos, new_det_scores, new_det_features

	def get_appearances(self, blob):
		"""Uses the siamese CNN to get the features for all active tracks."""
		new_features = self.reid_network.test_rois(blob['img'], self.get_pos()).data
		return new_features

	def add_features(self, new_features):
		"""Adds new appearance features to active tracks."""
		for t, f in zip(self.tracks, new_features):
			t.add_features(f.view(1, -1))

	def align(self, blob):
		"""Aligns the positions of active and inactive tracks depending on camera motion."""
		if self.im_index > 0:
			im1 = np.transpose(self.last_image.cpu().numpy(), (1, 2, 0))
			im2 = np.transpose(blob['img'][0].cpu().numpy(), (1, 2, 0))
			im1_gray = cv2.cvtColor(im1, cv2.COLOR_RGB2GRAY)
			im2_gray = cv2.cvtColor(im2, cv2.COLOR_RGB2GRAY)
			warp_matrix = np.eye(2, 3, dtype=np.float32)
			criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, self.number_of_iterations,  self.termination_eps)
			cc, warp_matrix = cv2.findTransformECC(im1_gray, im2_gray, warp_matrix, self.warp_mode, criteria)
			warp_matrix = torch.from_numpy(warp_matrix)

			for t in self.tracks:
				t.pos = warp_pos(t.pos, warp_matrix)
				# t.pos = clip_boxes(Variable(pos), blob['im_info'][0][:2]).data

			if self.do_reid:
				for t in self.inactive_tracks:
					t.pos = warp_pos(t.pos, warp_matrix)

			if self.motion_model_cfg['enabled']:
				for t in self.tracks:
					for i in range(len(t.last_pos)):
						t.last_pos[i] = warp_pos(t.last_pos[i], warp_matrix)

	def motion_step(self, track):
		"""Updates the given track's position by one step based on track.last_v"""
		if self.motion_model_cfg['center_only']:
			center_new = get_center(track.pos) + track.last_v
			track.pos = make_pos(*center_new, get_width(track.pos), get_height(track.pos))
		else:
			track.pos = track.pos + track.last_v

	def motion(self):
		"""Applies a simple linear motion model that considers the last n_steps steps."""
		for t in self.tracks:
			last_pos = list(t.last_pos)

			# avg velocity between each pair of consecutive positions in t.last_pos
			if self.motion_model_cfg['center_only']:
				vs = [get_center(p2) - get_center(p1) for p1, p2 in zip(last_pos, last_pos[1:])]
			else:
				vs = [p2 - p1 for p1, p2 in zip(last_pos, last_pos[1:])]

			t.last_v = torch.stack(vs).mean(dim=0)
			self.motion_step(t)

		if self.do_reid:
			for t in self.inactive_tracks:
				if t.last_v.nelement() > 0:
					self.motion_step(t)

	def step(self, blob):
		"""Process one frame: update the existing tracks, start new ones and record results.

		This function should be called every timestep. In order it
		1. appends every active track's current position to its history,
		2. obtains detections (regressed public detections or the detector's own),
		3. aligns, moves and regresses the active tracks and applies NMS among them,
		4. removes detections covered by an active track, tries to re-identify
		   inactive tracks and starts new tracks from the remaining detections,
		5. writes the active tracks into ``self.results`` and ages the inactive ones.

		Args:
			blob: mapping with the frame data. ``blob['img']`` is read in every call
				and ``blob['dets']`` when ``public_detections`` is set.
				NOTE(review): assumes ``blob['img']`` is a batch whose entry 0 is the
				frame (see the last line of this method) — confirm with the data loader.
		"""
		for t in self.tracks:
			# add current position to last_pos list
			t.last_pos.append(t.pos.clone())

		###########################
		# Look for new detections #
		###########################

		self.obj_detect.load_image(blob['img'])

		if self.public_detections:
			# squeeze(dim=0) only drops the first dimension when it has size 1
			dets = blob['dets'].squeeze(dim=0)
			if dets.nelement() > 0:
				boxes, scores = self.obj_detect.predict_boxes(dets)
			else:
				boxes = scores = torch.zeros(0).cuda()
		else:
			boxes, scores = self.obj_detect.detect(blob['img'])

		if boxes.nelement() > 0:
			boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

			# Keep only the detections whose person score exceeds the threshold
			inds = torch.gt(scores, self.detection_person_thresh).nonzero().view(-1)
		else:
			inds = torch.zeros(0).cuda()

		if inds.nelement() > 0:
			det_pos = boxes[inds]

			det_scores = scores[inds]
		else:
			det_pos = torch.zeros(0).cuda()
			det_scores = torch.zeros(0).cuda()

		##################
		# Predict tracks #
		##################

		# NOTE(review): num_tracks and nms_inp_reg are not read again in this method
		num_tracks = 0
		nms_inp_reg = torch.zeros(0).cuda()
		if len(self.tracks):
			# align
			if self.do_align:
				self.align(blob)

			# apply motion model
			if self.motion_model_cfg['enabled']:
				self.motion()
				# NOTE(review): if this filter removes every track, regress_tracks() below
				# is still called with no active tracks — confirm the detector copes with that.
				self.tracks = [t for t in self.tracks if t.has_positive_area()]

			# regress
			person_scores = self.regress_tracks(blob)

			if len(self.tracks):
				# create nms input

				# nms here if tracks overlap
				keep = nms(self.get_pos(), person_scores, self.regression_nms_thresh)

				# every track whose index nms did not return becomes inactive
				self.tracks_to_inactive([self.tracks[i] for i in list(range(len(self.tracks))) if i not in keep])

				# refresh the appearance features of the surviving tracks
				if keep.nelement() > 0 and self.do_reid:
						new_features = self.get_appearances(blob)
						self.add_features(new_features)

		#####################
		# Create new tracks #
		#####################

		# !!! Here NMS is used to filter out detections that are already covered by tracks. This is
		# !!! done by iterating through the active tracks one by one, assigning them a bigger score
		# !!! than 1 (maximum score for detections) and then filtering the detections with NMS.
		# !!! In the paper this is done by calculating the overlap with existing tracks, but the
		# !!! result stays the same.
		if det_pos.nelement() > 0:
			# first remove duplicate detections among themselves
			keep = nms(det_pos, det_scores, self.detection_nms_thresh)
			det_pos = det_pos[keep]
			det_scores = det_scores[keep]

			# check with every track in a single run (problem if tracks delete each other)
			for t in self.tracks:
				# the track box is placed at index 0 with score 2.0, above every detection score
				nms_track_pos = torch.cat([t.pos, det_pos])
				nms_track_scores = torch.cat(
					[torch.tensor([2.0]).to(det_scores.device), det_scores])
				keep = nms(nms_track_pos, nms_track_scores, self.detection_nms_thresh)

				# drop index 0 (the track itself) and shift so the indices address det_pos again
				keep = keep[torch.ge(keep, 1)] - 1

				det_pos = det_pos[keep]
				det_scores = det_scores[keep]
				if keep.nelement() == 0:
					break

		if det_pos.nelement() > 0:
			new_det_pos = det_pos
			new_det_scores = det_scores

			# try to reidentify tracks
			new_det_pos, new_det_scores, new_det_features = self.reid(blob, new_det_pos, new_det_scores)

			# add new
			if new_det_pos.nelement() > 0:
				self.add(new_det_pos, new_det_scores, new_det_features)

		####################
		# Generate Results #
		####################

		# results[track id][frame index] = first row of the track's pos followed by its score
		for t in self.tracks:
			if t.id not in self.results.keys():
				self.results[t.id] = {}
			self.results[t.id][self.im_index] = np.concatenate([t.pos[0].cpu().numpy(), np.array([t.score])])

		for t in self.inactive_tracks:
			t.count_inactive += 1

		# forget inactive tracks whose box degenerated or whose patience ran out
		self.inactive_tracks = [
			t for t in self.inactive_tracks if t.has_positive_area() and t.count_inactive <= self.inactive_patience
		]

		self.im_index += 1
		# kept so that align() can compare the next frame against this one
		self.last_image = blob['img'][0]

	def get_results(self):
		return self.results


class Track(object):
	"""State of one tracked object: box, score, appearance features and position history."""

	def __init__(self, pos, score, track_id, features, inactive_patience, max_features_num, mm_steps):
		"""Create a track from its first observation.

		Args:
			pos: box tensor of the track; a clone becomes the first history entry.
			score: score of the first observation.
			track_id: identifier stored as ``id``.
			features: first appearance feature tensor.
			inactive_patience: stored as given.
			max_features_num: upper bound on the number of stored appearance features.
			mm_steps: the position history keeps at most ``mm_steps + 1`` entries.
		"""
		# identity and current observation
		self.id = track_id
		self.pos = pos
		self.score = score
		self.gt_id = None

		# appearance memory, capped in add_features()
		self.max_features_num = max_features_num
		self.features = deque()
		self.features.append(features)
		self.ims = deque()

		# inactivity bookkeeping
		self.inactive_patience = inactive_patience
		self.count_inactive = 0

		# bounded position history; the velocity starts out as an empty tensor
		history = deque(maxlen=mm_steps + 1)
		history.append(pos.clone())
		self.last_pos = history
		self.last_v = torch.Tensor([])

	def has_positive_area(self):
		"""Tell whether the box in row 0 of ``pos`` has column 2 > column 0 and column 3 > column 1."""
		box = self.pos[0]
		return box[2] > box[0] and box[3] > box[1]

	def add_features(self, features):
		"""Store a new appearance feature, dropping the oldest one once the cap is exceeded."""
		self.features.append(features)
		overflow = len(self.features) > self.max_features_num
		if overflow:
			self.features.popleft()

	def test_features(self, test_features):
		"""Return the pairwise distance between the mean stored feature and ``test_features``."""
		if len(self.features) <= 1:
			stored = self.features[0]
		else:
			stored = torch.cat(list(self.features), dim=0)
		reference = stored.mean(0, keepdim=True)
		return F.pairwise_distance(reference, test_features, keepdim=True)

	def reset_last_pos(self):
		"""Restart the position history with a copy of the current position only."""
		history = self.last_pos
		history.clear()
		history.append(self.pos.clone())