# =====================================================================
# cnn_lcd.py - CNNs for loop-closure detection in vSLAM systems.
# Copyright (C) 2018 Zach Carmichael
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# =====================================================================
from __future__ import (print_function, division, unicode_literals,
                        absolute_import)
import argparse
import os
import sys

import numpy as np
from scipy.signal import medfilt
from sklearn.metrics import (precision_recall_curve, average_precision_score,
                             precision_score, recall_score)
from sklearn.cluster import KMeans

# local imports
from cnn_models import get_model_features, is_valid_model
from dataset import get_dataset

# Select a non-interactive backend when no display is available. The backend
# must be chosen before pyplot is imported.
if 'DISPLAY' not in os.environ:
    import matplotlib as mpl

    mpl.use('Agg')
    del mpl
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 12,
                     'font.family': 'Times New Roman'})


def get_descriptors(imgs, feat_func, pca=True, pca_dim=500, eps=1e-5,
                    cache=None, name=''):
    """ Returns feature descriptor vector for given image(s).

    Method follows procedure adapted from Zhang et al.: 'Loop Closure
    Detection for Visual SLAM Systems Using Convolutional Neural Network'

    Args:
        imgs:      Iterable of images, each of shape (h, w, c)
        feat_func: Function that takes imgs and cache as arguments, and
                   returns CNN features and the updated cache
        pca:       Whether to perform PCA (and whitening)
        pca_dim:   Dimension to reduce vectors to
        eps:       Small value to prevent division by 0
        cache:     Dict containing descs/other cached values
        name:      Name used for caching
    """
    if cache is None:
        cache = {}
    if name + 'pdescs' not in cache:
        # Get features from network
        descs, cache = feat_func(imgs, cache)
        # Flatten features into one vector per image
        descs = descs.reshape(len(imgs), -1)
        print(descs.shape)
        # L2-normalize each descriptor
        descs = descs / np.linalg.norm(descs, axis=1)[:, None]
        cache.update({name + 'pdescs': descs})
    else:
        descs = cache[name + 'pdescs']
    if pca:
        print('Performing PCA with pca_dim={}'.format(pca_dim))
        descs, cache = pca_red(descs, pca_dim, eps=eps, whiten=True,
                               cache=cache, name=name)
        print('PCA done.')
    return descs, cache


def pca_red(descs, dim, eps=1e-5, whiten=True, cache=None, name=''):
    """ Performs PCA + whitening on image descriptors.

    Args:
        descs:  Input matrix of image descriptors
        dim:    The number of principal components to reduce descs to
        eps:    Small epsilon to avoid division by 0
        whiten: Whether to whiten the principal components
        cache:  PCA cache (see name parameter)
        name:   Used to differentiate cached values between models

    Returns:
        descs: the descs post-reduction
        cache: the (updated) cache
    """
    if cache is None:
        cache = {}
    # Zero-center data
    dmean = descs.mean(axis=0)
    descs = descs - dmean
    if name + 'S' not in cache or name + 'U' not in cache:
        # Compute covariance matrix
        cov = descs.T.dot(descs) / (descs.shape[0] - 1)
        # Apply SVD
        U, S, _ = np.linalg.svd(cov)
        cache.update({name + 'U': U, name + 'S': S})
    else:
        U, S = cache[name + 'U'], cache[name + 'S']
    # Project onto principal axes
    descs = descs.dot(U[:, :dim])
    # Whiten
    if whiten:
        descs = descs / np.sqrt(S[:dim] + eps)
    return descs, cache
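

# The following is a minimal, self-contained sketch (not part of the original
# pipeline) of how get_descriptors() can be exercised. The random images and
# the pixel-flattening fake_feat_func are illustrative stand-ins for a real
# CNN feature extractor.
def _example_get_descriptors():  # pragma: no cover
    """Hypothetical usage example for get_descriptors()/pca_red()."""
    rng = np.random.RandomState(0)
    imgs = rng.rand(8, 16, 16, 3)  # 8 fake RGB images

    def fake_feat_func(_imgs, _cache):
        # Stand-in for a CNN: use the raw pixels as "features"
        return np.asarray(_imgs).reshape(len(_imgs), -1), _cache

    descs, _ = get_descriptors(imgs, fake_feat_func, pca=True, pca_dim=4)
    return descs  # shape (8, 4): PCA-reduced, whitened descriptors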


def cluster_kmeans(sim):
    """Run k-means on similarity matrix and segment."""
    sim_dim = sim.shape[0]
    sim = sim.reshape(-1, 1)
    # Augment with spatial coordinates
    sim_aug = np.concatenate(
        [sim, np.mgrid[:sim_dim, :sim_dim].reshape(-1, sim_dim ** 2).T],
        axis=1
    )
    # Empirical metric for number of loop-closures given number of images
    # in sequence (assumption: equally-spaced samples):
    n_clusters = int(np.sqrt(sim_dim))
    print('Performing clustering via KMeans(n={}).'.format(n_clusters))
    km = KMeans(n_clusters=n_clusters, n_jobs=2, max_iter=300)
    labels = km.fit_predict(sim_aug)
    print('Got cluster labels.')
    for i in range(n_clusters):
        lab_idx = (labels == i)
        if lab_idx.any():
            # Replace each cluster's similarities with the cluster mean
            cc = sim[lab_idx].mean()
            # cc = sim[lab_idx].max()
            sim[lab_idx] = cc
    # Re-normalize and reshape
    sim = sim.reshape(sim_dim, sim_dim) / sim.max()
    return sim


def median_filter(sim, gt, k_size=None):
    """ Apply median filtering and tune the kernel size if applicable.

    Args:
        sim:    The similarity matrix
        gt:     The ground truth matrix
        k_size: The square kernel size

    Returns:
        sim: filtered similarity matrix
    """
    # NOTE: only the lower-triangular part of the matrix actually requires
    # filtering
    tri_idx = np.tril_indices(gt.shape[0], -1)
    if k_size is None:
        print('Sweeping median kernel sizes.')
        best_ks = None
        # Compute baseline AP (no median filtering)
        best_ap = average_precision_score(gt[tri_idx], sim[tri_idx])
        best_sim = sim
        k_sizes = list(range(1, 61, 2))
        # Filter the similarity matrix with each kernel size and keep the best
        for ks in k_sizes:
            sim_filtered = medfilt(sim, kernel_size=ks)
            # Re-normalize
            sim_filtered = sim_filtered / sim_filtered.max()
            ks_ap = average_precision_score(gt[tri_idx],
                                            sim_filtered[tri_idx])
            if ks_ap > best_ap:
                best_ks = ks
                best_ap = ks_ap
                best_sim = sim_filtered
            print('Finished with ks={} (AP={}). Best so far={}'.format(
                ks, ks_ap, best_ks))
        print('Best ks={} yielded an AP of {}%.'.format(best_ks,
                                                        best_ap * 100))
        sim = best_sim
    else:
        print('Filtering with median kernel of size {}.'.format(k_size))
        sim = medfilt(sim, kernel_size=k_size)
        # Re-normalize
        sim = sim / sim.max()
        ap = average_precision_score(gt[tri_idx], sim[tri_idx])
        print('Median filter with ks={} on sim yielded an AP of {}%.'.format(
            k_size, ap * 100))
    return sim
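

# Illustrative sketch only (not used by the pipeline): the explicit loop in
# similarity_matrix() below computes all pairwise L2 distances; an equivalent
# vectorized form uses ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b. The helper
# name below is hypothetical.
def _pairwise_l2_distances(descs_norm):
    """Hypothetical helper: all pairwise L2 distances between rows."""
    sq_norms = (descs_norm ** 2).sum(axis=1)
    sq_dists = (sq_norms[:, None] + sq_norms[None, :]
                - 2. * descs_norm.dot(descs_norm.T))
    # Clip tiny negative values caused by floating-point error
    return np.sqrt(np.maximum(sq_dists, 0.))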


def similarity_matrix(descs, gt, median=True, cluster=False, plot=False,
                      k_size=None, name=''):
    """ Compute pairwise similarity between descriptors.

    Uses the provided gt to find the best parameters given the function args.

    Args:
        descs:   Feature descriptors of shape (n, d)
        gt:      The ground truth matrix
        median:  Whether to use median filtering (when k_size is None, the
                 kernel size that obtains the highest average precision is
                 chosen)
        cluster: Whether to cluster
        plot:    Whether to plot the matrix
        k_size:  Specify None to sweep, otherwise the kernel size to use
        name:    Name for plot file + cache
    """
    print('Computing similarity matrix...')
    n = descs.shape[0]
    diffs = np.zeros((n, n))
    # Compute the L2 norm of each vector and normalize
    norms = np.linalg.norm(descs, axis=1)
    descs_norm = descs / norms[:, None]
    # Compute the difference of every vector with every other vector
    for i, desc in enumerate(descs):
        diff = np.linalg.norm(descs_norm - descs_norm[i], axis=1)
        diffs[i] = diff
    # Compute max difference
    dmax = diffs.max()
    # Normalize differences and create the similarity matrix
    sim = 1. - (diffs / dmax)
    assert gt.shape[0] == sim.shape[0]
    if cluster:
        sim = cluster_kmeans(sim)
    if median:
        sim = median_filter(sim, gt, k_size=k_size)
    if plot:
        f, ax = plt.subplots()
        cax = ax.imshow(sim, cmap='coolwarm', interpolation='nearest',
                        vmin=0., vmax=1.)
        cbar = f.colorbar(cax, ticks=[0, 0.5, 1])
        cbar.ax.set_yticklabels(['0', '0.5', '1'])
        plt.savefig('simplot_{}.png'.format(name), format='png', dpi=150)
        plt.show()
        # Preprocess gt for visualization: make it symmetric with a unit
        # diagonal
        gt = gt.copy()
        gt += gt.T  # add transpose
        gt += np.eye(gt.shape[0], dtype=gt.dtype)
        # Plot similarity matrix next to ground truth
        f, ax = plt.subplots(1, 2)
        ax[0].imshow(sim, cmap='coolwarm', interpolation='nearest',
                     vmin=0., vmax=1.)
        ax[0].set_axis_off()
        ax[0].set_title('Similarity Matrix')
        ax[1].imshow(gt, cmap='gray', interpolation='nearest',
                     vmin=0., vmax=1.)
        ax[1].set_axis_off()
        ax[1].set_title('Ground Truth')
        plt.savefig('simplot_w_gt_{}.png'.format(name), format='png',
                    dpi=150)
        plt.show()
    return sim


def mean_per_class_accuracy(y_true, y_pred, n_classes=None, labels=None):
    """ Computes mean per-class accuracy.

    Args:
        y_true:    The true labels
        y_pred:    The predicted labels
        n_classes: The number of classes, optional. If not provided, the
                   number of unique classes, or the length of `labels` if
                   provided.
        labels:    The unique labels, optional. If not provided, unique
                   labels are used if `n_classes` is not provided, otherwise
                   range(n_classes).

    Returns:
        mean per-class accuracy
    """
    if n_classes is None:
        if labels is None:
            labels = np.unique(y_true)
        n_classes = len(labels)
    elif labels is None:
        labels = np.arange(n_classes)
    elif len(labels) != n_classes:
        raise ValueError('Number of classes specified ({}) differs from '
                         'number of labels ({}).'.format(n_classes,
                                                         len(labels)))
    acc = 0.
    for c in labels:
        c_mask = (y_true == c)
        c_count = c_mask.sum()
        if c_count:  # Avoid division by 0
            # Add accuracy for class c
            acc += np.logical_and(c_mask, (y_pred == c)).sum() / c_count
    # Mean accuracy per class
    return acc / n_classes
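

# Minimal worked example (illustration only, not used by the pipeline): for
# y_true = [0, 0, 1] and y_pred = [0, 1, 1], per-class accuracies are 1/2
# (class 0) and 1/1 (class 1), so the mean per-class accuracy is 0.75.
def _example_mpc_accuracy():  # pragma: no cover
    """Hypothetical usage example for mean_per_class_accuracy()."""
    y_true = np.array([0, 0, 1])
    y_pred = np.array([0, 1, 1])
    return mean_per_class_accuracy(y_true, y_pred, n_classes=2)  # -> 0.75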


def compute_and_plot_scores(sim, gt, model_name):
    """ Computes relevant metrics and plots results.

    Args:
        sim:        Similarity matrix
        gt:         Ground truth matrix
        model_name: Name of the model for logging
    """
    # Reduce the sim matrix to a vector of loop-closure scores: the matrix is
    # symmetric, so take the lower triangle and discard the diagonal
    sim = sim[np.tril_indices(sim.shape[0], -1)]
    # Ground truth is only present in the lower triangle for Oxford datasets
    gt = gt[np.tril_indices(gt.shape[0], -1)]
    # Compute PR-curve
    precision, recall, _ = precision_recall_curve(gt, sim)
    average_precision = average_precision_score(gt, sim)
    print('Average Precision: {}'.format(average_precision))
    best_macc = 0.
    best_mthresh = None
    # Compute the best MPC-accuracy at hard-coded thresholds
    thresholds = np.arange(0, 1.02, 0.02)
    for thresh in thresholds:
        sim_thresh = np.zeros_like(sim)
        sim_thresh[sim >= thresh] = 1
        macc = mean_per_class_accuracy(gt, sim_thresh, n_classes=2)
        if macc > best_macc:
            best_macc = macc
            best_mthresh = thresh
    sim_mthresh = np.zeros_like(sim)
    sim_mthresh[sim >= best_mthresh] = 1
    precision_at_mthresh = precision_score(gt, sim_mthresh)
    recall_at_mthresh = recall_score(gt, sim_mthresh)
    print('Best MPC-ACC (thresh={}): {}'.format(best_mthresh, best_macc))
    print('Precision (thresh={}): {}'.format(best_mthresh,
                                             precision_at_mthresh))
    print('Recall (thresh={}): {}'.format(best_mthresh, recall_at_mthresh))
    # Plot the precision-recall curve
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Precision-Recall curve: AP={0:0.3f}'.format(
        average_precision))
    plt.savefig('precision-recall_curve_{}.png'.format(model_name),
                format='png', dpi=150)
    plt.show()


def main(args):
    model_name = args.model
    # Check specified model
    if is_valid_model(model_name):
        # Create weights path
        weights_path = os.path.join(args.weights_dir, args.weights_base)
        weights_path = weights_path.format(args.overfeat)
        # Create feature function
        feat_func = lambda _imgs, _cache: get_model_features(
            _imgs, model_name, overfeat_weights_path=weights_path,
            overfeat_typ=args.overfeat, layer=args.layer, cache=_cache)
    else:
        print('Unknown model type: {}'.format(model_name))
        sys.exit(1)
    # Load dataset
    imgs, gt = get_dataset(args.dataset, args.debug)
    if args.plot_gt:
        plt.figure()
        plt.imshow(gt, cmap='gray', interpolation='nearest')
        plt.savefig('{}_gt_plot.png'.format(args.dataset), format='png',
                    dpi=150)
        plt.show()
        sys.exit(0)
    # Compute feature descriptors
    descs, cache = get_descriptors(imgs, feat_func, pca=True, pca_dim=500,
                                   eps=1e-5, cache=None, name=model_name)
    # Kernel sizes for median filter
    if args.sweep_median:
        k_size = None
    elif args.dataset.lower() == 'city':
        k_size = 17  # best hard-coded parameter from sweep: range(1, 61, 2)
    elif args.dataset.lower() == 'college':
        k_size = 11  # best hard-coded parameter from sweep: range(1, 61, 2)
    else:
        k_size = None  # sweep
    # Compute similarity matrix
    sim = similarity_matrix(descs, gt, plot=True, cluster=args.cluster,
                            median=True, k_size=k_size,
                            name='_'.join([args.dataset, model_name]))
    assert sim.shape == gt.shape, \
        'sim and gt not the same shape: {} != {}'.format(sim.shape, gt.shape)
    compute_and_plot_scores(sim, gt, model_name)
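

# Example invocations (arguments shown for illustration only; model and
# dataset names follow the argparse options defined below):
#   python cnn_lcd.py inception_v3 --dataset city
#   python cnn_lcd.py overfeat --overfeat 1 --dataset college --sweep_median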


if __name__ == '__main__':
    # Parse CLI args
    parser = argparse.ArgumentParser(description='CNNs for loop-closure '
                                                 'detection.')
    parser.add_argument('model', type=str,
                        help='Model name: [overfeat, inception_v{1,2,3,4}, '
                             'nasnet, resnet_v2_152]')
    parser.add_argument('--dataset', type=str,
                        help='Either "city" or "college".',
                        default='city')
    parser.add_argument('--overfeat', type=int,
                        help='0 for small network, 1 for large',
                        default=1)
    parser.add_argument('--weights_dir', type=str,
                        default='OverFeat/data/default',
                        help='Weights directory.')
    parser.add_argument('--weights_base', type=str,
                        default='net_weight_{}',
                        help='Basename of weights file.')
    parser.add_argument('--layer', type=int, default=None,
                        help='Layer number to extract features from.')
    parser.add_argument('--plot_gt', action='store_true',
                        help='Plots heat-map of ground truth and exits.')
    parser.add_argument('--cluster', action='store_true',
                        help='Additionally performs clustering on sim '
                             'matrix.')
    parser.add_argument('--sweep_median', action='store_true',
                        help='Sweep median filter size values.')
    parser.add_argument('--debug', action='store_true',
                        help='Use small number of images to debug code.')
    args = parser.parse_args()
    # Start program
    main(args)