"""Functions for building the face recognition network. """ # MIT License # # Copyright (c) 2016 David Sandberg # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # pylint: disable=missing-docstring from __future__ import absolute_import from __future__ import division from __future__ import print_function import os from subprocess import Popen, PIPE import tensorflow as tf import numpy as np from scipy import misc from sklearn.model_selection import KFold from scipy import interpolate from tensorflow.python.training import training import random import re from tensorflow.python.platform import gfile import math from six import iteritems def triplet_loss(anchor, positive, negative, alpha): """Calculate the triplet loss according to the FaceNet paper Args: anchor: the embeddings for the anchor images. positive: the embeddings for the positive images. negative: the embeddings for the negative images. Returns: the triplet loss according to the FaceNet paper as a float tensor. """ with tf.variable_scope('triplet_loss'): pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) return loss def center_loss(features, label, alfa, nrof_classes): """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf) """ nrof_features = features.get_shape()[1] centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, initializer=tf.constant_initializer(0), trainable=False) label = tf.reshape(label, [-1]) centers_batch = tf.gather(centers, label) diff = (1 - alfa) * (centers_batch - features) centers = tf.scatter_sub(centers, label, diff) with tf.control_dependencies([centers]): loss = tf.reduce_mean(tf.square(features - centers_batch)) return loss, centers def get_image_paths_and_labels(dataset): image_paths_flat = [] labels_flat = [] for i in range(len(dataset)): image_paths_flat += dataset[i].image_paths labels_flat += [i] * len(dataset[i].image_paths) return image_paths_flat, labels_flat def shuffle_examples(image_paths, labels): shuffle_list = list(zip(image_paths, labels)) random.shuffle(shuffle_list) image_paths_shuff, labels_shuff = zip(*shuffle_list) return image_paths_shuff, labels_shuff def random_rotate_image(image): angle = np.random.uniform(low=-10.0, high=10.0) return misc.imrotate(image, angle, 'bicubic') # 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip RANDOM_ROTATE = 1 RANDOM_CROP = 2 RANDOM_FLIP = 4 FIXED_STANDARDIZATION = 8 FLIP = 16 def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder): images_and_labels_list = [] for _ in range(nrof_preprocess_threads): filenames, label, control = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, 3) image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), lambda:tf.py_func(random_rotate_image, [image], tf.uint8), lambda:tf.identity(image)) image = tf.cond(get_control_flag(control[0], RANDOM_CROP), lambda:tf.random_crop(image, image_size + (3,)), lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), lambda:tf.image.random_flip_left_right(image), lambda:tf.identity(image)) image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), lambda:(tf.cast(image, tf.float32) - 127.5)/128.0, lambda:tf.image.per_image_standardization(image)) image = tf.cond(get_control_flag(control[0], FLIP), lambda:tf.image.flip_left_right(image), lambda:tf.identity(image)) #pylint: disable=no-member image.set_shape(image_size + (3,)) images.append(image) images_and_labels_list.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels_list, batch_size=batch_size_placeholder, shapes=[image_size + (3,), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * 100, allow_smaller_final_batch=True) return image_batch, label_batch def get_control_flag(control, field): return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) def _add_loss_summaries(total_loss): """Add summaries for losses. Generates moving average for all losses and associated summaries for visualizing the performance of the network. Args: total_loss: Total loss from loss(). Returns: loss_averages_op: op for generating moving averages of losses. """ # Compute the moving average of all individual losses and the total loss. loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') losses = tf.get_collection('losses') loss_averages_op = loss_averages.apply(losses + [total_loss]) # Attach a scalar summmary to all individual losses and the total loss; do the # same for the averaged version of the losses. for l in losses + [total_loss]: # Name each loss as '(raw)' and name the moving average version of the loss # as the original loss name. tf.summary.scalar(l.op.name +' (raw)', l) tf.summary.scalar(l.op.name, loss_averages.average(l)) return loss_averages_op def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): # Generate moving averages of all losses and associated summaries. loss_averages_op = _add_loss_summaries(total_loss) # Compute gradients. with tf.control_dependencies([loss_averages_op]): if optimizer=='ADAGRAD': opt = tf.train.AdagradOptimizer(learning_rate) elif optimizer=='ADADELTA': opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) elif optimizer=='ADAM': opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) elif optimizer=='RMSPROP': opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) elif optimizer=='MOM': opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) else: raise ValueError('Invalid optimization algorithm') grads = opt.compute_gradients(total_loss, update_gradient_vars) # Apply gradients. apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) # Add histograms for trainable variables. if log_histograms: for var in tf.trainable_variables(): tf.summary.histogram(var.op.name, var) # Add histograms for gradients. if log_histograms: for grad, var in grads: if grad is not None: tf.summary.histogram(var.op.name + '/gradients', grad) # Track the moving averages of all trainable variables. variable_averages = tf.train.ExponentialMovingAverage( moving_average_decay, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) with tf.control_dependencies([apply_gradient_op, variables_averages_op]): train_op = tf.no_op(name='train') return train_op def prewhiten(x): mean = np.mean(x) std = np.std(x) std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) y = np.multiply(np.subtract(x, mean), 1/std_adj) return y def crop(image, random_crop, image_size): if image.shape[1]>image_size: sz1 = int(image.shape[1]//2) sz2 = int(image_size//2) if random_crop: diff = sz1-sz2 (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) else: (h, v) = (0,0) image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] return image def flip(image, random_flip): if random_flip and np.random.choice([True, False]): image = np.fliplr(image) return image def to_rgb(img): w, h = img.shape ret = np.empty((w, h, 3), dtype=np.uint8) ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img return ret def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): nrof_samples = len(image_paths) images = np.zeros((nrof_samples, image_size, image_size, 3)) for i in range(nrof_samples): img = misc.imread(image_paths[i]) if img.ndim == 2: img = to_rgb(img) if do_prewhiten: img = prewhiten(img) img = crop(img, do_random_crop, image_size) img = flip(img, do_random_flip) images[i,:,:,:] = img return images def get_label_batch(label_data, batch_size, batch_index): nrof_examples = np.size(label_data, 0) j = batch_index*batch_size % nrof_examples if j+batch_size<=nrof_examples: batch = label_data[j:j+batch_size] else: x1 = label_data[j:nrof_examples] x2 = label_data[0:nrof_examples-j] batch = np.vstack([x1,x2]) batch_int = batch.astype(np.int64) return batch_int def get_batch(image_data, batch_size, batch_index): nrof_examples = np.size(image_data, 0) j = batch_index*batch_size % nrof_examples if j+batch_size<=nrof_examples: batch = image_data[j:j+batch_size,:,:,:] else: x1 = image_data[j:nrof_examples,:,:,:] x2 = image_data[0:nrof_examples-j,:,:,:] batch = np.vstack([x1,x2]) batch_float = batch.astype(np.float32) return batch_float def get_triplet_batch(triplets, batch_index, batch_size): ax, px, nx = triplets a = get_batch(ax, int(batch_size/3), batch_index) p = get_batch(px, int(batch_size/3), batch_index) n = get_batch(nx, int(batch_size/3), batch_index) batch = np.vstack([a, p, n]) return batch def get_learning_rate_from_file(filename, epoch): with open(filename, 'r') as f: for line in f.readlines(): line = line.split('#', 1)[0] if line: par = line.strip().split(':') e = int(par[0]) if par[1]=='-': lr = -1 else: lr = float(par[1]) if e <= epoch: learning_rate = lr else: return learning_rate class ImageClass(): "Stores the paths to images for a given class" def __init__(self, name, image_paths): self.name = name self.image_paths = image_paths def __str__(self): return self.name + ', ' + str(len(self.image_paths)) + ' images' def __len__(self): return len(self.image_paths) def get_dataset(path, has_class_directories=True): dataset = [] path_exp = os.path.expanduser(path) classes = [path for path in os.listdir(path_exp) \ if os.path.isdir(os.path.join(path_exp, path))] classes.sort() nrof_classes = len(classes) for i in range(nrof_classes): class_name = classes[i] facedir = os.path.join(path_exp, class_name) image_paths = get_image_paths(facedir) dataset.append(ImageClass(class_name, image_paths)) return dataset def get_image_paths(facedir): image_paths = [] if os.path.isdir(facedir): images = os.listdir(facedir) image_paths = [os.path.join(facedir,img) for img in images] return image_paths def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode): if mode=='SPLIT_CLASSES': nrof_classes = len(dataset) class_indices = np.arange(nrof_classes) np.random.shuffle(class_indices) split = int(round(nrof_classes*(1-split_ratio))) train_set = [dataset[i] for i in class_indices[0:split]] test_set = [dataset[i] for i in class_indices[split:-1]] elif mode=='SPLIT_IMAGES': train_set = [] test_set = [] for cls in dataset: paths = cls.image_paths np.random.shuffle(paths) nrof_images_in_class = len(paths) split = int(math.floor(nrof_images_in_class*(1-split_ratio))) if split==nrof_images_in_class: split = nrof_images_in_class-1 if split>=min_nrof_images_per_class and nrof_images_in_class-split>=1: train_set.append(ImageClass(cls.name, paths[:split])) test_set.append(ImageClass(cls.name, paths[split:])) else: raise ValueError('Invalid train/test split mode "%s"' % mode) return train_set, test_set def load_model(model, input_map=None): # Check if the model is a model directory (containing a metagraph and a checkpoint file) # or if it is a protobuf file with a frozen graph model_exp = os.path.expanduser(model) if (os.path.isfile(model_exp)): print('Model filename: %s' % model_exp) with gfile.FastGFile(model_exp,'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, input_map=input_map, name='') else: print('Model directory: %s' % model_exp) meta_file, ckpt_file = get_model_filenames(model_exp) print('Metagraph file: %s' % meta_file) print('Checkpoint file: %s' % ckpt_file) saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map) saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file)) def get_model_filenames(model_dir): files = os.listdir(model_dir) meta_files = [s for s in files if s.endswith('.meta')] if len(meta_files)==0: raise ValueError('No meta file found in the model directory (%s)' % model_dir) elif len(meta_files)>1: raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir) meta_file = meta_files[0] ckpt = tf.train.get_checkpoint_state(model_dir) if ckpt and ckpt.model_checkpoint_path: ckpt_file = os.path.basename(ckpt.model_checkpoint_path) return meta_file, ckpt_file meta_files = [s for s in files if '.ckpt' in s] max_step = -1 for f in files: step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f) if step_str is not None and len(step_str.groups())>=2: step = int(step_str.groups()[1]) if step > max_step: max_step = step ckpt_file = step_str.groups()[0] return meta_file, ckpt_file def distance(embeddings1, embeddings2, distance_metric=0): if distance_metric==0: # Euclidian distance diff = np.subtract(embeddings1, embeddings2) dist = np.sum(np.square(diff),1) elif distance_metric==1: # Distance based on cosine similarity dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) similarity = dot / norm dist = np.arccos(similarity) / math.pi else: raise 'Undefined distance metric %d' % distance_metric return dist def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) nrof_thresholds = len(thresholds) k_fold = KFold(n_splits=nrof_folds, shuffle=False) tprs = np.zeros((nrof_folds,nrof_thresholds)) fprs = np.zeros((nrof_folds,nrof_thresholds)) accuracy = np.zeros((nrof_folds)) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): if subtract_mean: mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) else: mean = 0.0 dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) # Find the best threshold for the fold acc_train = np.zeros((nrof_thresholds)) for threshold_idx, threshold in enumerate(thresholds): _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) best_threshold_index = np.argmax(acc_train) for threshold_idx, threshold in enumerate(thresholds): tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) tpr = np.mean(tprs,0) fpr = np.mean(fprs,0) return tpr, fpr, accuracy def calculate_accuracy(threshold, dist, actual_issame): predict_issame = np.less(dist, threshold) tp = np.sum(np.logical_and(predict_issame, actual_issame)) fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) acc = float(tp+tn)/dist.size return tpr, fpr, acc def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) nrof_thresholds = len(thresholds) k_fold = KFold(n_splits=nrof_folds, shuffle=False) val = np.zeros(nrof_folds) far = np.zeros(nrof_folds) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): if subtract_mean: mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) else: mean = 0.0 dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) # Find the threshold that gives FAR = far_target far_train = np.zeros(nrof_thresholds) for threshold_idx, threshold in enumerate(thresholds): _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) if np.max(far_train)>=far_target: f = interpolate.interp1d(far_train, thresholds, kind='slinear') threshold = f(far_target) else: threshold = 0.0 val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) val_mean = np.mean(val) far_mean = np.mean(far) val_std = np.std(val) return val_mean, val_std, far_mean def calculate_val_far(threshold, dist, actual_issame): predict_issame = np.less(dist, threshold) true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) n_same = np.sum(actual_issame) n_diff = np.sum(np.logical_not(actual_issame)) val = float(true_accept) / float(n_same) far = float(false_accept) / float(n_diff) return val, far def store_revision_info(src_path, output_dir, arg_string): try: # Get git hash cmd = ['git', 'rev-parse', 'HEAD'] gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) (stdout, _) = gitproc.communicate() git_hash = stdout.strip() except OSError as e: git_hash = ' '.join(cmd) + ': ' + e.strerror try: # Get local changes cmd = ['git', 'diff', 'HEAD'] gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) (stdout, _) = gitproc.communicate() git_diff = stdout.strip() except OSError as e: git_diff = ' '.join(cmd) + ': ' + e.strerror # Store a text file in the log directory rev_info_filename = os.path.join(output_dir, 'revision_info.txt') with open(rev_info_filename, "w") as text_file: text_file.write('arguments: %s\n--------------------\n' % arg_string) text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable text_file.write('git hash: %s\n--------------------\n' % git_hash) text_file.write('%s' % git_diff) def list_variables(filename): reader = training.NewCheckpointReader(filename) variable_map = reader.get_variable_to_shape_map() names = sorted(variable_map.keys()) return names def put_images_on_grid(images, shape=(16,8)): nrof_images = images.shape[0] img_size = images.shape[1] bw = 3 img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32) for i in range(shape[1]): x_start = i*(img_size+bw)+bw for j in range(shape[0]): img_index = i*shape[0]+j if img_index>=nrof_images: break y_start = j*(img_size+bw)+bw img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :] if img_index>=nrof_images: break return img def write_arguments_to_file(args, filename): with open(filename, 'w') as f: for key, value in iteritems(vars(args)): f.write('%s: %s\n' % (key, str(value)))