# -*- coding: utf-8 -*-
# --------------------------------------------------------
# Implementation-CVPR2015-CNN-for-ReID
# Copyright (c) 2017 Ning Ding
# Licensed under The MIT License [see LICENSE for details]
# Written by Ning Ding
# --------------------------------------------------------

import functools
import itertools
import os
import sys

import h5py
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l2
from easydict import EasyDict

__C = EasyDict()

__C.DATA = EasyDict()
__C.DATA.ORIGINAL_FILE = "cuhk-03.mat"
__C.DATA.CREATED_FILE = "cuhk-03.hdf5"
__C.DATA.INDEX_FILE = "cuhk-03-index.hdf5"
# IMAGE_SIZE is handed to PIL's Image.resize; ARRAY_SIZE is the leading part
# of the network input shape (ARRAY_SIZE + 3 channels).
__C.DATA.IMAGE_SIZE = (60, 160)
__C.DATA.ARRAY_SIZE = (160, 60)

# Sampling patterns cycled by get_data_generator: truthy -> positive pair,
# falsy -> negative pair.
__C.DATA.PATTERN = EasyDict()
__C.DATA.PATTERN.TRAIN = [1, 0, 0]
__C.DATA.PATTERN.VALID = [1, 0]

__C.TRAIN = EasyDict()
__C.TRAIN.BATCHSIZE = 150
__C.TRAIN.STEPS = 2000
__C.TRAIN.WEIGHT_DECAY = 0.00025
__C.TRAIN.GPU_INDEX = 0

__C.VALID = EasyDict()
__C.VALID.STEPS = 1

cfg = __C

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.TRAIN.GPU_INDEX)

# -------------------------------------------------------

# -------------------------------------------------------
# model section

# @export_estimator
def generate_model(weight_decay=cfg.TRAIN.WEIGHT_DECAY):
    """Build and compile the two-input pair-classification network.

    Args:
        weight_decay: L2 regularization factor applied to the kernels of
            every convolutional and dense layer.

    Returns:
        A compiled ``keras.Model`` taking ``[x1_input, x2_input]`` and
        producing a 2-way softmax. The model summary is printed as a side
        effect.
    """
    # Input Pair of images
    x1_input = keras.layers.Input(shape=(*cfg.DATA.ARRAY_SIZE, 3),
                                  name="x1_input")
    x2_input = keras.layers.Input(shape=(*cfg.DATA.ARRAY_SIZE, 3),
                                  name="x2_input")

    # Tied Convolution with max pooling (the same layer object is applied to
    # both inputs, so the weights are shared).
    share_conv_1 = keras.layers.Conv2D(20, 5, activation="relu",
                                       kernel_regularizer=l2(weight_decay),
                                       name="share_conv_1")
    x1 = share_conv_1(x1_input)
    x2 = share_conv_1(x2_input)
    x1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(x1)
    x2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(x2)

    share_conv_2 = keras.layers.Conv2D(25, 5, activation="relu",
                                       kernel_regularizer=l2(weight_decay),
                                       name="share_conv_2")
    x1 = share_conv_2(x1)
    x2 = share_conv_2(x2)
    x1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(x1)
    x2 = keras.layers.MaxPooling2D(pool_size=(2, 2))(x2)

    # Cross-Input Neighborhood Differences: each value of one branch
    # (repeated 5x5 by UpSampling2D) minus the 5x5 neighborhood around the
    # same position in the other branch.
    x1_up = keras.layers.UpSampling2D(size=(5, 5))(x1)
    x2_up = keras.layers.UpSampling2D(size=(5, 5))(x2)
    x1_nn = keras.layers.Lambda(_upsample_neighbor_function)(x1)
    x2_nn = keras.layers.Lambda(_upsample_neighbor_function)(x2)
    x1_nn = keras.layers.Lambda(lambda x: -x)(x1_nn)
    x2_nn = keras.layers.Lambda(lambda x: -x)(x2_nn)
    x1 = keras.layers.Add()([x1_up, x2_nn])
    x2 = keras.layers.Add()([x2_up, x1_nn])

    # Patch Summary Features (stride 5 collapses each 5x5 patch again).
    conv_3_1 = keras.layers.Conv2D(25, 5, strides=(5, 5), activation="relu",
                                   kernel_regularizer=l2(weight_decay),
                                   name="conv_3_1")
    conv_3_2 = keras.layers.Conv2D(25, 5, strides=(5, 5), activation="relu",
                                   kernel_regularizer=l2(weight_decay),
                                   name="conv_3_2")
    x1 = conv_3_1(x1)
    x2 = conv_3_2(x2)

    # Across-Patch Features
    conv_4_1 = keras.layers.Conv2D(25, 3, activation="relu",
                                   kernel_regularizer=l2(weight_decay),
                                   name="conv_4_1")
    conv_4_2 = keras.layers.Conv2D(25, 3, activation="relu",
                                   kernel_regularizer=l2(weight_decay),
                                   name="conv_4_2")
    x1 = conv_4_1(x1)
    x2 = conv_4_2(x2)
    x1 = keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x1)
    x2 = keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

    # Higher-Order Relationships
    y = keras.layers.Concatenate()([x1, x2])
    y = keras.layers.Flatten()(y)
    y = keras.layers.Dense(500, kernel_regularizer=l2(weight_decay),
                           activation='relu')(y)
    y = keras.layers.Dense(2, kernel_regularizer=l2(weight_decay),
                           activation='softmax')(y)

    model = keras.Model(inputs=[x1_input, x2_input], outputs=[y])
    model.summary()

    model = _compile_model(model)

    return model


def _upsample_neighbor_function(input_x):
    """Tile the 5x5 neighborhood of every spatial position of ``input_x``.

    The input is padded by 2 on each side of axes 1 and 2; for every
    original position the surrounding 5x5 window is cut out and the windows
    are laid out side by side, so axes 1 and 2 of the result are 5 times
    larger than those of the input.

    NOTE(review): the slicing treats axes 1 and 2 as the spatial axes, i.e.
    it assumes the channels-last layout -- confirm the backend image data
    format.
    """
    input_x_pad = K.spatial_2d_padding(input_x, padding=((2, 2), (2, 2)))
    x_length = K.int_shape(input_x)[1]
    y_length = K.int_shape(input_x)[2]

    output_x_list = []
    for i_x in range(2, x_length + 2):
        # All windows centred on one row, concatenated along axis 2.
        row_patches = [
            input_x_pad[:, i_x - 2:i_x + 3, i_y - 2:i_y + 3, :]
            for i_y in range(2, y_length + 2)
        ]
        output_x_list.append(K.concatenate(row_patches, axis=2))
    # Stack the per-row strips along axis 1.
    return K.concatenate(output_x_list, axis=1)


def _compile_model(model):
    """Compile ``model`` with SGD (momentum 0.9), categorical cross-entropy
    and an accuracy metric, and return it."""
    sgd = tf.keras.optimizers.SGD(lr=0.01, momentum=0.9)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# -------------------------------------------------------
# dataset preparation section

def get_data_generator(mode='train', pattern=cfg.DATA.PATTERN.TRAIN):
    """Return a generator function yielding endless labelled image pairs.

    Args:
        mode: name of the identity split to sample from (a dataset name in
            the index file, e.g. ``'train'``, ``'valid'``).
        pattern: sequence cycled forever; a truthy entry yields a positive
            pair labelled ``(1, 0)``, a falsy entry a negative pair labelled
            ``(0, 1)``.

    Returns:
        A zero-argument callable producing the generator, as expected by
        ``tf.data.Dataset.from_generator``.
    """
    def gen_data():
        for pos_or_neg in itertools.cycle(pattern):
            if pos_or_neg:
                image_x, image_y = _generate_positive_pair(mode)
                yield ((image_x, image_y), (1, 0))
            else:
                image_x, image_y = _generate_negative_pair(mode)
                yield ((image_x, image_y), (0, 1))
    return gen_data


def _generate_positive_pair(mode='train'):
    """Sample two different, randomly translated images of one random
    identity from the ``mode`` split."""
    with h5py.File(cfg.DATA.CREATED_FILE, 'r') as f:
        index_array = _get_index_array(mode)
        i = np.random.choice(index_array)
        # replace=False guarantees two distinct photos of the identity.
        x, y = np.random.choice(f[str(i)].shape[0], 2, replace=False)
        image_x = _image_augmentation(f[str(i)][x])
        image_y = _image_augmentation(f[str(i)][y])
        return image_x, image_y


def _generate_negative_pair(mode='train'):
    """Sample one image from each of two different random identities of the
    ``mode`` split (no augmentation is applied)."""
    with h5py.File(cfg.DATA.CREATED_FILE, 'r') as f:
        index_array = _get_index_array(mode)
        i, j = np.random.choice(index_array, 2, replace=False)
        x = np.random.choice(f[str(i)].shape[0], replace=False)
        y = np.random.choice(f[str(j)].shape[0], replace=False)
        image_x = f[str(i)][x]
        image_y = f[str(j)][y]
        return image_x, image_y


def _get_index_array(mode='train'):
    """Read the identity indices of split ``mode`` from the index file."""
    with h5py.File(cfg.DATA.INDEX_FILE, 'r') as f:
        index_array = f[mode][:]
        return index_array


def _image_augmentation(image):
    """Return a randomly translated copy of ``image`` with the same height
    and width.

    The image is placed on a zero canvas with a border of 5% of each spatial
    dimension, and a window of the original size is cut out at a random
    offset in ``[0, 2 * border)`` per axis.

    Args:
        image: array indexed as (axis 0, axis 1, 3 channels).

    Returns:
        float64 array of shape ``(image.shape[0], image.shape[1], 3)``.
    """
    height, width = image.shape[0], image.shape[1]
    x_padding = int(np.round(height * 0.05))
    y_padding = int(np.round(width * 0.05))

    # The canvas needs exactly one border on each side; the previous code
    # added the border twice and allocated rows/columns that were never read.
    image_padding = np.zeros((height + 2 * x_padding,
                              width + 2 * y_padding,
                              3))
    image_padding[x_padding:x_padding + height,
                  y_padding:y_padding + width,
                  :] = image

    # np.random.choice(0) raises ValueError, so tiny images whose border
    # rounds to zero are simply left untranslated along that axis.
    x_translation = np.random.choice(x_padding * 2) if x_padding else 0
    y_translation = np.random.choice(y_padding * 2) if y_padding else 0

    new_image = image_padding[x_translation:x_translation + height,
                              y_translation:y_translation + width,
                              :]
    return new_image


def _get_cmc_data():
    """Pick two distinct random images (view a / view b) for every identity
    of the ``'test'`` split and return them as two parallel lists."""
    view_a, view_b = [], []
    with h5py.File(cfg.DATA.CREATED_FILE, 'r') as f:
        index_array = _get_index_array('test')
        for i in index_array:
            x, y = np.random.choice(f[str(i)].shape[0], 2, replace=False)
            view_a.append(f[str(i)][x])
            view_b.append(f[str(i)][y])
    return view_a, view_b


def compute_cmc(model, rank=1):
    """Compute the rank-``rank`` matching rate of ``model`` on the test split.

    Every view-a image is scored against all view-b images; the query counts
    as a hit when its own identity is among the ``rank`` highest
    "same person" scores (column 0 of the model output).

    Args:
        model: object exposing ``predict_on_batch([x1, x2])`` that returns a
            2-column score array.
        rank: number of top-scored gallery entries to accept.

    Returns:
        Fraction of test identities matched within ``rank``; ``0.0`` when the
        test split is empty.
    """
    view_a, view_b = _get_cmc_data()
    # Use the real gallery size instead of a hard-coded 100 so the metric
    # stays correct if the test split is re-generated with another size.
    gallery_size = len(view_b)
    if gallery_size == 0:
        return 0.0

    view_b_array = np.array(view_b)
    num = 0
    for i, image in enumerate(view_a):
        # Repeat the query so it is paired with every gallery image.
        x = np.array([image] * gallery_size)
        result = model.predict_on_batch([x, view_b_array])
        # Gallery indices sorted by descending "same" score, cut at rank.
        args = result[:, 0].argsort()[::-1][:rank]
        if i in args:
            num += 1
    return num / gallery_size

# -------------------------------------------------------
# dataset creation section

def generate_data():
    """Convert the original CUHK-03 ``.mat`` file into the HDF5 dataset.

    One dataset per identity is written to ``cfg.DATA.CREATED_FILE``, named
    by its global index and holding all available preprocessed photos.

    f[f[f['labeled'][0][i]][j][k]] get a HDF5 Dataset
    i: index from 0-4 denoted different cameras.
    j: index from 0-9 denoted different photos of identities.
    k: index denoted different identities captured by the camera.

    notes:
    1. not all j range of 0-9, it may only range 0-5.
    2. the numpy arrays are with different size.
    3. the numpy array's dimension: channels, width, height.
    4. there are 1360 identities from index i 0-2.
    5. the five camera each contain: 843, 440, 77, 58, 49 identities.
    """
    with h5py.File(cfg.DATA.ORIGINAL_FILE, 'r') as fr, \
            h5py.File(cfg.DATA.CREATED_FILE, 'w') as fw:
        for i in range(3):
            for k in range(_get_identity_size(fr, i)):
                index = _compute_index(i, k)
                print("Now generated {} identities.".format(index + 1))
                temp = []
                for j in range(10):
                    array = _get_array(fr, i, j, k)
                    # Missing photo slots come back as None and are skipped.
                    if array is not None:
                        temp.append(array)
                fw.create_dataset(str(index), data=np.array(temp))
    print("HDF5 Dataset Already Created.")


def random_split_dataset():
    """Randomly split the 1360 identity indices into 100 test, 100 valid and
    the remaining train indices, and store them in ``cfg.DATA.INDEX_FILE``."""
    index_test = np.random.choice(1360, 100, replace=False)
    res = np.array(list((set(range(1360)) - set(index_test))))
    index_valid = np.random.choice(res, 100, replace=False)
    index_train = np.array(list((set(res) - set(index_valid))))
    with h5py.File(cfg.DATA.INDEX_FILE, 'w') as f:
        f.create_dataset('train', data=index_train)
        f.create_dataset('valid', data=index_valid)
        f.create_dataset('test', data=index_test)
    print("Index Dataset Already Created.")


def image_preprocessing(transpose=(2, 1, 0), image_size=(60, 160)):
    """Decorator factory turning a raw-array getter into an image getter.

    The wrapped function's result is kept only if it is 3-dimensional; it is
    then transposed with ``transpose``, resized to ``image_size`` with PIL
    and scaled by 1/255.

    Args:
        transpose: axis permutation applied before building the PIL image.
        image_size: size passed to ``Image.resize``.

    Returns:
        A decorator; the decorated function returns a float array or
        ``None`` when the raw result is not 3-dimensional.
    """
    def image_preprocessing_decorator(fn):
        @functools.wraps(fn)  # keep the wrapped function's name/docstring
        def updated_fn(*args, **kw):
            result = fn(*args, **kw)
            if len(result.shape) != 3:
                return None
            image = Image.fromarray(result[:].transpose(transpose))
            image = image.resize(image_size)
            # default return (160,60,3) 0-1 dtype=float64 numpy array.
            return np.array(image) / 255.
        return updated_fn
    return image_preprocessing_decorator


@image_preprocessing(image_size=cfg.DATA.IMAGE_SIZE)
def _get_array(File, camera, num, identities):
    """Dereference photo ``num`` of identity ``identities`` seen by
    ``camera`` in the original ``.mat`` file."""
    return File[File[File['labeled'][0][camera]][num][identities]]


def _get_identity_size(File, camera):
    """Return the number of identities stored for ``camera``."""
    return File[File['labeled'][0][camera]][0].size


# Global index of the first identity of cameras 0, 1 and 2
# (camera 0 holds 843 identities, camera 1 holds 440).
_CAMERA_INDEX_OFFSETS = (0, 843, 843 + 440)


def _compute_index(i, k):
    """Map (camera ``i``, per-camera identity ``k``) to a global index.

    Raises:
        ValueError: if ``i`` is not one of the supported cameras 0, 1, 2
            (previously this silently returned ``None``).
    """
    if not 0 <= i < len(_CAMERA_INDEX_OFFSETS):
        raise ValueError(
            "camera index must be 0, 1 or 2, got {!r}".format(i))
    return k + _CAMERA_INDEX_OFFSETS[i]

# -------------------------------------------------------
# training section

def _make_pair_dataset(generator, batch_size):
    """Wrap a pair generator in a batched, prefetching ``tf.data.Dataset``.

    ``batch_size`` is used both as the batch size and as the prefetch
    buffer size.
    """
    image_shape = tf.TensorShape([*cfg.DATA.ARRAY_SIZE, 3])
    dataset = tf.data.Dataset.from_generator(
        generator,
        ((tf.float32, tf.float32), tf.int8),
        ((image_shape, image_shape), tf.TensorShape(None)))
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.prefetch(buffer_size=batch_size)
    return dataset


def train_input_fn():
    """Return the training dataset (train split, ``cfg.TRAIN.BATCHSIZE``)."""
    return _make_pair_dataset(get_data_generator(), cfg.TRAIN.BATCHSIZE)


def valid_input_fn():
    """Return the validation dataset (valid split, batches of 200)."""
    return _make_pair_dataset(
        get_data_generator(mode='valid', pattern=cfg.DATA.PATTERN.VALID),
        200)


def prepare_keras_callback():
    """Build the training callbacks: learning-rate schedule, TensorBoard
    logging to ``./logs`` and a checkpoint every 100 epochs."""
    callbacks = []

    callback_lrs = tf.keras.callbacks.LearningRateScheduler(
        schedule=_learning_rate_schedule,
        verbose=1)
    callbacks.append(callback_lrs)

    callback_tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        write_graph=True,
        write_grads=True,
        write_images=True)
    callbacks.append(callback_tensorboard)

    callback_mcp = tf.keras.callbacks.ModelCheckpoint(
        filepath='weights.{epoch:06d}-{val_loss:.4f}.checkpoint',
        monitor='val_loss',
        verbose=1,
        mode='auto',
        period=100)
    callbacks.append(callback_mcp)

    return callbacks


def _learning_rate_schedule(epoch):
    """Return ``0.01 * (1 + 0.0001 * step) ** -0.75`` for the given epoch,
    where ``step = epoch * 100``."""
    step = epoch * 100
    learning_rate = 0.01 * (1 + 0.0001 * step) ** (-0.75)
    return learning_rate

# -------------------------------------------------------
# decorator of turning keras model to tensorflow estimator.
def export_estimator(fn):
    """Decorate a model factory so that it returns a TensorFlow estimator.

    Args:
        fn: callable returning a Keras model.

    Returns:
        A wrapper with the same signature and metadata as ``fn`` whose
        result is ``tf.keras.estimator.model_to_estimator(fn(...))``.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(fn)  # preserve fn's __name__ / __doc__ on the wrapper
    def new_fn(*arg, **kw):
        model = fn(*arg, **kw)
        estimator = tf.keras.estimator.model_to_estimator(model)
        return estimator
    return new_fn

# -------------------------------------------------------
# file check

def dataset_file_check():
    """Make sure the HDF5 image dataset and the split index file exist,
    creating them when necessary."""
    _check_created_dataset()
    _check_index_dataset()


def _check_created_dataset():
    """Offer to build the HDF5 dataset if it is missing.

    Exits the process when the user declines, or (via
    ``_check_original_dataset``) when the original file is missing too.
    """
    if not os.path.exists(cfg.DATA.CREATED_FILE):
        print("Can't find the created HDF5 dataset file.")
        print("Would you like to Create a new one?")
        cmd = input("yes(y) or no(n)?")
        # The prompt advertises both "yes" and "y", so accept either,
        # ignoring case and surrounding whitespace.
        if cmd.strip().lower() in ('y', 'yes'):
            _check_original_dataset()
            generate_data()
        else:
            sys.exit()


def _check_original_dataset():
    """Exit the process with a hint if the original dataset file is
    missing."""
    if not os.path.exists(cfg.DATA.ORIGINAL_FILE):
        print("Can't find the original dataset file.")
        print("Usually named: 'cuhk-03.mat'.")
        print("Find it, and come back lator.")
        sys.exit()


def _check_index_dataset():
    """Create a fresh random train/valid/test index file if none exists."""
    if not os.path.exists(cfg.DATA.INDEX_FILE):
        print("File is not Exists.")
        print("Creating new index file.")
        random_split_dataset()


if __name__ == '__main__':
    dataset_file_check()
    model = generate_model()
    train_dataset = train_input_fn()
    valid_dataset = valid_input_fn()
    callbacks = prepare_keras_callback()
    # steps_per_epoch=100 matches the `epoch * 100` step conversion used by
    # _learning_rate_schedule; validation steps come from the config instead
    # of a duplicated literal.
    history = model.fit(train_dataset,
                        validation_data=valid_dataset,
                        callbacks=callbacks,
                        verbose=1,
                        steps_per_epoch=100,
                        epochs=cfg.TRAIN.STEPS,
                        validation_steps=cfg.VALID.STEPS)