"""Feature extraction helpers built on a VGG-16 network (Keras 1.x API).

The module builds the VGG-16 model, exposes two of its convolutional feature
maps (spatial reductions of 16 and 32 with respect to the input image) and
turns a region of those maps into a ``feature_size x feature_size`` descriptor
per channel, either by max-pooling or by bilinear interpolation.
"""
import math
import time

import cv2
import numpy
import numpy as np
import scipy
import scipy.ndimage
from scipy import interpolate

from keras import backend as K
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.core import Flatten, Dense, Dropout
from keras.models import Sequential
from keras.optimizers import SGD

# the feature size is of 7x7xp, being p the number of channels
feature_size = 7
# the relative scale reduction of the shallower feature map compared to the
# initial image input
scale_reduction_shallower_feature = 16
# the relative scale reduction of the deeper feature map compared to the
# initial image input
scale_reduction_deeper_feature = 32
# scaling of the input image
factor_x_input = float(1)
factor_y_input = float(1)

# Per-channel mean pixel subtracted from the image, in B, G, R order.
_MEAN_PIXEL_BGR = (103.939, 116.779, 123.68)
# Indices into the layer list created by vgg_16():
#   23 -> fourth max-pooling layer (shallower feature map),
#   31 -> Flatten layer,
#   33 -> Dropout layer that follows the first Dense(4096) layer.
_SHALLOW_FEATURE_LAYER = 23
_FLATTEN_LAYER = 31
_FIRST_DENSE_DROPOUT_LAYER = 33
# Number of trailing layers (Flatten, 3 x Dense, 2 x Dropout) removed to keep
# only the convolutional part of VGG-16.
_NUM_CLASSIFIER_LAYERS = 6
# (number of filters, number of convolutions) of each convolutional block.
_VGG16_CONV_BLOCKS = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))


def interpolate_2d_features(features):
    """Bilinearly resample one channel to ``feature_size x feature_size``.

    Args:
        features: 2-D array holding a single channel of a feature map.

    Returns:
        Array of shape ``(feature_size, feature_size)``.

    Raises:
        ValueError: if ``features`` is not 2-D or has an empty axis.
    """
    features = np.asarray(features)
    if features.ndim != 2 or 0 in features.shape:
        raise ValueError('features must be a non-empty 2-D array, got shape '
                         '%s' % (features.shape,))
    # A degree-1 spline needs at least two samples per axis; duplicating the
    # only sample makes the interpolation constant along that axis.
    for axis in (0, 1):
        if features.shape[axis] == 1:
            features = np.repeat(features, 2, axis=axis)

    rows = np.arange(features.shape[0])
    cols = np.arange(features.shape[1])
    spline = interpolate.RectBivariateSpline(rows, cols, features, kx=1, ky=1)
    new_rows = np.linspace(rows.min(), rows.max(), feature_size)
    new_cols = np.linspace(cols.min(), cols.max(), feature_size)
    return spline(new_rows, new_cols)


def interpolate_3d_features(features):
    """Resample every channel of a feature map with interpolate_2d_features.

    Args:
        features: array indexed as ``[channel, row, column]``.

    Returns:
        Array of shape ``(channels, feature_size, feature_size)``.
    """
    # Size the output from the input instead of assuming 512 channels.
    n_channels = features.shape[0]
    new_features = np.zeros([n_channels, feature_size, feature_size])
    for channel in range(n_channels):
        new_features[channel, :, :] = interpolate_2d_features(
            features[channel, :, :])
    return new_features


def pop_layer(model):
    """Remove the last layer of ``model`` in place and return the model.

    Raises:
        Exception: if the model has no outputs.
    """
    if not model.outputs:
        raise Exception('Sequential model cannot be popped: model is empty.')

    model.layers.pop()
    if model.layers:
        # The new last layer becomes the model output.
        last_layer = model.layers[-1]
        last_layer.outbound_nodes = []
        model.outputs = [last_layer.output]
    else:
        model.outputs = []
        model.inbound_nodes = []
        model.outbound_nodes = []
    model.built = False
    return model


def get_convolutional_vgg16_compiled(vgg_weights_path):
    """Return the compiled VGG-16 without its last six layers.

    Args:
        vgg_weights_path: path of the weights file handed to vgg_16().
    """
    model_vgg = obtain_compiled_vgg_16(vgg_weights_path)
    for _ in range(_NUM_CLASSIFIER_LAYERS):
        model_vgg = pop_layer(model_vgg)
    return model_vgg


def _preprocess_image(im):
    """Convert an image into the single-image float32 batch fed to the model.

    The steps are: cast to float32, reverse the channel order ('RGB'->'BGR'),
    zero-center by the mean pixel, move the last axis first and add a leading
    batch axis. The caller's array is not modified (``astype`` copies).
    """
    im = im.astype(np.float32)
    if K.image_dim_ordering() == 'th':
        # NOTE(review): this branch flips and zero-centers along axis 0, which
        # is the channel axis only for channel-first input, yet the transpose
        # below treats the input as channel-last. Kept exactly as in the
        # original so the produced features do not change -- confirm intent.
        im = im[::-1, :, :]
        for channel, mean in enumerate(_MEAN_PIXEL_BGR):
            im[channel, :, :] -= mean
    else:
        im = im[:, :, ::-1]
        for channel, mean in enumerate(_MEAN_PIXEL_BGR):
            im[:, :, channel] -= mean
    im = im.transpose((2, 0, 1))
    return np.expand_dims(im, axis=0)


def _forward(model, output_tensors, batch):
    """Evaluate ``output_tensors`` of ``model`` on ``batch``.

    The learning phase is fed as 0. Returns the list produced by the backend
    function, one entry per requested tensor.
    """
    inputs = [K.learning_phase()] + model.inputs
    return K.function(inputs, output_tensors)([0] + [batch])


def get_feature_maps(model, img):
    """Return ``[shallower feature map, deeper feature map]`` for ``img``.

    Both maps are evaluated in a single forward pass; the values are the same
    as calling get_feature_map_4() and get_feature_map_8() separately.
    """
    output_tensors = ([model.layers[_SHALLOW_FEATURE_LAYER].output] +
                      list(model.outputs))
    outputs = _forward(model, output_tensors, _preprocess_image(img))
    # Drop the batch axis of each output.
    return [np.asarray(outputs[0])[0], np.asarray(outputs[1])[0]]


# get deeper feature map
def get_feature_map_8(model, im):
    """Return the first model output for ``im`` without the batch axis."""
    outputs = _forward(model, model.outputs, _preprocess_image(im))
    return np.asarray(outputs[0])[0]


# get shallower feature map
def get_feature_map_4(model, im):
    """Return the output of layer 23 for ``im`` without the batch axis."""
    outputs = _forward(model, [model.layers[_SHALLOW_FEATURE_LAYER].output],
                       _preprocess_image(im))
    return np.asarray(outputs[0])[0]


def crop_roi(feature_map, coordinates):
    """Crop a region out of a ``[channel, axis1, axis2]`` feature map.

    Args:
        feature_map: 3-D array.
        coordinates: ``(start1, start2, size1, size2)``; the first and third
            values address axis 1, the second and fourth address axis 2.

    Returns:
        ``feature_map[:, start1:start1 + size1, start2:start2 + size2]`` with
        every bound truncated to an integer.
    """
    # Slice bounds must be integers; fractional bounds are truncated, as old
    # NumPy releases did implicitly for float indices.
    start_1 = int(coordinates[0])
    stop_1 = int(coordinates[0] + coordinates[2])
    start_2 = int(coordinates[1])
    stop_2 = int(coordinates[1] + coordinates[3])
    return feature_map[:, start_1:stop_1, start_2:stop_2]


# this method decides whether to use the deeper or the shallower feature map
# and then crops and interpolates if necessary the features to obtain a final
# descriptor of 7x7xp
def obtain_descriptor_from_feature_map(feature_maps, region_coordinates):
    """Build the descriptor of an image region from precomputed feature maps.

    Args:
        feature_maps: ``[shallower map, deeper map]`` as returned by
            get_feature_maps(), each indexed as ``[channel, axis1, axis2]``.
        region_coordinates: ``(x, y, width, height)`` of the region in input
            image units; ``x``/``width`` address axis 1 of the feature map and
            ``y``/``height`` address axis 2.

    Returns:
        The interpolated features, shape ``(channels, feature_size,
        feature_size)``, when the cropped region is smaller than
        ``feature_size`` along any axis; otherwise the max-pooled features,
        shape ``(feature_size, feature_size, channels)``.
        NOTE(review): the two paths use different axis orders; kept as in the
        original because consumers may depend on it -- confirm.
    """
    initial_width = region_coordinates[2] * factor_x_input
    initial_height = region_coordinates[3] * factor_y_input

    # Use the deeper map only when the region is large enough to still cover
    # feature_size cells after the stronger reduction.
    scale_aux = (math.sqrt(initial_height * initial_width) /
                 math.sqrt(feature_size * feature_size))
    if scale_aux > scale_reduction_deeper_feature:
        scale = scale_reduction_deeper_feature
        feature_map = feature_maps[1]
    else:
        scale = scale_reduction_shallower_feature
        feature_map = feature_maps[0]

    # The region, in feature map cells, spans at least feature_size cells.
    new_width = max(initial_width / scale, feature_size)
    new_height = max(initial_height / scale, feature_size)
    xo = region_coordinates[0] / scale
    yo = region_coordinates[1] / scale

    # Shift the region back inside the feature map, never below index 0.
    map_size_1, map_size_2 = feature_map.shape[1], feature_map.shape[2]
    if new_width + xo > map_size_1:
        xo = map_size_1 - new_width
    if new_height + yo > map_size_2:
        yo = map_size_2 - new_height
    xo = max(xo, 0)
    yo = max(yo, 0)

    roi = crop_roi(feature_map, (xo, yo, new_width, new_height))
    if roi.shape[1] < feature_size or roi.shape[2] < feature_size:
        # Too few cells to pool from: upsample instead.
        return interpolate_3d_features(roi)
    return extract_features_from_roi(roi)


# ROI-pooling features
def extract_features_from_roi(roi):
    """Max-pool a region into a ``feature_size x feature_size`` grid.

    Axes 1 and 2 of ``roi`` are split into ``feature_size`` bins of equal
    integer size; the last bin of each axis also takes the remaining cells.

    Args:
        roi: array indexed as ``[channel, axis1, axis2]`` with at least
            ``feature_size`` cells along axes 1 and 2.

    Returns:
        Array of shape ``(feature_size, feature_size, channels)``.

    Raises:
        ValueError: if the region is smaller than ``feature_size`` along
            axis 1 or axis 2.
    """
    n_channels, roi_width, roi_height = roi.shape[0], roi.shape[1], roi.shape[2]
    if roi_width < feature_size or roi_height < feature_size:
        raise ValueError('roi must span at least %d cells per axis, got shape '
                         '%s' % (feature_size, roi.shape))

    # Integer bin sizes: slice bounds must be integers.
    bin_width = roi_width // feature_size
    bin_height = roi_height // feature_size
    last_bin = feature_size - 1

    pooled_values = np.zeros([feature_size, feature_size, n_channels])
    for i in range(feature_size):
        start_1 = i * bin_width
        stop_1 = roi_width if i == last_bin else start_1 + bin_width
        for k in range(feature_size):
            start_2 = k * bin_height
            stop_2 = roi_height if k == last_bin else start_2 + bin_height
            # Maximum of the bin for all channels at once.
            patch = roi[:, start_1:stop_1, start_2:stop_2]
            pooled_values[i, k, :] = patch.max(axis=(1, 2))
    return pooled_values


def calculate_all_initial_feature_maps(images, model, image_names):
    """Compute get_feature_maps() for the first ``np.size(image_names)`` images.

    Returns:
        List with one ``[shallower map, deeper map]`` entry per image.
    """
    return [get_feature_maps(model, np.array(images[index]))
            for index in range(np.size(image_names))]


def get_image_descriptor_for_image(image, model):
    """Return the output of layer 33 for ``image`` resized to 224x224.

    Returns:
        The list produced by the backend function (one array).
    """
    batch = _preprocess_image(cv2.resize(image, (224, 224)))
    return _forward(model, [model.layers[_FIRST_DENSE_DROPOUT_LAYER].output],
                    batch)


def get_conv_image_descriptor_for_image(image, model):
    """Return the output of layer 31 for ``image`` resized to 224x224.

    Returns:
        The list produced by the backend function (one array).
    """
    batch = _preprocess_image(cv2.resize(image, (224, 224)))
    return _forward(model, [model.layers[_FLATTEN_LAYER].output], batch)


def obtain_compiled_vgg_16(vgg_weights_path):
    """Build VGG-16 with vgg_16() and compile it with SGD."""
    model = vgg_16(vgg_weights_path)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')
    return model


def vgg_16(weights_path=None):
    """Build the VGG-16 Sequential model for ``(3, 224, 224)`` inputs.

    The layer order is relied upon by the layer indices used in this module,
    so it must not change.

    Args:
        weights_path: optional weights file loaded into the model.
    """
    model = Sequential()
    is_first_layer = True
    for n_filters, n_convolutions in _VGG16_CONV_BLOCKS:
        for _ in range(n_convolutions):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1, 1), input_shape=(3, 224, 224)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(n_filters, 3, 3, activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)
    return model