import os from keras.layers import BatchNormalization from keras.layers import Conv2D from keras.layers import Dense from keras.layers import GlobalAveragePooling2D from keras.layers import GlobalMaxPooling2D from keras.layers import Input from keras.layers import MaxPool2D from keras.layers import ReLU from keras.layers import add from keras.models import Model from keras.utils import get_source_inputs from keras import backend as K from keras_applications.imagenet_utils import _obtain_input_shape from octave_conv_block import initial_oct_conv_bn_relu, final_oct_conv_bn_relu, oct_conv_bn_relu def _conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), padding='same', bias=False): x = Conv2D(filters, kernel_size, strides=strides, padding=padding, use_bias=bias, kernel_initializer='he_normal')(ip) return x def _conv_bn_relu(ip, filters, kernel_size=(3, 3), strides=(1, 1), padding='same', bias=False, activation=True): channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = _conv_block(ip, filters, kernel_size, strides, padding, bias) x = BatchNormalization(axis=channel_axis)(x) if activation: x = ReLU()(x) return x def _octresnet_bottleneck_block(ip, filters, alpha=0.5, strides=(1, 1), downsample_shortcut=False, first_block=False, expansion=4): if first_block: x_high_res, x_low_res = initial_oct_conv_bn_relu(ip, filters, kernel_size=(1, 1), alpha=alpha) x_high, x_low = oct_conv_bn_relu(x_high_res, x_low_res, filters, kernel_size=(3, 3), strides=strides, alpha=alpha) else: x_high_res, x_low_res = ip x_high, x_low = oct_conv_bn_relu(x_high_res, x_low_res, filters, kernel_size=(1, 1), alpha=alpha) x_high, x_low = oct_conv_bn_relu(x_high, x_low, filters, kernel_size=(3, 3), strides=strides, alpha=alpha) final_out_filters = int(filters * expansion) x_high, x_low = oct_conv_bn_relu(x_high, x_low, filters=final_out_filters, kernel_size=(1, 1), alpha=alpha, activation=False) if downsample_shortcut: x_high_res, x_low_res = oct_conv_bn_relu(x_high_res, x_low_res, final_out_filters, kernel_size=(1, 1), strides=strides, alpha=alpha, activation=False) x_high = add([x_high, x_high_res]) x_low = add([x_low, x_low_res]) x_high = ReLU()(x_high) x_low = ReLU()(x_low) return x_high, x_low def _octresnet_final_bottleneck_block(ip, filters, alpha=0.5, strides=(1, 1), downsample_shortcut=False, expansion=4): x_high_res, x_low_res = ip x_high, x_low = oct_conv_bn_relu(x_high_res, x_low_res, filters, kernel_size=(1, 1), alpha=alpha) x_high, x_low = oct_conv_bn_relu(x_high, x_low, filters, kernel_size=(3, 3), strides=strides, alpha=alpha) final_filters = int(filters * expansion) x_high = final_oct_conv_bn_relu(x_high, x_low, final_filters, kernel_size=(1, 1), activation=False) if downsample_shortcut: x_high_res = final_oct_conv_bn_relu(x_high_res, x_low_res, final_filters, kernel_size=(1, 1), strides=strides, activation=False) x = add([x_high, x_high_res]) x = ReLU()(x) return x def _bottleneck_original(ip, filters, strides=(1, 1), downsample_shortcut=False, expansion=4): final_filters = int(filters * expansion) shortcut = ip x = _conv_bn_relu(ip, filters, kernel_size=(1, 1)) x = _conv_bn_relu(x, filters, kernel_size=(3, 3), strides=strides) x = _conv_bn_relu(x, final_filters, kernel_size=(1, 1), activation=False) if downsample_shortcut: shortcut = _conv_block(shortcut, final_filters, kernel_size=(1, 1), strides=strides) x = add([x, shortcut]) x = ReLU()(x) return x def OctaveResNet(layers, include_top=True, weights=None, input_tensor=None, input_shape=None, pooling=None, classes=1000, alpha=0.5, expansion=1, initial_filters=64, initial_strides=False, **kwargs): """ Instantiates a Octave ResNet architecture. # Arguments layers: list of integers defining the depth of the network and the number of octave conv blocks per level of the network. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. alpha: float between [0, 1]. Defines the ratio of filters allocated to the high frequency and low frequency branches of the octave conv. expansion: int/float. Multiplicative factor to increase the number of filters in each octave block. initial_filters: number of filters in the first convolution layer. Determines how many parameters the network will have. initial_strides: bool to determine whether to apply a strided convolution and max pooling before any octave conv block. Set to True for ImageNet models and False for CIFAR models. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. ` ValueError: If `alpha` is < 0 or > 1. ValueError: If `layers` is not a list or a tuple of integers. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') if alpha < 0. or alpha > 1.: raise ValueError('`alpha` must be between 0 and 1. Current alpha = ' '%f' % alpha) if type(layers) not in [list, tuple]: raise ValueError('`layers` must be a list/tuple of integers. ' 'Current layers = ', layers) # Force convert all layer values to integers layers = [int(x) for x in layers] # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=32, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if initial_strides: initial_strides = (2, 2) else: initial_strides = (1, 1) x = _conv_bn_relu(img_input, filters=64, kernel_size=(7, 7), strides=initial_strides) if initial_strides: x = MaxPool2D((3, 3), strides=(2, 2), padding='same')(x) num_filters = initial_filters num_blocks = len(layers) for i in range(num_blocks - 1): for j in range(layers[i]): if j == 0: strides = (2, 2) downsample_shortcut = True else: strides = (1, 1) downsample_shortcut = False # first block has no downsample, no shortcut if i == 0 and j == 0: first_block = True strides = (1, 1) downsample_shortcut = True else: first_block = False x = _octresnet_bottleneck_block(x, num_filters, alpha, strides, downsample_shortcut, first_block, expansion) # double number of filters per block num_filters *= 2 # final block for j in range(layers[-1]): if j == 0: strides = (2, 2) x = _octresnet_final_bottleneck_block(x, num_filters, alpha, strides, downsample_shortcut=True, expansion=expansion) else: strides = (1, 1) x = _bottleneck_original(x, num_filters, strides, expansion=expansion) if include_top: x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='fc')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input model = Model(inputs, x, name='OctaveResNet') return model def OctaveResNet50(include_top=True, weights=None, input_tensor=None, input_shape=None, pooling=None, classes=1000, alpha=0.5, expansion=4, initial_filters=64, initial_strides=True, **kwargs): return OctaveResNet([3, 4, 6, 3], include_top, weights, input_tensor, input_shape, pooling, classes, alpha, expansion, initial_filters, initial_strides, **kwargs) def OctaveResNet101(include_top=True, weights=None, input_tensor=None, input_shape=None, pooling=None, classes=1000, alpha=0.5, expansion=4, initial_filters=64, initial_strides=True, **kwargs): return OctaveResNet([3, 4, 23, 3], include_top, weights, input_tensor, input_shape, pooling, classes, alpha, expansion, initial_filters, initial_strides, **kwargs) def OctaveResNet152(include_top=True, weights=None, input_tensor=None, input_shape=None, pooling=None, classes=1000, alpha=0.5, expansion=4, initial_filters=64, initial_strides=True, **kwargs): return OctaveResNet([3, 8, 36, 3], include_top, weights, input_tensor, input_shape, pooling, classes, alpha, expansion, initial_filters, initial_strides, **kwargs) if __name__ == '__main__': model = OctaveResNet50(input_shape=(256, 256, 3), classes=1000, alpha=0.125, expansion=4, initial_filters=64, initial_strides=True) model.summary()