""" Keras RFCN Copyright (c) 2018 Licensed under the MIT License (see LICENSE for details) Written by parap1uie-s@github.com """ ''' This is base class of RFCN Model Contain some functions like load_weights、find_last...etc ''' import re import keras import tensorflow as tf import datetime from KerasRFCN.Data_generator import data_generator import os import KerasRFCN.Utils import numpy as np class BaseModel(object): """docstring for BaseModel""" def __init__(self, arg): super(BaseModel, self).__init__() self.arg = arg def find_last(self): """Finds the last checkpoint file of the last trained model in the model directory. Returns: log_dir: The directory where events and weights are saved checkpoint_path: the path to the last checkpoint file """ # Get directory names. Each directory corresponds to a model dir_names = next(os.walk(self.model_dir))[1] key = self.config.NAME.lower() dir_names = filter(lambda f: f.startswith(key), dir_names) dir_names = sorted(dir_names) if not dir_names: return None, None # Pick last directory dir_name = os.path.join(self.model_dir, dir_names[-1]) # Find the last checkpoint checkpoints = next(os.walk(dir_name))[2] checkpoints = filter(lambda f: f.startswith("Keras-RFCN"), checkpoints) checkpoints = sorted(checkpoints) if not checkpoints: return dir_name, None checkpoint = os.path.join(dir_name, checkpoints[-1]) return dir_name, checkpoint def load_weights(self, filepath, by_name=False, exclude=None): """Modified version of the correspoding Keras function with the addition of multi-GPU support and the ability to exclude some layers from loading. exlude: list of layer names to excluce """ import h5py # Keras 2.2 use saving try: from keras.engine import saving except ImportError: # Keras before 2.2 used the 'topology' namespace. from keras.engine import topology as saving if exclude: by_name = True if h5py is None: raise ImportError('`load_weights` requires h5py.') f = h5py.File(filepath, mode='r') if 'layer_names' not in f.attrs and 'model_weights' in f: f = f['model_weights'] # In multi-GPU training, we wrap the model. Get layers # of the inner model because they have the weights. keras_model = self.keras_model layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ else keras_model.layers # Exclude some layers if exclude: layers = filter(lambda l: l.name not in exclude, layers) if by_name: saving.load_weights_from_hdf5_group_by_name(f, layers) else: saving.load_weights_from_hdf5_group(f, layers) if hasattr(f, 'close'): f.close() # Update the log directory self.set_log_dir(filepath) def get_imagenet_weights(self): """Downloads ImageNet trained weights from Keras. Returns path to weights file. """ from keras.utils.data_utils import get_file TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ 'releases/download/v0.2/'\ 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_WEIGHTS_PATH_NO_TOP, cache_subdir='models', md5_hash='a268eb855778b3df3c7506639542a6af') return weights_path def compile(self, learning_rate, momentum): """Gets the model ready for training. Adds losses, regularization, and metrics. Then calls the Keras compile() function. """ # Optimizer object optimizer = keras.optimizers.SGD(lr=learning_rate, momentum=momentum, clipnorm=5.0) # Add Losses # First, clear previously set losses to avoid duplication self.keras_model._losses = [] self.keras_model._per_input_losses = {} loss_names = ["rpn_class_loss", "rpn_bbox_loss", "mrcnn_class_loss", "mrcnn_bbox_loss"] for name in loss_names: layer = self.keras_model.get_layer(name) if layer.output in self.keras_model.losses: continue self.keras_model.add_loss( tf.reduce_mean(layer.output, keepdims=True)) # Add L2 Regularization # Skip gamma and beta weights of batch normalization layers. reg_losses = [keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) for w in self.keras_model.trainable_weights if 'gamma' not in w.name and 'beta' not in w.name] self.keras_model.add_loss(tf.add_n(reg_losses)) # Compile self.keras_model.compile(optimizer=optimizer, loss=[ None] * len(self.keras_model.outputs)) # Add metrics for losses for name in loss_names: if name in self.keras_model.metrics_names: continue layer = self.keras_model.get_layer(name) self.keras_model.metrics_names.append(name) self.keras_model.metrics_tensors.append(tf.reduce_mean( layer.output, keepdims=True)) def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): """Sets model layers as trainable if their names match the given regular expression. """ # Print message on the first call (but not on recursive calls) if verbose > 0 and keras_model is None: print("Selecting layers to train") keras_model = keras_model or self.keras_model # In multi-GPU training, we wrap the model. Get layers # of the inner model because they have the weights. layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ else keras_model.layers for layer in layers: # Is the layer a model? if layer.__class__.__name__ == 'Model': print("In model: ", layer.name) self.set_trainable( layer_regex, keras_model=layer, indent=indent + 4) continue if not layer.weights: continue # Is it trainable? trainable = bool(re.fullmatch(layer_regex, layer.name)) # Update layer. If layer is a container, update inner layer. if layer.__class__.__name__ == 'TimeDistributed': layer.layer.trainable = trainable else: layer.trainable = trainable # Print trainble layer names if trainable and verbose > 0: print("{}{:20} ({})".format(" " * indent, layer.name, layer.__class__.__name__)) def set_log_dir(self, model_path=None): """Sets the model log directory and epoch counter. model_path: If None, or a format different from what this code uses then set a new log directory and start epochs from 0. Otherwise, extract the log directory and the epoch counter from the file name. """ # Set date and epoch counter as if starting a new model self.epoch = 0 now = datetime.datetime.now() # If we have a model path with date and epochs use them if model_path: # Continue from we left of. Get epoch and date from the file name # A sample model path might look like: regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/Keras-RFCN\_\w+(\d{4})\.h5" m = re.match(regex, model_path) if m: now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5))) self.epoch = int(m.group(6)) + 1 # Directory for training logs self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format( self.config.NAME.lower(), now)) # Path to save after each epoch. Include placeholders that get filled by Keras. self.checkpoint_path = os.path.join(self.log_dir, "Keras-RFCN_{}_*epoch*.h5".format( self.config.NAME.lower())) self.checkpoint_path = self.checkpoint_path.replace( "*epoch*", "{epoch:04d}") def train(self, train_dataset, val_dataset, learning_rate, epochs, layers): """Train the model. train_dataset, val_dataset: Training and validation Dataset objects. learning_rate: The learning rate to train with epochs: Number of training epochs. Note that previous training epochs are considered to be done alreay, so this actually determines the epochs to train in total rather than in this particaular call. layers: Allows selecting wich layers to train. It can be: - A regular expression to match layer names to train - One of these predefined values: heaads: The RPN, classifier and mask heads of the network all: All the layers 3+: Train Resnet stage 3 and up 4+: Train Resnet stage 4 and up 5+: Train Resnet stage 5 and up """ assert self.mode == "training", "Create model in training mode." # Pre-defined layer regular expressions layer_regex = { # all layers but the backbone "heads": r"(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", # From a specific Resnet stage and up "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", "5+": r"(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", # All layers "all": ".*", } if layers in layer_regex.keys(): layers = layer_regex[layers] # Data generators train_generator = data_generator(train_dataset, self.config, shuffle=True, batch_size=self.config.BATCH_SIZE) val_generator = data_generator(val_dataset, self.config, shuffle=True, batch_size=self.config.BATCH_SIZE, augment=False) # Callbacks callbacks = [ keras.callbacks.TensorBoard(log_dir=self.log_dir, histogram_freq=0, write_graph=True, write_images=False), keras.callbacks.ModelCheckpoint(self.checkpoint_path, verbose=0, save_weights_only=True, save_best_only=True), keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=10, verbose=1, mode='auto', min_delta=0.001, min_lr=0) ] # Train print("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) print("Checkpoint Path: {}".format(self.checkpoint_path)) self.set_trainable(layers, verbose=0) self.compile(learning_rate, self.config.LEARNING_MOMENTUM) # Work-around for Windows: Keras fails on Windows when using # multiprocessing workers. See discussion here: # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 if os.name is 'nt': workers = 0 else: workers = max(self.config.BATCH_SIZE // 2, 2) self.keras_model.fit_generator( train_generator, initial_epoch=self.epoch, epochs=epochs, steps_per_epoch=self.config.STEPS_PER_EPOCH, callbacks=callbacks, validation_data=next(val_generator), validation_steps=self.config.VALIDATION_STEPS, max_queue_size=100, workers=4, use_multiprocessing=True, ) self.epoch = max(self.epoch, epochs) def detect(self, images, verbose=0): """Runs the detection pipeline. images: List of images, potentially of different sizes. Returns a list of dicts, one dict per image. The dict contains: rois: [N, (y1, x1, y2, x2)] detection bounding boxes class_ids: [N] int class IDs scores: [N] float probability scores for the class IDs """ assert self.mode == "inference", "Create model in inference mode." assert len( images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" if verbose: print("Processing {} images".format(len(images))) # Mold inputs to format expected by the neural network molded_images, image_metas, windows = self.mold_inputs(images) # Run object detection detections, mrcnn_class, mrcnn_bbox, \ rois, rpn_class, rpn_bbox =\ self.keras_model.predict([molded_images, image_metas], verbose=0) # Process detections results = [] for i, image in enumerate(images): final_rois, final_class_ids, final_scores =\ self.unmold_detections(detections[i], image.shape, windows[i]) results.append({ "rois": final_rois, "class_ids": final_class_ids, "scores": final_scores }) return results def mold_inputs(self, images): """Takes a list of images and modifies them to the format expected as an input to the neural network. images: List of image matricies [height,width,depth]. Images can have different sizes. Returns 3 Numpy matricies: molded_images: [N, h, w, 3]. Images resized and normalized. image_metas: [N, length of meta data]. Details about each image. windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the original image (padding excluded). """ molded_images = [] image_metas = [] windows = [] for image in images: # Resize image to fit the model expected size # TODO: move resizing to mold_image() molded_image, window, scale, padding = KerasRFCN.Utils.resize_image( image, min_dim=self.config.IMAGE_MIN_DIM, max_dim=self.config.IMAGE_MAX_DIM, padding=self.config.IMAGE_PADDING) molded_image = KerasRFCN.Utils.mold_image(molded_image, self.config) # Build image_meta image_meta = KerasRFCN.Utils.compose_image_meta( 0, image.shape, window, np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) # Append molded_images.append(molded_image) windows.append(window) image_metas.append(image_meta) # Pack into arrays molded_images = np.stack(molded_images) image_metas = np.stack(image_metas) windows = np.stack(windows) return molded_images, image_metas, windows def unmold_detections(self, detections, image_shape, window): """Reformats the detections of one image from the format of the neural network output to a format suitable for use in the rest of the application. detections: [N, (y1, x1, y2, x2, class_id, score)] image_shape: [height, width, depth] Original size of the image before resizing window: [y1, x1, y2, x2] Box in the image where the real image is excluding the padding. Returns: boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels class_ids: [N] Integer class IDs for each bounding box scores: [N] Float probability scores of the class_id """ # How many detections do we have? # Detections array is padded with zeros. Find the first class_id == 0. zero_ix = np.where(detections[:, 4] == 0)[0] N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] # Extract boxes, class_ids, scores boxes = detections[:N, :4] class_ids = detections[:N, 4].astype(np.int32) scores = detections[:N, 5] # Compute scale and shift to translate coordinates to image domain. h_scale = image_shape[0] / (window[2] - window[0]) w_scale = image_shape[1] / (window[3] - window[1]) scale = min(h_scale, w_scale) shift = window[:2] # y, x scales = np.array([scale, scale, scale, scale]) shifts = np.array([shift[0], shift[1], shift[0], shift[1]]) # Translate bounding boxes to image domain boxes = np.multiply(boxes - shifts, scales).astype(np.int32) # Filter out detections with zero area. Often only happens in early # stages of training when the network weights are still a bit random. exclude_ix = np.where( (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] if exclude_ix.shape[0] > 0: boxes = np.delete(boxes, exclude_ix, axis=0) class_ids = np.delete(class_ids, exclude_ix, axis=0) scores = np.delete(scores, exclude_ix, axis=0) N = class_ids.shape[0] return boxes, class_ids, scores