import os
import sys
import time

import cv2
import numpy as np
import scipy.io
from joblib import Parallel, delayed
from PIL import Image
from tqdm import tqdm


def image_channel_means(image_filenames):
    '''
    Calculate the per-channel pixel means of an image dataset.

    Images are read with cv2.imread, so the returned means are in the
    channel order OpenCV produces (BGR), not RGB.
    Images may have different sizes. The sums are accumulated in an
    object-dtype array, intended to avoid fixed-width integer overflow on
    very large datasets.

    image_filenames: list of image filenames

    Returns: float numpy array of shape (3,)
    Raises: ValueError if an image cannot be read.
    '''
    num_pixels = 0
    channel_sums = np.zeros(3, dtype=object)
    for image_filename in tqdm(image_filenames):
        image = cv2.imread(image_filename)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('Cannot read image: {}'.format(image_filename))
        channel_sums += np.sum(image, axis=(0, 1))
        num_pixels += int(image.shape[0]) * int(image.shape[1])

    channel_means = (channel_sums / num_pixels).astype(float)

    return channel_means


def save_load_means(means_filename, image_filenames, recalculate=False):
    '''
    Calculate and save the channel means of an image dataset if the mean
    file does not exist. Otherwise read the means directly from the mean file.

    means_filename: npz filename for image channel means. It should end with
        '.npz': np.savez appends that extension when it is missing, in which
        case the existence check below would never find the saved file.
    image_filenames: list of image filenames
    recalculate: recalculate image channel means regardless of the existence
        of the mean file

    Returns: numpy array of channel means
    '''
    if (not os.path.isfile(means_filename)) or recalculate:
        print('Calculating pixel means for each channel of images...')
        channel_means = image_channel_means(image_filenames=image_filenames)
        np.savez(means_filename, channel_means=channel_means)
    else:
        channel_means = np.load(means_filename)['channel_means']

    return channel_means


class RandomStateStack:
    '''
    Context manager that restores numpy's global random state on exit.

    The state is captured when the object is constructed (not when the
    `with` block is entered) and put back in __exit__.
    '''

    def __init__(self):
        self.random_state = np.random.get_state()

    def __enter__(self):
        return

    def __exit__(self, exc_type, value, traceback):
        np.random.set_state(self.random_state)


class Dataset(object):
    '''
    List of image / label file paths built from a text file of base names.
    '''

    def __init__(self, dataset_filename, images_dir, labels_dir, image_extension='.jpg', label_extension='.png'):
        '''
        dataset_filename: text file with one base filename (no extension) per line
        images_dir: directory containing the images
        labels_dir: directory containing the labels
        image_extension: extension appended to each base name for images
        label_extension: extension appended to each base name for labels
        '''
        self.dataset_filename = dataset_filename
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.image_extension = image_extension
        self.label_extension = label_extension
        self.image_filenames, self.label_filenames = self.read_dataset()
        self.size = len(self.image_filenames)

    def read_dataset(self):
        '''
        Read the dataset file and build the full image and label paths.

        Blank lines are skipped so that a trailing empty line does not
        produce a bogus entry such as '<images_dir>/.jpg'.

        Returns: (image_filenames, label_filenames) as numpy arrays
        '''
        image_filenames = []
        label_filenames = []
        with open(self.dataset_filename, 'r') as file:
            for line in file:
                filename = line.strip()
                if not filename:
                    continue
                image_filename = os.path.join(self.images_dir, filename + self.image_extension)
                label_filename = os.path.join(self.labels_dir, filename + self.label_extension)
                image_filenames.append(image_filename)
                label_filenames.append(label_filename)

        image_filenames = np.asarray(image_filenames)
        label_filenames = np.asarray(label_filenames)

        return image_filenames, label_filenames


class Iterator(object):
    '''
    Cyclic iterator over a Dataset that yields raw samples or processed
    minibatches.
    '''

    def __init__(self, dataset, minibatch_size, process_func, random_seed=None, scramble=True, num_jobs=2):
        '''
        dataset: Dataset instance
        minibatch_size: number of samples per minibatch; clamped to the
            dataset size when larger
        process_func: callable taking (image_filename, label_filename) and
            returning (image, label)
        random_seed: seed for numpy's global RNG, used only when scrambling
        scramble: shuffle the file order once at construction
        num_jobs: number of joblib workers used by next_minibatch
        '''
        self.dataset_size = dataset.size
        self.minibatch_size = minibatch_size
        if self.minibatch_size > self.dataset_size:
            print('Warning: dataset size should be no less than minibatch size.')
            print('Set minibatch size equal to dataset size.')
            self.minibatch_size = self.dataset_size
        self.image_filenames, self.label_filenames = self.read_dataset(dataset=dataset, scramble=scramble, random_seed=random_seed)
        self.current_index = 0
        self.process_func = process_func
        self.num_jobs = num_jobs

    def read_dataset(self, dataset, scramble, random_seed):
        '''
        Return the dataset's image and label filenames, optionally shuffled
        with the same permutation.

        Note: seeding and shuffling use numpy's global random state.
        '''
        idx = np.arange(self.dataset_size)
        if scramble:
            if random_seed is not None:
                np.random.seed(random_seed)
            np.random.shuffle(idx)
        image_filenames = dataset.image_filenames[idx]
        label_filenames = dataset.label_filenames[idx]

        return image_filenames, label_filenames

    def reset_index(self):
        '''
        Restart iteration from the first sample.
        '''
        self.current_index = 0

    def shuffle_dataset(self, random_seed=None):
        '''
        Reshuffle the file order in place and restart iteration.

        Note: seeding and shuffling use numpy's global random state.
        '''
        self.current_index = 0
        idx = np.arange(self.dataset_size)
        if random_seed is not None:
            np.random.seed(random_seed)
        np.random.shuffle(idx)
        self.image_filenames = self.image_filenames[idx]
        self.label_filenames = self.label_filenames[idx]

    def next_raw_data(self):
        '''
        Read the next unprocessed sample, wrapping around at the end.

        Returns: (image, label) where label gets a trailing axis of size 1.
        '''
        image_filename = self.image_filenames[self.current_index]
        label_filename = self.label_filenames[self.current_index]
        self.current_index += 1
        if self.current_index >= self.dataset_size:
            self.current_index = 0
        image = read_image(image_filename=image_filename)
        label = read_label(label_filename=label_filename)
        label = np.expand_dims(label, axis=2)

        return image, label

    def next_minibatch(self):
        '''
        Process and return the next minibatch.

        The last minibatch before wrapping around may contain fewer than
        minibatch_size samples, because the slice is cut at the dataset end.

        Returns: (images, labels) as numpy arrays stacked from process_func outputs
        '''
        batch_start = self.current_index
        batch_end = batch_start + self.minibatch_size
        image_filenames_minibatch = self.image_filenames[batch_start:batch_end]
        label_filenames_minibatch = self.label_filenames[batch_start:batch_end]
        self.current_index += self.minibatch_size
        if self.current_index >= self.dataset_size:
            self.current_index = 0

        # Multithread image processing
        # Reference: https://www.kaggle.com/inoryy/fast-image-pre-process-in-parallel
        results = Parallel(n_jobs=self.num_jobs)(
            delayed(self.process_func)(image_filename, label_filename)
            for image_filename, label_filename in zip(image_filenames_minibatch, label_filenames_minibatch))
        images, labels = zip(*results)

        images = np.asarray(images)
        labels = np.asarray(labels)

        return images, labels


def read_image(image_filename):
    '''
    Read an image with OpenCV.

    Prints a message and exits the process with a non-zero status when the
    file does not exist.
    '''
    if not os.path.isfile(image_filename):
        print("Can't open file-", image_filename)
        # Exit with a failure status; a bare sys.exit() would report success.
        sys.exit(1)
    image = cv2.imread(image_filename)

    return image


def read_label(label_filename):
    '''
    Read a segmentation label from a '.mat' file (SBD) or an image file.

    Prints a message and exits the process with a non-zero status when the
    file does not exist.
    '''
    if not os.path.isfile(label_filename):
        print("Can't open file-", label_filename)
        # Exit with a failure status; a bare sys.exit() would report success.
        sys.exit(1)
    if label_filename.endswith('.mat'):
        # http://home.bharathh.info/pubs/codes/SBD/download.html
        mat = scipy.io.loadmat(label_filename)
        label = mat['GTcls']['Segmentation'][0][0]
    else:
        # Magic function to read VOC2012 semantic labels
        # https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/remove_gt_colormap.py#L42
        label = np.asarray(Image.open(label_filename))

    return label


def subtract_channel_means(image, channel_means):
    '''
    Subtract the per-channel means from an [height, width, 3] image.
    '''
    return image - np.reshape(channel_means, (1, 1, 3))


def add_channel_means(image, channel_means):
    '''
    Add the per-channel means back to an [height, width, 3] image.
    '''
    return image + np.reshape(channel_means, (1, 1, 3))


def flip_image_and_label(image, label):
    '''
    Flip image and label left-to-right.
    '''
    image_flipped = np.fliplr(image)
    label_flipped = np.fliplr(label)

    return image_flipped, label_flipped


def resize_image_and_label(image, label, output_size):
    '''
    Resize image (bilinear) and label (nearest neighbour, so that class ids
    are not blended).

    output_size: [height, width]
    '''
    # cv2.resize expects the target size as (width, height).
    target_size = (output_size[1], output_size[0])
    image_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
    label_resized = cv2.resize(label, target_size, interpolation=cv2.INTER_NEAREST)

    return image_resized, label_resized


def pad_image_and_label(image, label, top, bottom, left, right, pixel_value=0, label_value=255):
    '''
    Pad image and label with constant borders.

    top, bottom, left, right: border widths in pixels
    pixel_value: fill value for the image border
    label_value: fill value for the label border

    https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.html#making-borders-for-images-padding
    '''
    image_padded = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pixel_value)
    label_padded = cv2.copyMakeBorder(label, top, bottom, left, right, cv2.BORDER_CONSTANT, value=label_value)

    return image_padded, label_padded


def random_crop(image, label, output_size):
    '''
    Crop the same random window from image and label.

    output_size: [height, width]; must not exceed the image size.
    Uses numpy's global random state.
    '''
    assert image.shape[0] >= output_size[0] and image.shape[1] >= output_size[1], 'image size smaller than the desired output size.'

    height_start = np.random.randint(image.shape[0] - output_size[0] + 1)
    width_start = np.random.randint(image.shape[1] - output_size[1] + 1)
    height_end = height_start + output_size[0]
    width_end = width_start + output_size[1]

    image_cropped = image[height_start:height_end, width_start:width_end]
    label_cropped = label[height_start:height_end, width_start:width_end]

    return image_cropped, label_cropped


def image_augmentaion(image, label, output_size, min_scale_factor=0.5, max_scale_factor=2.0):
    '''
    Randomly rescale, pad, crop and horizontally flip an image / label pair.

    image: [height, width, channel] array
    label: [height, width] array
    output_size: [height, width] of the returned crop
    min_scale_factor, max_scale_factor: range of the uniform random scale

    Returns: (image, label) where label gets a trailing axis of size 1.
    Uses numpy's global random state.
    '''
    original_height = image.shape[0]
    original_width = image.shape[1]
    target_height = output_size[0]
    target_width = output_size[1]

    scale_factor = np.random.uniform(low=min_scale_factor, high=max_scale_factor)

    rescaled_size = [round(original_height * scale_factor), round(original_width * scale_factor)]

    image, label = resize_image_and_label(image=image, label=label, output_size=rescaled_size)

    # Pad each dimension up to 1.5x the target size (never a negative amount),
    # splitting the padding between both sides.
    # if rescaled_size[0] < target_height:
    #     vertical_pad = round(target_height * 1.5) - rescaled_size[0]
    # else:
    #     vertical_pad = round(rescaled_size[0] * 0.5)
    vertical_pad = max(round(target_height * 1.5) - rescaled_size[0], 0)
    vertical_pad_up = vertical_pad // 2
    vertical_pad_down = vertical_pad - vertical_pad_up

    # if rescaled_size[1] < target_width:
    #     horizonal_pad = round(target_width * 1.5) - rescaled_size[1]
    # else:
    #     horizonal_pad = round(rescaled_size[1] * 0.5)
    horizontal_pad = max(round(target_width * 1.5) - rescaled_size[1], 0)
    horizontal_pad_left = horizontal_pad // 2
    horizontal_pad_right = horizontal_pad - horizontal_pad_left

    image, label = pad_image_and_label(image=image, label=label, top=vertical_pad_up, bottom=vertical_pad_down, left=horizontal_pad_left, right=horizontal_pad_right, pixel_value=0, label_value=255)

    image, label = random_crop(image=image, label=label, output_size=output_size)

    # Flip image and label
    if np.random.random() < 0.5:
        image, label = flip_image_and_label(image=image, label=label)

    label = np.expand_dims(label, axis=2)

    return image, label


class DataPreprocessor(object):
    '''
    Reads an image / label pair from disk, subtracts the channel means and
    applies random augmentation.
    '''

    def __init__(self, channel_means, output_size=None, min_scale_factor=0.5, max_scale_factor=2.0):
        '''
        channel_means: per-channel means subtracted from every image
        output_size: [height, width] of the produced samples; defaults to
            [513, 513]
        min_scale_factor, max_scale_factor: random rescaling range
        '''
        self.channel_means = channel_means
        # A fresh list per instance avoids sharing one mutable default object.
        self.output_size = [513, 513] if output_size is None else output_size
        self.min_scale_factor = min_scale_factor
        self.max_scale_factor = max_scale_factor

    def preprocess(self, image_filename, label_filename):
        '''
        Load, normalize and augment one sample.

        Returns: (image, label)
        '''
        # Read data from file
        image = read_image(image_filename=image_filename)
        label = read_label(label_filename=label_filename)

        # Image normalization
        image = subtract_channel_means(image=image, channel_means=self.channel_means)

        image, label = image_augmentaion(image=image, label=label, output_size=self.output_size, min_scale_factor=self.min_scale_factor, max_scale_factor=self.max_scale_factor)

        return image, label


'''
The following image annotation saving codes in the block are slightly modified from Google's official DeepLab repository.
https://github.com/tensorflow/models/blob/master/research/deeplab/utils/get_dataset_colormap.py
'''


def bit_get(val, idx):
    '''
    Gets the bit value.
    Args:
        val: Input value, int or numpy int array.
        idx: Which bit of the input val.
    Returns:
        The "idx"-th bit of input val.
    '''
    return (val >> idx) & 1


def create_pascal_label_colormap():
    '''
    Creates a label colormap used in PASCAL VOC segmentation benchmark.
    Returns:
        A colormap for visualizing segmentation results: integer numpy array
        of shape (256, 3).
    '''
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)

    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= bit_get(ind, channel) << shift
        # Consume three bits of the label index per pass (one per channel).
        ind >>= 3

    return colormap


def static_vars(**kwargs):
    '''
    Decorator factory that attaches the given keyword arguments to the
    decorated function as attributes.
    '''
    def decorate(func):
        for key, val in kwargs.items():
            setattr(func, key, val)
        return func
    return decorate


@static_vars(colormap=create_pascal_label_colormap())
def label_to_color_image(label):
    '''
    Adds color defined by the dataset colormap to the label.
    Args:
        label: A 2D array with integer type, storing the segmentation label.
    Returns:
        result: A 3D integer array of shape [height, width, 3]. Each pixel is
            the colormap entry indexed by the corresponding element of the
            input label.
    Raises:
        ValueError: If label is not of rank 2 or its value is larger than color map maximum entry.
    '''
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')

    if np.max(label) > 255:
        raise ValueError('label value too large.')

    return label_to_color_image.colormap[label]


def save_annotation(label, filename, add_colormap=True):
    '''
    Saves the given label to image on disk.
    Args:
        label: The numpy array to be saved. The data will be converted to uint8 before saving.
        filename: The image filename.
        add_colormap: Add color map to the label or not.
    '''
    # Add colormap for visualizing the prediction.
    colored_label = label_to_color_image(label) if add_colormap else label

    image = Image.fromarray(colored_label.astype(dtype=np.uint8))
    image.save(filename)


'''
Evaluation
'''


def validation_demo(images, labels, predictions, demo_dir, batch_no):
    '''
    Save a batch of images with their labels and predictions for inspection.

    images: 4D array, [batch, height, width, channel]
    labels: 3D array, [batch, height, width]
    predictions: 3D array, [batch, height, width]
    demo_dir: output directory, created when missing
    batch_no: batch number used in the output filenames
    '''
    assert images.ndim == 4 and labels.ndim == 3 and predictions.ndim == 3

    os.makedirs(demo_dir, exist_ok=True)

    for i, image in enumerate(images):
        cv2.imwrite(os.path.join(demo_dir, 'image_{}_{}.jpg'.format(batch_no, i)), image)
        save_annotation(label=labels[i], filename=os.path.join(demo_dir, 'image_{}_{}_label.png'.format(batch_no, i)), add_colormap=True)
        save_annotation(label=predictions[i], filename=os.path.join(demo_dir, 'image_{}_{}_prediction.png'.format(batch_no, i)), add_colormap=True)


def validation_single_demo(image, label, prediction, demo_dir, val_no):
    '''
    Save one image with its label and prediction for inspection.

    demo_dir: output directory, created when missing
    val_no: sample number used in the output filenames
    '''
    os.makedirs(demo_dir, exist_ok=True)

    cv2.imwrite(os.path.join(demo_dir, 'image_{}.jpg'.format(val_no)), image)
    save_annotation(label=label, filename=os.path.join(demo_dir, 'image_{}_label.png'.format(val_no)), add_colormap=True)
    save_annotation(label=prediction, filename=os.path.join(demo_dir, 'image_{}_prediction.png'.format(val_no)), add_colormap=True)


def single_demo(image, prediction, demo_dir, val_no):
    '''
    Save one image with its prediction for inspection.

    demo_dir: output directory, created when missing
    val_no: sample number used in the output filenames
    '''
    os.makedirs(demo_dir, exist_ok=True)

    cv2.imwrite(os.path.join(demo_dir, 'image_{}.jpg'.format(val_no)), image)
    save_annotation(label=prediction, filename=os.path.join(demo_dir, 'image_{}_prediction.png'.format(val_no)), add_colormap=True)


def count_label_prediction_matches(labels, predictions, num_classes, ignore_label):
    '''
    Count, for every class, the pixels in the union and in the intersection
    of the label mask and the prediction mask.

    Assuming valid labels are from 0 to num_classes - 1.
    Support list shaped labels and predictions.
    Pixels whose label equals ignore_label are excluded from both counts.
    The caller's predictions are not modified.

    Returns: (num_pixels_union, num_pixels_intersection), each of shape (num_classes,)
    '''
    num_pixels_union = np.zeros(num_classes)
    num_pixels_intersection = np.zeros(num_classes)

    labels = np.asarray(labels)
    # Copy: the masking below would otherwise overwrite the caller's array.
    predictions = np.array(predictions)

    assert labels.shape == predictions.shape

    predictions[labels == ignore_label] = ignore_label

    for i in range(num_classes):
        label_class_mask = labels == i
        prediction_class_mask = predictions == i
        num_pixels_union[i] = np.sum(label_class_mask | prediction_class_mask)
        num_pixels_intersection[i] = np.sum(label_class_mask & prediction_class_mask)

    return num_pixels_union, num_pixels_intersection


def mean_intersection_over_union(num_pixels_union, num_pixels_intersection):
    '''
    Mean intersection-over-union over the classes that have a non-empty union.

    Returns nan (numpy emits a warning) when no class has any pixel.
    '''
    valid_classes = num_pixels_union > 0
    mean_iou = np.mean(num_pixels_intersection[valid_classes] / num_pixels_union[valid_classes])

    return mean_iou


def multiscale_single_test(image, input_scales, predictor):
    '''
    Predict image semantic segmentation labeling using multi-scale inputs.
    Inputs:
        image: numpy array, [height, width, channel], channel = 3.
        input_scales: list of scale factors. e.g., [0.5, 1.0, 1.5].
        predictor: prediction function which takes one scaled image as input and outputs its semantic segmentation labelings.
    Returns:
        Averaged predicted logits of multi-scale inputs
    '''
    image_height_raw = image.shape[0]
    image_width_raw = image.shape[1]
    multiscale_outputs = []
    for input_scale in input_scales:
        image_height_scaled = round(image_height_raw * input_scale)
        image_width_scaled = round(image_width_raw * input_scale)
        image_scaled = cv2.resize(image, (image_width_scaled, image_height_scaled), interpolation=cv2.INTER_LINEAR)
        output = predictor(inputs=[image_scaled], target_height=image_height_raw, target_width=image_width_raw)[0]
        multiscale_outputs.append(output)

    output_mean = np.mean(multiscale_outputs, axis=0)

    return output_mean


def multiscale_single_validate(image, label, input_scales, validator):
    '''
    Validate one image using multi-scale inputs.
    Inputs:
        image: numpy array, [height, width, channel].
        label: label passed to the validator for every scale.
        input_scales: list of scale factors. e.g., [0.5, 1.0, 1.5].
        validator: function taking one scaled image and the label, returning (outputs, loss).
    Returns:
        Averaged outputs and averaged loss over the input scales
    '''
    image_height_raw = image.shape[0]
    image_width_raw = image.shape[1]
    multiscale_outputs = []
    multiscale_losses = []
    for input_scale in input_scales:
        image_height_scaled = round(image_height_raw * input_scale)
        image_width_scaled = round(image_width_raw * input_scale)
        image_scaled = cv2.resize(image, (image_width_scaled, image_height_scaled), interpolation=cv2.INTER_LINEAR)
        output, loss = validator(inputs=[image_scaled], target_height=image_height_raw, target_width=image_width_raw, labels=[label])
        multiscale_outputs.append(output[0])
        multiscale_losses.append(loss)

    output_mean = np.mean(multiscale_outputs, axis=0)
    loss_mean = np.mean(multiscale_losses)

    return output_mean, loss_mean


'''
def learning_rate_policy(iteration, max_iteration, power = 0.9):
    return (1 - iteration / max_iteration) ** power
'''


if __name__ == '__main__':

    # Smoke test: build the VOC2012 training iterator and time ten minibatches.
    np.random.seed(0)

    train_dataset = Dataset(dataset_filename='data/datasets/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt', images_dir='data/datasets/VOCdevkit/VOC2012/JPEGImages/', labels_dir='data/datasets/VOCdevkit/VOC2012/SegmentationClass/', image_extension='.jpg', label_extension='.png')
    print(train_dataset.image_filenames)
    print(train_dataset.size)

    channel_means = save_load_means(means_filename='channel_means.npz', image_filenames=train_dataset.image_filenames, recalculate=False)
    print(channel_means)

    voc2012_preprocessor = DataPreprocessor(channel_means=channel_means, output_size=[513, 513], max_scale_factor=1.5)

    # Single thread is faster :(
    train_iterator = Iterator(dataset=train_dataset, minibatch_size=16, process_func=voc2012_preprocessor.preprocess, random_seed=None, scramble=True, num_jobs=1)

    # Test iterator
    time_start = time.time()
    for i in range(10):
        print(i)
        images, labels = train_iterator.next_minibatch()
        # print(images.shape, labels.shape)
    time_end = time.time()
    time_elapsed = time_end - time_start

    print('Time Elapsed: %02d:%02d:%02d' % (time_elapsed // 3600, (time_elapsed % 3600 // 60), (time_elapsed % 60 // 1)))