"""Image pre-/post-processing and color-clustering helpers.

Examples:

How to K-means cluster train images into 2 clusters based on colors
    img_df = create_color_features(img_df)
    img_df, cluster_maker = create_color_clusters(img_df, 2)

How to perform the same K-means on test images
    test_img_df = create_color_features(test_img_df)
    test_img_df, _ = create_color_clusters(test_img_df, 2, cluster_maker)

How to process train/test images, for now
    process_images(img_df)
    process_images(test_img_df)

How to cluster train/test images to gray/color data frames
    cluster_train_df_list = split_cluster_to_group(img_df, 2)
    cluster_test_df_list = split_cluster_to_group(test_img_df, 2)

For test images, you should get 53 gray images and 12 color images
    print(len(cluster_test_df_list[0]))
    print(len(cluster_test_df_list[1]))
"""

import os
import sys
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Imported from the public namespace: `scipy.ndimage.morphology` is a
# deprecated alias of it.
from scipy.ndimage import binary_fill_holes
from skimage.io import imshow
from skimage.segmentation import random_walker
from sklearn.cluster import KMeans
from tqdm import tqdm

from image_windows import split_image_to_windows, stitch_single_image, stitch_all_images


def _clahe_on_lightness(in_rgb_img):
    """Convert an RGB image to LAB and CLAHE-equalize its L channel.

    Returns:
        A ``(lab, equalized_l)`` tuple: the LAB image as produced by OpenCV
        (L channel untouched) and the equalized L channel.
    """
    bgr = in_rgb_img[:, :, [2, 1, 0]]  # flip r and b; extra channels are dropped
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return lab, clahe.apply(lab[:, :, 0])


def _find_contours(binary_img):
    """Return the contours of a binary image on any OpenCV major version.

    OpenCV 3.x returns ``(image, contours, hierarchy)`` while 2.x and 4.x
    return ``(contours, hierarchy)``; the contours are always second to last.
    """
    return cv2.findContours(binary_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]


def rgb_clahe(in_rgb_img):
    """Apply CLAHE to the lightness of an RGB image and return an RGB image."""
    lab, equalized_l = _clahe_on_lightness(in_rgb_img)
    lab[:, :, 0] = equalized_l
    bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    return bgr[:, :, [2, 1, 0]]


def rgb_clahe_justl(in_rgb_img):
    """Return only the CLAHE-equalized lightness channel of an RGB image."""
    _, equalized_l = _clahe_on_lightness(in_rgb_img)
    return equalized_l


def invert_image(image):
    """Invert the image when its average intensity is above 127.

    Images whose average is 127 or below are returned unchanged.
    """
    if np.average(image) > 127:
        return cv2.bitwise_not(image)
    return image


def mark_contours(mask):
    """Pass in a 2-dimensional grayscale mask, mark contours of the mask,
    and return the 2-dimensional grayscale contoured mask.

    A mask with a trailing channel axis is accepted as well; only the first
    two axes are padded.
    """
    padded = 2
    mask = mask.astype(np.uint8) * 255

    background = np.zeros(mask.shape[:2], dtype=np.uint8)
    padded_background = np.pad(
        background, ((padded, padded), (padded, padded)), 'edge')
    background_rgb = cv2.cvtColor(padded_background, cv2.COLOR_GRAY2RGB)

    # Build the pad width from the actual rank so that a plain 2-D mask works
    # (a fixed 3-axis pad width raises on 2-D input).
    pad_width = ((padded, padded), (padded, padded)) + ((0, 0),) * (mask.ndim - 2)
    padded_mask = np.pad(mask, pad_width, 'edge')

    _, thresh = cv2.threshold(padded_mask, 127, 255, cv2.THRESH_BINARY)
    contours = _find_contours(thresh)
    contoured_rgb = cv2.drawContours(
        background_rgb, contours, -1, (255, 255, 255), 1)
    contoured_gray = cv2.cvtColor(contoured_rgb, cv2.COLOR_RGB2GRAY)

    # workaround due to OpenCV issue, the contour starts from the 1st pixel:
    # contours are found on the padded mask and the padding is cropped here.
    contoured_mask = contoured_gray[padded:-padded, padded:-padded]
    return contoured_mask


def mark_mask_on_image(mask, image, color=(255, 0, 0)):
    """Mark contours of the given mask on a copy of the given image.

    Args:
        mask: mask whose contours are drawn; it is cast to uint8 and scaled
            by 255 before thresholding at 127.
        image: image to draw on; a 2-dimensional image is converted to RGB
            first. The input is not modified.
        color: contour color, red by default.

    Returns:
        The image copy with 1-pixel-wide contours drawn on it.
    """
    mask = mask.astype(np.uint8) * 255
    image_color = image.copy()
    if image.ndim == 2:
        image_color = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    _, thresh = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    contours = _find_contours(thresh)
    return cv2.drawContours(image_color, contours, -1, color, 1)


def process_images(img_df):
    """Add an 'image_process' column holding the preprocessed images.

    Each entry of the 'image' column goes through `preprocess_image`.
    The data frame is modified in place and also returned.
    """
    img_df['image_process'] = img_df['image'].map(preprocess_image)
    return img_df


def preprocess_image(image):
    """Preprocesses input image to a consistent input format for the model
    to learn from.

    Preprocessing converts the 3 source image RGB channels down to a single
    gray-scale channel, as well as inverts the background if needed. This
    ensures that the cell background intensity is more consistent, allowing
    the neural network to learn distinguishing features better.

    Args:
        image (numpy 3d matrix): A numpy array containing a single image.

    Returns:
        The input image containing only a single channel and possibly with
        the background intensity inverted.
    """
    image = rgb_clahe_justl(image)
    image = invert_image(image)
    image = image[:, :, np.newaxis]
    return image


def renumber_labels(label_img):
    """Re-number nuclei in a labeled image so the nuclei numbers are unique
    and consecutive.

    The distinct values are mapped, in ascending order, to 0, 1, 2, ...
    The array is modified in place and also returned.
    """
    for new_label, old_label in enumerate(np.unique(label_img)):
        if old_label != new_label:
            label_img[label_img == old_label] = new_label
    return label_img


def _random_walker_rgb(rgb_img, labels):
    """Run `random_walker` on a channels-last image.

    Newer scikit-image releases take ``channel_axis``; older ones only know
    ``multichannel`` and raise TypeError for the unknown keyword.
    """
    try:
        return random_walker(rgb_img, labels, channel_axis=-1)
    except TypeError:
        return random_walker(rgb_img, labels, multichannel=True)


def post_process_image(image, mask, contour):
    """Split a predicted mask into individually labeled objects.

    Markers are generated on the sure foreground (mask - contour) and grown
    with random walker, falling back to watershed, to find all disconnected
    objects. The contour is treated as unknown area.

    Marker values while segmenting:
        Index of watershed boundary = -1
        Index of unknown area = 0
        Index of background = 1 -> set back to 0 after segmentation
        Index of found objects > 1

    Args:
        image: unused; kept so existing callers keep working.
        mask: predicted mask; scaled by 255 and cast to uint8, so values are
            presumably in [0, 1] (TODO confirm against the caller).
        contour: predicted contour, scaled the same way as `mask`.

    Returns:
        A label image where 0 is background and objects are numbered
        consecutively.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

    new_contour = (contour * 255).astype(np.uint8)
    new_mask = (mask * 255).astype(np.uint8)
    new_mask = cv2.morphologyEx(new_mask, cv2.MORPH_OPEN, kernel, iterations=1)

    _, thresh_mask = cv2.threshold(
        new_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    _, thresh_contour = cv2.threshold(
        new_contour, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    sure_background = cv2.dilate(thresh_mask, kernel, iterations=3)
    sure_foreground = cv2.subtract(thresh_mask, thresh_contour)

    mask_plus_contour = cv2.add(thresh_mask, thresh_contour)
    mask_plus_contour = cv2.cvtColor(mask_plus_contour, cv2.COLOR_GRAY2RGB)

    unknown = cv2.subtract(sure_background, sure_foreground)

    # Marker labelling
    output = cv2.connectedComponentsWithStats(sure_foreground)
    labels = output[1]
    stats = output[2]

    # Add one to all labels so that sure background is not 0, 0 is considered
    # unknown by watershed; this way, watershed can distinguish unknown from
    # the background.
    labels = labels + 1
    labels[unknown == 255] = 0

    try:
        # random walker on thresh_mask leads a lot higher mean IoU but lower LB
        # random walker on mask_plus_contour leads lower mean IoU but higher LB
        labels = _random_walker_rgb(mask_plus_contour, labels)
    except Exception:
        # Deliberate best-effort fallback when random walker fails.
        labels = cv2.watershed(mask_plus_contour, labels)

    # Fold boundary (-1) and background (1) into 0, then shift the objects
    # down so that they start at 1 and line up with the rows of `stats`.
    labels[labels == -1] = 0
    labels[labels == 1] = 0
    labels = labels - 1
    labels[labels == -1] = 0

    # discard nuclei which are too big or too small
    areas = stats[1:, cv2.CC_STAT_AREA]  # row 0 is the background component
    if areas.size:
        mean = np.mean(areas)
        # +1 turns positions in `areas` back into label numbers; every
        # component is checked, including the last one.
        outliers = np.flatnonzero((areas > mean * 10) | (areas < mean / 10)) + 1
        if outliers.size:
            labels[np.isin(labels, outliers)] = 0

    labels = renumber_labels(labels)
    return labels


def _red_green_difference(img):
    """Return channel 0 minus channel 1 computed in floating point.

    Subtracting unsigned-integer channels directly wraps around wherever
    the second channel is larger than the first.
    """
    return img[:, :, 0].astype(np.float64) - img[:, :, 1].astype(np.float64)


def create_color_features(img_df):
    """Add per-image color statistics as columns of the data frame.

    Reads the 'image' column and adds 'Red', 'Green', 'Blue', 'Gray',
    'Red-Green' and 'Red-Green-Sd'. The column names assume the channel
    order is R, G, B (TODO confirm against the image loader). The data
    frame is modified in place and also returned.
    """
    images = img_df['image']
    img_df['Red'] = images.map(lambda x: np.mean(x[:, :, 0]))
    img_df['Green'] = images.map(lambda x: np.mean(x[:, :, 1]))
    img_df['Blue'] = images.map(lambda x: np.mean(x[:, :, 2]))
    # Average over the first three channels; a 0:2 slice would skip blue.
    img_df['Gray'] = images.map(lambda x: np.mean(x[:, :, 0:3]))
    img_df['Red-Green'] = images.map(
        lambda x: np.mean(_red_green_difference(x)))
    img_df['Red-Green-Sd'] = images.map(
        lambda x: np.std(_red_green_difference(x)))
    return img_df


def create_color_clusters(img_df, cluster_count=2, cluster_maker=None,
                          colors=('Green', 'Red-Green', 'Red-Green-Sd')):
    """Cluster images based on color features.

    Args:
        img_df: data frame containing the feature columns named in `colors`.
        cluster_count: K of K-means.
        cluster_maker: previous k-means model; when None a new one is
            fitted on `img_df`.
        colors: feature columns used for clustering, by default it splits
            images to color and grayscale clusters.

    Returns:
        The data frame with a 'cluster-id' column added, and the k-means
        model that was used.
    """
    # A tuple would be looked up as a single column key, so use a list.
    feature_columns = list(colors)
    if cluster_maker is None:
        cluster_maker = KMeans(cluster_count, random_state=42)
        cluster_maker.fit(img_df[feature_columns])

    # The closest cluster center decides the cluster id.
    distances = cluster_maker.transform(img_df[feature_columns])
    img_df['cluster-id'] = np.argmin(distances, -1)
    return img_df, cluster_maker


def split_cluster_to_group(img_df, cluster_count, column='cluster-id'):
    """Pass a data frame and return a list of clustered data frames.

    For example, it returns a grayscale img_df and a color img_df.

    Raises:
        AssertionError: if the number of groups found in `column` differs
            from `cluster_count`.
    """
    cluster_df_list = [cluster_df for _, cluster_df in img_df.groupby(column)]
    if len(cluster_df_list) != cluster_count:
        # Raised explicitly so the check also runs under `python -O`.
        raise AssertionError(
            'expected %d clusters in column %r, found %d'
            % (cluster_count, column, len(cluster_df_list)))
    return cluster_df_list