# Python source code of pascalvoc

import numpy as np
import pandas as pd
import xmltodict
from skimage import io, transform, color, draw
from collections import defaultdict


class PascalVOC(object):
    """
    Pascal VOC dataset utility.

    Resolves the image, annotation and segmentation folders of a Pascal VOC
    devkit, reads the train/test image lists, and loads precomputed feature
    and label arrays from fixed relative ``./data/`` locations.

    Arguments
    ---------
        voc_dir: string
            Indicating path of the Pascal VOC devkit.
    """

    # Column positions inside the image-list tables read by ``_read_dataset``.
    img_idx = 0
    lbl_idx = 1
    # The 20 Pascal VOC object categories; list position is the class index.
    labels = [
        'person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
        'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
        'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor'
    ]
    label2idx = {lbl: idx for idx, lbl in enumerate(labels)}
    idx2label = {idx: lbl for idx, lbl in enumerate(labels)}
    # Target (rows, cols) every loaded image is resized to.
    img_size = (224, 224)

    def __init__(self, voc_dir):
        # Build every devkit sub-directory from the stripped root; the
        # original stripped the argument but then kept using the raw value.
        self.voc_dir = voc_dir.rstrip('/')
        self.imageset_dir = './data/dataset/'
        self.img_dir = self.voc_dir + '/JPEGImages/'
        self.bbox_dir = self.voc_dir + '/Annotations/'
        self.segmentation_dir = self.voc_dir + '/SegmentationObject/'
        self.feature_dir = './data/features/'
        self.label_dir = './data/labels/'
        self.feature_prefix = 'vgg_features_'
        self.label_prefix = 'labels_'
        self.trainset_name = 'segmentation_train.txt'
        self.testset_name = 'segmentation_test.txt'
        self.trainset, self.testset = self._load()
        # Cursor used by the sequential mode of ``next_image_minibatch``.
        self.mb_idx = 0

    def next_image_minibatch(self, size, random=True, reset=False):
        """
        Return the next minibatch of training images and annotations.

        Arguments
        ---------
            size: int
                Number of rows to draw from the training list.
            random: bool
                If True, sample ``size`` rows at random. Otherwise walk the
                training list in order, wrapping to the start once the
                cursor reaches the end.
            reset: bool
                Only used when ``random`` is False: restart from the first
                row before taking the batch.

        Returns
        -------
            tuple of (images, annotations) as produced by ``load_images``
            and ``load_annotations``.
        """
        X = self.trainset

        if random:
            mb = X.sample(size)
        else:
            if reset:
                self.mb_idx = 0

            mb = X.iloc[self.mb_idx:self.mb_idx + size]
            self.mb_idx += size

            # Compare against the number of rows; ``DataFrame.size`` is
            # rows * columns and would delay the wrap-around.
            if self.mb_idx >= len(X):
                self.mb_idx = 0

        return self.load_images(mb), self.load_annotations(mb)

    def get_test_data(self, size, random=True):
        """
        Return images, features and labels for ``size`` test samples.

        Arguments
        ---------
            size: int
                Number of test rows to select.
            random: bool
                Sample rows at random when True, otherwise take the first
                ``size`` rows.

        Returns
        -------
            tuple ``(X_img, X, y, y_seg)``: resized images, precomputed
            features, labels, and the stored segmentation labels.
        """
        if random:
            imgs = self.testset.sample(size)
        else:
            imgs = self.testset.head(size)

        X_img = self.load_images(imgs)
        X, y = self.load_features_testset()

        # Row labels of the selection are used as positions into the
        # precomputed arrays.
        idxes = imgs.index.tolist()
        X, y = X[idxes], y[idxes]

        # NOTE(review): unlike X and y, y_seg is returned whole rather than
        # restricted to ``idxes`` -- confirm whether that is intended.
        y_seg = self.load_segmentation_label()

        return X_img, X, y, y_seg

    def get_data_by_name(self, name):
        """
        Return images, features and labels for test samples picked by name.

        Arguments
        ---------
            name: string or list-like of strings
                Image name(s) to look up in the test list.

        Returns
        -------
            tuple ``(X_img, X, y)`` restricted to the matching rows.
        """
        # ``isin`` needs a list-like; accept a single name for convenience.
        if isinstance(name, str):
            name = [name]

        names_col = self.testset[self.img_idx]
        imgs = self.testset[names_col.isin(name)]

        X_img = self.load_images(imgs)
        X, y = self.load_features_testset()

        idxes = imgs.index.tolist()
        X, y = X[idxes], y[idxes]

        return X_img, X, y

    def load_images(self, img_names):
        """
        Read and resize the JPEG of every image listed in ``img_names``.

        Arguments
        ---------
            img_names: table whose ``img_idx`` column holds image names.

        Returns
        -------
            numpy array stacking the images resized to ``img_size``.
        """
        X = [transform.resize(io.imread(self._img_path(img)), self.img_size)
             for img
             in img_names[self.img_idx]]

        return np.array(X)

    def load_annotations(self, img_names):
        """
        Build one annotation matrix per image listed in ``img_names``.

        Each matrix places the class-presence vector next to the per-class
        bounding boxes returned by ``get_class_bbox``.

        Returns
        -------
            numpy array stacking the per-image matrices.
        """
        y = [np.column_stack(self.get_class_bbox(img))
             for img
             in img_names[self.img_idx]]

        return np.array(y)

    def load_segmentation_label(self):
        """Load the stored segmentation label array from ``label_dir``."""
        return np.load(self.label_dir + 'labels_segmentation.npy')

    def load_segmentation_label_from_imgs(self, img_names):
        """
        Build binary foreground masks from the segmentation PNG files.

        Arguments
        ---------
            img_names: table whose ``img_idx`` column holds image names.

        Returns
        -------
            numpy boolean array, one ``img_size`` mask per image, True
            wherever the resized grayscale value is non-zero.
        """
        def preprocess(img_name):
            img = io.imread(self.segmentation_dir + '/' + img_name + '.png')
            img = transform.resize(img, self.img_size)
            # ``rgb2grey`` is the old alias that newer scikit-image dropped.
            img = color.rgb2gray(img)
            img = (img != 0)
            return img

        y = [preprocess(img) for img in img_names[self.img_idx]]

        return np.array(y)

    def draw_bbox(self, img, bbox, color=(1, 0, 0), line_width=3):
        """
        Return a copy of ``img`` with a rectangle drawn around ``bbox``.

        Arguments
        ---------
            img: numpy array
                Image whose first two axes are height and width.
            bbox: sequence of 4 floats
                ``(xmin, ymin, xmax, ymax)`` as fractions of width/height.
            color: sequence
                Value written to the border pixels.
            line_width: int
                Border thickness in pixels, drawn outside the box.

        Returns
        -------
            numpy array, a modified copy; ``img`` itself is left untouched.
        """
        xmin, ymin, xmax, ymax = bbox
        h, w = img.shape[:2]

        xmin = int(round(xmin * w))
        xmax = int(round(xmax * w))
        ymin = int(round(ymin * h))
        ymax = int(round(ymax * h))

        # Clamp the outer top/left coordinates: a negative slice start would
        # wrap around to the far side and silently drop the edge.
        top = max(ymin - line_width, 0)
        left = max(xmin - line_width, 0)
        bottom = ymax + line_width
        right = xmax + line_width

        img_bbox = np.copy(img)

        img_bbox[top:ymin, left:right] = color
        img_bbox[ymax:bottom, left:right] = color
        img_bbox[top:bottom, left:xmin] = color
        img_bbox[top:bottom, xmax:right] = color

        return img_bbox

    def get_class_bbox(self, img_name):
        """
        Parse the annotation XML of ``img_name``.

        Returns
        -------
            tuple ``(clses, bboxes)``: ``clses`` has one entry per label,
            set to 1 when that class occurs in the image; ``bboxes`` has one
            normalised ``[xmin, ymin, xmax, ymax]`` row per label. When a
            class occurs several times one of its boxes is picked at random.
            Rows of absent classes stay zero.
        """
        with open(self._label_path(img_name), 'r') as f:
            xml = xmltodict.parse(f.read())

        annotation = xml['annotation']
        img_size = annotation['size']
        img_w, img_h = float(img_size['width']), float(img_size['height'])

        # A lone object is not wrapped in a list, and an annotation without
        # any object has no 'object' entry at all.
        objs = annotation.get('object') or []
        if not isinstance(objs, list):
            objs = [objs]

        n_labels = len(self.labels)
        # Builtin ``float`` replaces the ``np.float`` alias removed from NumPy.
        clses = np.zeros(n_labels, dtype=float)
        bboxes = np.zeros(shape=[n_labels, 4], dtype=float)
        bbox_cls = defaultdict(list)

        for obj in objs:
            idx = self.label2idx[obj['name']]
            clses[idx] = 1

            bndbox = obj['bndbox']
            bbox = (bndbox['xmin'], bndbox['ymin'], bndbox['xmax'], bndbox['ymax'])
            bbox = self._normalize_bbox(bbox, (img_w, img_h))
            bbox = np.array(bbox, dtype=float)
            bbox_cls[idx].append(bbox)

        for k, v in bbox_cls.items():
            sample_idx = np.random.randint(0, len(v))
            bboxes[k] = v[sample_idx]

        return clses, bboxes

    def load_features_trainset(self):
        """Load the precomputed (features, labels) arrays of the train set."""
        return self._load_features(self.trainset_name)

    def load_features_testset(self):
        """Load the precomputed (features, labels) arrays of the test set."""
        return self._load_features(self.testset_name)

    def segmentation_accuracy(self, y_pred, y_true):
        """Return the fraction of elements where the two masks agree."""
        return np.mean(y_pred == y_true)

    def segmentation_precision(self, y_pred, y_true):
        """
        Return TP / (TP + FP) of the predicted mask.

        Inputs are coerced to boolean so that ``~`` and ``&`` act as logical
        operators for non-boolean masks too. A small epsilon keeps the ratio
        defined when nothing is predicted positive.
        """
        y_pred = np.asarray(y_pred, dtype=bool)
        y_true = np.asarray(y_true, dtype=bool)
        tp = np.sum(y_true & y_pred)
        fp = np.sum(~y_true & y_pred)
        return tp / (tp + fp + 1e-8)

    def segmentation_recall(self, y_pred, y_true):
        """
        Return TP / (TP + FN) of the predicted mask.

        Inputs are coerced to boolean so that ``~`` and ``&`` act as logical
        operators for non-boolean masks too. A small epsilon keeps the ratio
        defined when the ground truth has no positives.
        """
        y_pred = np.asarray(y_pred, dtype=bool)
        y_true = np.asarray(y_true, dtype=bool)
        tp = np.sum(y_true & y_pred)
        fn = np.sum(y_true & ~y_pred)
        return tp / (tp + fn + 1e-8)

    def segmentation_prec_rec_f1(self, y_pred, y_true):
        """Return the tuple ``(precision, recall, f1)`` for the masks."""
        p = self.segmentation_precision(y_pred, y_true)
        r = self.segmentation_recall(y_pred, y_true)
        f1 = 2 * p * r / (p + r + 1e-8)
        return p, r, f1

    def _load_features(self, dataset_name):
        """
        Load the feature and label ``.npy`` arrays belonging to a dataset.

        The part of ``dataset_name`` before the first dot is appended to the
        configured prefixes to form both file names.
        """
        dataset_name = dataset_name.split('.')[0]
        X = np.load(self.feature_dir + self.feature_prefix + dataset_name + '.npy')
        y = np.load(self.label_dir + self.label_prefix + dataset_name + '.npy')
        return X, y

    def _load(self):
        """Read the train and test image lists; return ``(train, test)``."""
        train = self._read_dataset(self.imageset_dir + self.trainset_name)
        test = self._read_dataset(self.imageset_dir + self.testset_name)
        return train, test

    def _read_dataset(self, filename):
        """Read a headerless, whitespace-separated table into a DataFrame."""
        # ``sep=r'\s+'`` is the supported spelling of the deprecated
        # ``delim_whitespace=True``.
        return pd.read_csv(filename, header=None, sep=r'\s+')

    def _img_path(self, img):
        """Return the JPEG path of image ``img``."""
        return '{}/{}.jpg'.format(self.img_dir, img)

    def _label_path(self, img):
        """Return the annotation XML path of image ``img``."""
        return '{}/{}.xml'.format(self.bbox_dir, img)

    def _normalize_bbox(self, bbox, img_dim):
        """
        Scale box coordinates to fractions of the image size.

        Arguments
        ---------
            bbox: sequence of 4 numbers or numeric strings
                ``(xmin, ymin, xmax, ymax)``.
            img_dim: tuple
                ``(width, height)`` of the image.

        Returns
        -------
            list ``[xmin, ymin, xmax, ymax]`` with x values divided by the
            width and y values divided by the height.
        """
        w, h = img_dim
        xmin, ymin, xmax, ymax = bbox

        def normalize(x, s):
            return float(x) / s

        xmin, ymin = normalize(xmin, w), normalize(ymin, h)
        xmax, ymax = normalize(xmax, w), normalize(ymax, h)

        return [xmin, ymin, xmax, ymax]