# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import random

import numpy as np

import mxnet as mx
from mxnet import nd


def transform(data, target_wd, target_ht, is_train, box):
    """Crop and normalize an image nd array.

    Parameters
    ----------
    data : NDArray
        HWC uint8 image as loaded by ``mx.image.imread``.
    target_wd : int
        Width to resize to before cropping.
    target_ht : int
        Height to resize to before cropping.
    is_train : bool
        If True, apply random horizontal flip and random crop;
        otherwise apply a deterministic center crop.
    box : list of int or None
        Optional ``[x, y, w, h]`` bounding box to crop first.

    Returns
    -------
    NDArray
        Image of shape (1, 3, 224, 224), normalized with ImageNet
        mean/std in the same way as the pre-trained model.
    """
    if box is not None:
        # Crop to the bounding box, clamped to the image borders.
        x, y, w, h = box
        data = data[y:min(y+h, data.shape[0]), x:min(x+w, data.shape[1])]

    # Resize to target_wd * target_ht.
    data = mx.image.imresize(data, target_wd, target_ht)

    # Normalize in the same way as the pre-trained model.
    data = data.astype(np.float32) / 255.0
    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array([0.229, 0.224, 0.225])

    if is_train:
        # Random horizontal flip with probability 0.5, then random crop.
        if random.random() < 0.5:
            data = nd.flip(data, axis=1)
        data, _ = mx.image.random_crop(data, (224, 224))
    else:
        data, _ = mx.image.center_crop(data, (224, 224))

    # Transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht).
    data = nd.transpose(data, (2, 0, 1))

    # If image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    # Add a leading batch axis so batches can be concatenated along axis 0.
    return data.reshape((1,) + data.shape)


class CUB200Iter(mx.io.DataIter):
    """Iterator for the CUB200-2011 dataset.

    Parameters
    ----------
    data_path : str,
        The path to dataset directory.
    batch_k : int,
        Number of images per class in a batch.
    batch_size : int,
        Batch size.
    data_shape : tuple,
        Data shape. E.g. (3, 224, 224).
    is_train : bool,
        Training data or testing data. Training batches are randomly sampled.
        Testing batches are loaded sequentially until reaching the end.
    """
    def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
        super(CUB200Iter, self).__init__(batch_size)
        self.data_shape = (batch_size,) + data_shape
        self.batch_size = batch_size
        self.provide_data = [('data', self.data_shape)]
        self.batch_k = batch_k
        self.is_train = is_train

        # Image file lists: one sub-list per training class (first 100
        # classes), and a flat list plus labels for the test split.
        self.train_image_files = [[] for _ in range(100)]
        self.test_image_files = []
        self.test_labels = []
        self.boxes = {}
        self.test_count = 0

        # The three annotation files are line-aligned: the i-th line of
        # each refers to the same image.
        with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
             open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
             open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
                fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
                label = int(line_label.strip().split()[-1]) - 1
                box = [int(float(v)) for v in line_box.split()[-4:]]
                self.boxes[fname] = box

                # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
                # we use the first 100 classes for training, and the remaining for testing.
                if label < 100:
                    self.train_image_files[label].append(fname)
                else:
                    self.test_labels.append(label)
                    self.test_image_files.append(fname)

        self.n_test = len(self.test_image_files)

    def get_image(self, img, is_train):
        """Load and transform an image."""
        img_arr = mx.image.imread(img)
        img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
        return img_arr

    def sample_train_batch(self):
        """Sample a training batch (data and label).

        Draws ``batch_size // batch_k`` distinct classes and ``batch_k``
        distinct images from each, as required by batch-wise metric
        learning losses.
        """
        batch = []
        labels = []
        num_groups = self.batch_size // self.batch_k

        # For CUB200, we use the first 100 classes for training.
        sampled_classes = np.random.choice(100, num_groups, replace=False)
        for i in range(num_groups):
            img_fnames = np.random.choice(self.train_image_files[sampled_classes[i]],
                                          self.batch_k, replace=False)
            batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
            labels += [sampled_classes[i] for _ in range(self.batch_k)]

        return nd.concatenate(batch, axis=0), labels

    def get_test_batch(self):
        """Sample a testing batch (data and label).

        Reads images sequentially; indices wrap around modulo the test-set
        size so the final batch is always full.
        """
        batch_size = self.batch_size
        batch = [self.get_image(self.test_image_files[(self.test_count*batch_size + i)
                                                      % len(self.test_image_files)],
                                is_train=False) for i in range(batch_size)]
        labels = [self.test_labels[(self.test_count*batch_size + i)
                                   % len(self.test_image_files)] for i in range(batch_size)]
        return nd.concatenate(batch, axis=0), labels

    def reset(self):
        """Reset an iterator."""
        self.test_count = 0

    def next(self):
        """Return a batch.

        Training mode yields random batches indefinitely; testing mode
        raises StopIteration once the test set has been covered.
        """
        if self.is_train:
            data, labels = self.sample_train_batch()
        else:
            if self.test_count * self.batch_size < len(self.test_image_files):
                data, labels = self.get_test_batch()
                self.test_count += 1
            else:
                self.test_count = 0
                raise StopIteration
        return mx.io.DataBatch(data=[data], label=[labels])


def cub200_iterator(data_path, batch_k, batch_size, data_shape):
    """Return training and testing iterator for the CUB200-2011 dataset."""
    return (CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True),
            CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False))