Python load cifar100

23 Python code examples related to "load cifar100" are shown below. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: datagen.py    From Adversarial-Autoencoder with MIT License 6 votes vote down vote up
def load_cifar(args):
    """Build shuffled CIFAR-10 train and test ``DataLoader`` objects.

    The training pipeline applies a random 32x32 crop (padding 4) and a random
    horizontal flip before tensor conversion and per-channel normalization;
    the test pipeline only converts and normalizes.  Both datasets are created
    with ``download=True`` under ``data/cifar``.

    Args:
        args: object exposing ``batch_size``, used for the training loader.

    Returns:
        ``(trainloader, testloader)``; the test loader uses a batch size of 100.
    """
    data_root = 'data/cifar'
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)
    loader_options = dict(num_workers=1, pin_memory=True, drop_last=True)

    augment_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    plain_steps = [
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]

    trainset = torchvision.datasets.CIFAR10(
        root=data_root, train=True, download=True,
        transform=transforms.Compose(augment_steps))
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=args.batch_size, shuffle=True, **loader_options)

    testset = torchvision.datasets.CIFAR10(
        root=data_root, train=False, download=True,
        transform=transforms.Compose(plain_steps))
    # The test loader also shuffles and drops the last partial batch.
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=100, shuffle=True, **loader_options)

    return trainloader, testloader
Example 2
Source File: dataset.py    From conceptors with GNU General Public License v3.0 6 votes vote down vote up
def load_CIFAR_100(filename,
                   file_type="train",
                   label_type="fine_labels"):
  """
  Load CIFAR 100 data set

  The pickled dictionary is read from ``filename``; its "data" entry is
  reshaped to (N, 32, 32, 3) floats and the chosen label list is returned
  as a NumPy array.  N is inferred from the data, so partial files work too.

  @param filename: file name of CIFAR 100
  @param file_type: "train" or "test"; any other value leaves the data
                    array in the layout stored in the file
  @param label_type: "fine_labels" or "coarse_labels"
  @return: X, Y: image data and labels
  """
  import sys

  # NOTE: pickle can execute arbitrary code; only load trusted files.
  # The context manager guarantees the handle is closed even on error.
  with open(filename, "rb") as f:
    if sys.version_info[0] >= 3:
      # latin1 is needed to read NumPy arrays pickled by Python 2.
      datadict = pickle.load(f, encoding="latin1")
    else:
      datadict = pickle.load(f)

  X = datadict["data"]
  Y = datadict[label_type]

  if file_type in ("train", "test"):
    # (N, 3072) -> (N, 3, 32, 32) -> channels-last (N, 32, 32, 3)
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")

  Y = np.array(Y)

  return X, Y
Example 3
Source File: datahandler.py    From wae with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_cifar_batch(fpath, label_key='labels'):
    """Internal utility for parsing CIFAR data.

    # Arguments
        fpath: path the file to parse.
        label_key: key for label data in the retrieve
            dictionary.

    # Returns
        A tuple `(data, labels)`, where `data` is reshaped to
        `(num_samples, 3, 32, 32)`.
    """
    f = utils.o_gfile(fpath, 'rb')
    try:
        if sys.version_info < (3,):
            d = cPickle.load(f)
        else:
            d = cPickle.load(f, encoding='bytes')
            # Keys come back as bytes on Python 3; decode them to str
            # so the lookups below work on both major versions.
            d = {k.decode('utf8'): v for k, v in d.items()}
    finally:
        # Release the handle even when unpickling fails.
        f.close()
    data = d['data']
    labels = d[label_key]

    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
Example 4
Source File: datagen.py    From Adversarial-Autoencoder with MIT License 5 votes vote down vote up
def load_cifar_hidden(args, c_idx):
    """Build CIFAR-10 loaders restricted to the classes listed in ``c_idx``.

    Args:
        args: object exposing ``batch_size``, used for the training loader.
        c_idx: container of class labels to keep; membership is tested with ``in``.

    Returns:
        ``(trainloader, testloader)`` over ``Subset`` views of the train and
        test sets; the test loader uses a batch size of 100.
    """
    data_root = 'data/cifar'
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)
    loader_options = dict(num_workers=2, pin_memory=True, drop_last=True)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    def get_classes(target, labels):
        # Indexing the dataset yields (image, label) pairs; keep the
        # positions whose label is one of the requested classes.
        return [i for i in range(len(target)) if target[i][1] in labels]

    trainset = torchvision.datasets.CIFAR10(
        root=data_root, train=True, download=True, transform=transform_train)
    train_hidden = torch.utils.data.Subset(trainset, get_classes(trainset, c_idx))
    trainloader = torch.utils.data.DataLoader(
        train_hidden, batch_size=args.batch_size, shuffle=True, **loader_options)

    testset = torchvision.datasets.CIFAR10(
        root=data_root, train=False, download=True, transform=transform_test)
    test_hidden = torch.utils.data.Subset(testset, get_classes(testset, c_idx))
    testloader = torch.utils.data.DataLoader(
        test_hidden, batch_size=100, shuffle=True, **loader_options)

    return trainloader, testloader
Example 5
Source File: utils.py    From bruno with MIT License 5 votes vote down vote up
def load_cifar(data_dir, subset='train'):
    """Return the ``'x'`` and ``'y'`` arrays of the requested CIFAR-10 subset.

    ``download_and_extract_cifar(data_dir)`` is always invoked first.  For
    ``'train'`` the five ``data_batch_*`` files are concatenated along axis 0;
    for ``'test'`` the single ``test_batch`` file is returned as is.

    Raises:
        NotImplementedError: if ``subset`` is neither ``'train'`` nor ``'test'``.
    """
    download_and_extract_cifar(data_dir)

    if subset == 'test':
        batch = unpickle(os.path.join(data_dir, 'cifar-10-batches-py', 'test_batch'))
        return batch['x'], batch['y']

    if subset != 'train':
        raise NotImplementedError('subset should be either train or test')

    batches = []
    for batch_no in range(1, 6):
        batch_path = os.path.join(data_dir, 'cifar-10-batches-py', 'data_batch_' + str(batch_no))
        batches.append(unpickle(batch_path))
    images = np.concatenate([batch['x'] for batch in batches], axis=0)
    targets = np.concatenate([batch['y'] for batch in batches], axis=0)
    return images, targets
Example 6
Source File: loader.py    From GoogLeNet-Inception with MIT License 5 votes vote down vote up
def load_cifar(cifar_path, batch_size=64, subtract_mean=True):
    """ Create the training and validation Dataflow objects for CIFAR-10

        Args:
            cifar_path (str): directory of CIFAR-10 data
            batch_size (int): number of images read from Dataflow for each batch
            subtract_mean (bool): whether subtract each channel by average of training set

        Returns:
            CIFAR (object) of training and testing set.
            Batch images and label can be access by
            CIFAR.next_batch_dict()['image'] and
            CIFAR.next_batch_dict()['label']
    """

    def _build(data_type, shuffle, augment, channel_mean):
        # Construct one CIFAR dataflow and initialise its batching state.
        flow = CIFAR(
            data_dir=cifar_path,
            shuffle=shuffle,
            batch_dict_name=['image', 'label'],
            data_type=data_type,
            channel_mean=channel_mean,
            subtract_mean=subtract_mean,
            augment=augment,
        )
        flow.setup(epoch_val=0, batch_size=batch_size)
        return flow

    # The training flow is shuffled and augmented.
    train_data = _build('train', shuffle=True, augment=True, channel_mean=None)
    # The validation flow is given the training flow's channel_mean attribute.
    valid_data = _build('valid', shuffle=False, augment=False,
                        channel_mean=train_data.channel_mean)

    return train_data, valid_data
Example 7
Source File: dataset.py    From conceptors with GNU General Public License v3.0 5 votes vote down vote up
def load_CIFAR_batch(filename):
    """
    load single batch of cifar-10 dataset

    code is adapted from CS231n assignment kit

    @param filename: string of file name in cifar
    @return: X, Y: data and labels of images in the cifar batch;
             X is float with shape (N, 32, 32, 3), Y is a NumPy array
    """
    import sys

    # Pickle data is binary, so the file must be opened in 'rb' mode.
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        if sys.version_info[0] >= 3:
            # latin1 is needed to read NumPy arrays pickled by Python 2.
            datadict = pickle.load(f, encoding='latin1')
        else:
            datadict = pickle.load(f)

    X = datadict['data']
    Y = datadict['labels']

    # (N, 3072) -> (N, 3, 32, 32) -> channels-last (N, 32, 32, 3);
    # N is inferred so batches of any size are accepted.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)

    return X, Y
Example 8
Source File: data_utils.py    From Machine-Learning-with-TensorFlow-1.x with MIT License 5 votes vote down vote up
def load_cifar_10_pickle(pickle_file, image_depth):
    """Load one pickled CIFAR-10 batch and rescale its pixel data.

    Args:
        pickle_file: path of the pickled batch; it must unpickle to a mapping
            with ``'data'`` and ``'labels'`` entries.
        image_depth: value used to centre and scale the pixels, computed as
            ``(data - image_depth / 2) / image_depth``.

    Returns:
        ``(scaled_data, labels)`` where ``scaled_data`` is a float array and
        ``labels`` is returned exactly as stored in the file.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    # The context manager closes the file even if unpickling raises.
    with open(pickle_file, 'rb') as fo:
        batch = pickle.load(fo)
    return ((batch['data'].astype(float) - image_depth / 2) / (image_depth)), batch['labels']
Example 9
Source File: data_utils.py    From Machine-Learning-with-TensorFlow-1.x with MIT License 5 votes vote down vote up
def load_cifar_10_from_pickles(train_pickle_files, test_pickle_files, pickle_batch_size, image_size, image_depth,
                               num_of_channels):
    """Load several pickled CIFAR-10 batches into preallocated train/test arrays.

    Each file is read with ``load_cifar_10_pickle`` and written into a
    contiguous slice of ``pickle_batch_size`` rows of the output arrays.

    Args:
        train_pickle_files: sequence of training batch paths.
        test_pickle_files: sequence of test batch paths.
        pickle_batch_size: number of samples per pickle file.
        image_size: image height/width in pixels.
        image_depth: forwarded to ``load_cifar_10_pickle``.
        num_of_channels: number of colour channels per image.

    Returns:
        ``(all_train_data, all_train_labels, all_test_data, all_test_labels)``;
        data arrays are float32 with one flattened image per row, label arrays
        have ``object`` dtype.
    """
    flat_image_len = image_size * image_size * num_of_channels
    train_rows = pickle_batch_size * len(train_pickle_files)
    test_rows = pickle_batch_size * len(test_pickle_files)

    # np.ndarray allocates without initialising; every slice is filled below.
    all_train_data = np.ndarray(shape=(train_rows, flat_image_len), dtype=np.float32)
    all_train_labels = np.ndarray(shape=train_rows, dtype=object)
    all_test_data = np.ndarray(shape=(test_rows, flat_image_len), dtype=np.float32)
    all_test_labels = np.ndarray(shape=test_rows, dtype=object)

    print('Started loading training data')
    for index, train_pickle_file in enumerate(train_pickle_files):
        rows = slice(index * pickle_batch_size, (index + 1) * pickle_batch_size)
        batch_data, batch_labels = load_cifar_10_pickle(train_pickle_file, image_depth)
        all_train_data[rows, :] = batch_data
        all_train_labels[rows] = batch_labels
    print('Finished loading training data\n')

    print('Started loading testing data')
    for index, test_pickle_file in enumerate(test_pickle_files):
        rows = slice(index * pickle_batch_size, (index + 1) * pickle_batch_size)
        batch_data, batch_labels = load_cifar_10_pickle(test_pickle_file, image_depth)
        all_test_data[rows, :] = batch_data
        all_test_labels[rows] = batch_labels
    print('Finished loading testing data')

    return all_train_data, all_train_labels, all_test_data, all_test_labels
Example 10
Source File: cifar.py    From Jacinle with MIT License 5 votes vote down vote up
def load_cifar(data_dir, nr_classes=10):
    """Download (if needed), extract and read CIFAR-10 or CIFAR-100.

    Args:
        data_dir: directory holding the archive and the extracted folder.
        nr_classes: 10 or 100, selecting the dataset variant.

    Returns:
        ``(train_set, test_set)`` as produced by ``_read_cifar``; the last
        file of the variant's file list is the test split, all preceding
        files form the training split.
    """
    assert nr_classes in (10, 100)

    data_file = 'cifar-{}-python.tar.gz'.format(nr_classes)
    origin = cifar_web_address + data_file
    dataset = osp.join(data_dir, data_file)
    if nr_classes == 10:
        folder_name = 'cifar-10-batches-py'
        filenames = ['data_batch_{}'.format(i) for i in range(1, 6)]
        filenames.append('test_batch')
    else:
        folder_name = 'cifar-100-python'
        filenames = ['train', 'test']

    if not osp.isdir(osp.join(data_dir, folder_name)):
        if not osp.isfile(dataset):
            download(origin, data_dir, data_file)
        # Close the archive deterministically instead of leaking the handle.
        # NOTE(review): extractall trusts member paths inside the archive;
        # only use with archives from a trusted origin.
        with tarfile.open(dataset, 'r:gz') as archive:
            archive.extractall(data_dir)

    filenames = [osp.join(data_dir, folder_name, name) for name in filenames]

    train_set = _read_cifar(filenames[:-1], nr_classes)
    test_set = _read_cifar([filenames[-1]], nr_classes)

    return train_set, test_set
Example 11
Source File: data.py    From m-phate with GNU General Public License v3.0 5 votes vote down vote up
def load_cifar():
    """Load CIFAR-10 through Keras as flattened float32 images and one-hot labels.

    Images are cast to float32, flattened to one row per sample and divided
    by 255; labels are converted with ``to_categorical`` using 10 classes.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` -- both image arrays come first.
    """
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.cifar10.load_data()

    y_train = keras.utils.to_categorical(train_labels, num_classes=10)
    y_test = keras.utils.to_categorical(test_labels, num_classes=10)

    train_float = train_images.astype('float32')
    test_float = test_images.astype('float32')
    x_train = train_float.reshape(train_float.shape[0], -1) / 255
    x_test = test_float.reshape(test_float.shape[0], -1) / 255

    return x_train, x_test, y_train, y_test
Example 12
Source File: main.py    From DiscriminativeActiveLearning with MIT License 5 votes vote down vote up
def load_cifar_10():
    """
    Load and pre-process the CIFAR-10 data.

    The five ``data_batch_*`` files are read with ``load_batch`` into a
    preallocated (50000, 3, 32, 32) uint8 array, ``test_batch`` provides the
    test split.  Labels become column vectors, images are scaled by 1/255,
    moved to channels-last when the Keras backend asks for it, and the
    training set is randomly permuted.

    Returns ``(x_train, y_train), (x_test, y_test)``.
    """

    dirname = ''  # TODO: your path here

    samples_per_batch = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    for batch_no in range(1, 6):
        start = (batch_no - 1) * samples_per_batch
        stop = batch_no * samples_per_batch
        batch_path = os.path.join(dirname, 'data_batch_' + str(batch_no))
        batch_images, batch_labels = load_batch(batch_path)
        x_train[start:stop, :, :, :] = batch_images
        y_train[start:stop] = batch_labels

    x_test, y_test = load_batch(os.path.join(dirname, 'test_batch'))

    # labels as (N, 1) column vectors
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # scale pixel values by 1/255
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    if K.image_data_format() == 'channels_last':
        channels_last_axes = (0, 2, 3, 1)
        x_train = x_train.transpose(*channels_last_axes)
        x_test = x_test.transpose(*channels_last_axes)

    # shuffle images and labels with the same permutation
    order = np.random.permutation(x_train.shape[0])
    x_train, y_train = x_train[order], y_train[order]

    return (x_train, y_train), (x_test, y_test)
Example 13
Source File: main.py    From DiscriminativeActiveLearning with MIT License 5 votes vote down vote up
def load_cifar_100(label_mode='fine'):
    """
    Load and pre-process the CIFAR-100 data.

    ``label_mode`` is combined with the suffix ``'_labels'`` to form the label
    key handed to ``load_batch`` for both the ``train`` and ``test`` files.
    Labels become column vectors, images are scaled by 1/255, moved to
    channels-last when the Keras backend asks for it, and the training set
    is randomly permuted.

    Returns ``(x_train, y_train), (x_test, y_test)``.
    """

    dirname = ''  # TODO: your path here
    label_key = label_mode + '_labels'

    x_train, y_train = load_batch(os.path.join(dirname, 'train'), label_key=label_key)
    x_test, y_test = load_batch(os.path.join(dirname, 'test'), label_key=label_key)

    # labels as (N, 1) column vectors
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # scale pixel values by 1/255
    x_train = np.array(x_train).astype('float32') / 255
    x_test = np.array(x_test).astype('float32') / 255

    if K.image_data_format() == 'channels_last':
        channels_last_axes = (0, 2, 3, 1)
        x_train = x_train.transpose(*channels_last_axes)
        x_test = x_test.transpose(*channels_last_axes)

    # shuffle images and labels with the same permutation
    order = np.random.permutation(x_train.shape[0])
    x_train, y_train = x_train[order], y_train[order]

    return (x_train, y_train), (x_test, y_test)
Example 14
Source File: data_utils.py    From cs231n-practice with MIT License 5 votes vote down vote up
def load_CIFAR_batch(filename):
  """ load single batch of cifar

  Returns (X, Y): X is a float array of shape (N, 32, 32, 3) and Y is a
  NumPy array of the labels stored under 'labels'.
  """
  import sys

  # Pickle data is binary, so the file must be opened in 'rb' mode.
  # NOTE: pickle can execute arbitrary code; only load trusted files.
  with open(filename, 'rb') as f:
    if sys.version_info[0] >= 3:
      # latin1 is needed to read NumPy arrays pickled by Python 2.
      datadict = pickle.load(f, encoding='latin1')
    else:
      datadict = pickle.load(f)
  X = datadict['data']
  Y = datadict['labels']
  # N is inferred so batches of any size are accepted.
  X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
  Y = np.array(Y)
  return X, Y
Example 15
Source File: datagen.py    From udl with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def load_cifar_batch(file):
    """Unpickle one CIFAR batch file and return the stored object.

    Args:
        file: path of the pickled batch.

    Returns:
        Whatever object the file unpickles to.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    # The context manager closes the file even if unpickling raises.
    with open(file, 'rb') as fo:
        batch = cPickle.load(fo)
    return batch
Example 16
Source File: util.py    From proxprop with MIT License 5 votes vote down vote up
def load_CIFAR_batch(filename):
    """ load single batch of cifar

    Returns (X, Y): X is a float32 array of shape (N, 3, 32, 32) and Y is a
    NumPy array of the labels stored under 'labels'.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')
    # N is inferred from the data so batches of any size are accepted.
    X = datadict['data'].reshape(-1, 3, 32, 32).astype('float32')
    Y = np.array(datadict['labels'])
    return X, Y
Example 17
Source File: data_utils.py    From deligan with MIT License 5 votes vote down vote up
def load_CIFAR_batch(filename):
  """ load single batch of cifar

  Returns (X, Y): X is a float array of shape (N, 32, 32, 3) and Y is a
  NumPy array of the labels stored under 'labels'.
  """
  import sys

  # NOTE: pickle can execute arbitrary code; only load trusted files.
  with open(filename, 'rb') as f:
    if sys.version_info[0] >= 3:
      # latin1 is needed to read NumPy arrays pickled by Python 2.
      datadict = pickle.load(f, encoding='latin1')
    else:
      datadict = pickle.load(f)
  X = datadict['data']
  Y = datadict['labels']
  # N is inferred so batches of any size are accepted.
  X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
  Y = np.array(Y)
  return X, Y
Example 18
Source File: data_utils.py    From resnet-cifar10-caffe with MIT License 5 votes vote down vote up
def load_CIFAR_batch(filename, pad=True):
  """ load single batch of cifar

  With pad=True (default) every image is placed in the centre of a 40x40
  canvas filled with the value 128, giving shape (N, 3, 40, 40); with
  pad=False the unpadded (N, 3, 32, 32) images are returned.  Images are
  uint8 and labels int64 in both cases.
  """
  import sys

  # NOTE: pickle can execute arbitrary code; only load trusted files.
  with open(filename, 'rb') as f:
    if sys.version_info[0] >= 3:
      # latin1 is needed to read NumPy arrays pickled by Python 2.
      datadict = pickle.load(f, encoding='latin1')
    else:
      datadict = pickle.load(f)
  # N is inferred so batches of any size are accepted.
  X = datadict['data'].reshape(-1, 3, 32, 32).astype(np.uint8)
  Y = np.array(datadict['labels'], dtype=np.int64)
  if not pad:
    return X, Y
  # Only allocate the padded canvas when it is actually requested.
  padded = np.zeros((X.shape[0], 3, 40, 40), dtype=np.uint8)
  padded[:, :, :, :] = 128
  padded[:, :, 4:-4, 4:-4] = X
  return padded, Y
Example 19
Source File: cifar_loader.py    From flow-gan with MIT License 4 votes vote down vote up
def load_cifar(data_dir="data/cifar_data/"):
    """Load CIFAR-10 images split into train, validation and test sets.

    The first 500 samples encountered for each of the 10 labels across the
    training batches go to the validation split; the remainder is the
    training split.  All images are divided by 255.0, their max/min values
    are printed, and the arrays are transposed with axes (0, 2, 3, 1).

    Returns:
        ``(train_images, val_images, test_images)`` -- labels are not returned.
    """
    if not os.path.exists(data_dir):
        print('creating folder', data_dir)
        os.makedirs(data_dir)
    maybe_download_and_extract(data_dir)

    train_batches = []
    for batch_no in range(1, 6):
        batch_path = os.path.join(data_dir, 'cifar-10-batches-py', 'data_batch_' + str(batch_no))
        train_batches.append(unpickle(batch_path))

    # Per-label count of samples already routed to the validation split.
    val_taken = [0] * 10
    train_images, train_labels = [], []
    val_images, val_labels = [], []
    for batch in train_batches:
        for image, label in zip(batch['x'], batch['y']):
            if val_taken[label] < 500:
                val_images.append(image)
                val_labels.append(label)
                val_taken[label] += 1
            else:
                train_images.append(image)
                train_labels.append(label)

    trainx = np.array(train_images)
    trainy = np.array(train_labels)
    valx = np.array(val_images)
    valy = np.array(val_labels)

    test_batch = unpickle(os.path.join(data_dir, 'cifar-10-batches-py', 'test_batch'))
    testx = test_batch['x']
    testy = test_batch['y']

    trainx = trainx / 255.0
    valx = valx / 255.0
    testx = testx / 255.0

    # Report value ranges in the order train, test, validation.
    for split in (trainx, testx, valx):
        print("max: " + str(np.amax(split)))
        print("min: " + str(np.amin(split)))

    # (N,3,32,32) -> (N,32,32,3)
    axes = (0, 2, 3, 1)
    return np.transpose(trainx, axes), np.transpose(valx, axes), np.transpose(testx, axes)
Example 20
Source File: cifar10.py    From biva-pytorch with MIT License 4 votes vote down vote up
def load_cifar(root, levels=256, with_y=False):
    """Load CIFAR-10 from ``cifar-10-python.tar.gz``, downloading it if absent.

    The archive is looked up in the current working directory first and then
    in ``root``; if neither exists it is downloaded into ``root``.  Batches
    are read straight from the tar archive without extracting it to disk.

    Args:
        root: directory that holds (or will receive) the archive.
        levels: passed to ``quantisize``; the result is divided by ``levels - 1``.
        with_y: when true, labels are returned alongside the images.

    Returns:
        ``(train_x, test_x)`` or, if ``with_y``,
        ``((train_x, train_t), (test_x, test_t))``.  Images are shaped
        ``(N, 3, 32, 32)``.
    """
    archive_name = 'cifar-10-python.tar.gz'

    # Prefer an archive in the working directory, otherwise use `root`.
    dataset = archive_name
    if not os.path.isfile(dataset):
        dataset = os.path.join(root, archive_name)

    if not os.path.isfile(dataset):
        origin = (
            'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
        )
        print("Downloading data from {}...".format(origin))
        urlretrieve(origin, dataset)

    train_members = ["cifar-10-batches-py/data_batch_{}".format(i) for i in range(1, 6)]
    # The context manager closes the archive even if a batch fails to load.
    # NOTE: pickle can execute arbitrary code; the archive must be trusted.
    with tarfile.open(dataset, 'r:gz') as archive:
        train_batches = [pkl.load(archive.extractfile(member), encoding="bytes")
                         for member in train_members]
        test = pkl.load(archive.extractfile("cifar-10-batches-py/test_batch"), encoding="bytes")

    # encoding="bytes" leaves the dictionary keys as bytes objects.
    train_x = np.concatenate([batch[b'data'] for batch in train_batches], axis=0) / 255.
    train_x = np.asarray(train_x, dtype='float32')
    train_t = np.concatenate([np.array(batch[b'labels']) for batch in train_batches], axis=0)

    test_x = test[b'data'] / 255.
    test_x = np.asarray(test_x, dtype='float32')
    test_t = np.array(test[b'labels'])

    train_x = train_x.reshape((train_x.shape[0], 3, 32, 32))
    test_x = test_x.reshape((test_x.shape[0], 3, 32, 32))
    train_x = quantisize(train_x, levels) / (levels - 1.)
    test_x = quantisize(test_x, levels) / (levels - 1.)

    if with_y:
        return (train_x, train_t), (test_x, test_t)
    return train_x, test_x
Example 21
Source File: image.py    From perceptron-benchmark with Apache License 2.0 4 votes vote down vote up
def load_cifar_image(shape=(32, 32), dtype=np.float32,
            bounds=(0, 1), data_format='channels_last',
            fname='cifar0.png', normalize=True):
    """Return the sample CIFAR image for testing

    Parameters
    ----------
    shape : list of integers
        The shape of the returned image.
    dtype : np.type
        The type for loading the image
    bounds : float tuple
        the range of loaded image before normalization
    data_format : str
        "channels_first" or "channels_last"
    fname : str
        The name of sample image
    normalize : Bool
        Whether the image is needed to be normalized.
    """
    from PIL import Image

    path = os.path.join(os.path.dirname(__file__), 'images/%s' % fname)
    image = Image.open(path)
    image = np.asarray(image, dtype=dtype)
    if(data_format == 'channels_first'):
        # NOTE(review): this reshapes rather than transposes the loaded
        # pixels; confirm that is the intended channels_first layout.
        image = image.reshape([3]+list(shape))
    else:
        image = image.reshape(list(shape)+[3])

    # tuple() lets a list such as [0, 255] compare equal as well.
    if tuple(bounds) != (0, 255):
        image /= 255.

    if(normalize):
        # The per-channel statistics must broadcast along the channel axis,
        # whose position depends on data_format; a fixed (3, 1, 1) shape
        # cannot broadcast against a channels_last (H, W, 3) image.
        if data_format == 'channels_first':
            stat_shape = (3, 1, 1)
        else:
            stat_shape = (1, 1, 3)
        mean = np.array([0.485, 0.456, 0.406]).reshape(stat_shape)
        std = np.array([0.225, 0.225, 0.225]).reshape(stat_shape)
        image = image - mean
        image = image / std

    # Cast back: the normalization above promotes the array to float64.
    image = np.asarray(image, dtype=dtype)

    return image
Example 22
Source File: utils.py    From bachbot with MIT License 4 votes vote down vote up
def load_cifar(flatten=True, labels=False):
    '''Load the CIFAR10 image dataset.

    The five training batches are split into the first 40000 samples
    (training) and the remainder (validation); the test batch is the third
    split.  Pixel values are mapped with ``x / 128 - 1``.  With ``flatten``
    each image is one row of length 32 * 32 * 3.  Returns three tuples
    (train, valid, test), each holding images and, if ``labels`` is true,
    the matching labels.
    '''
    def extract(name):
        logging.info('extracting data from %s', name)
        member = archive.extractfile(name)
        if sys.version_info < (3, ):
            batch = pickle.load(member)
        else:
            batch = pickle.load(member, encoding='bytes')
            # Mirror every bytes key under its decoded str name.
            for key in list(batch):
                batch[key.decode('utf8')] = batch[key]
        member.close()
        pixels = batch['data'].reshape((-1, 3, 32, 32))
        pixels = pixels.transpose((0, 2, 3, 1)).astype('f')
        pixels = pixels / 128 - 1
        if flatten:
            pixels = pixels.reshape((-1, 32 * 32 * 3))
        batch['data'] = pixels
        return batch

    archive_path = find('cifar10.tar.gz', 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')
    archive = tarfile.open(archive_path)

    all_images = []
    all_labels = []
    for batch_no in range(1, 6):
        batch = extract('cifar-10-batches-py/data_batch_{}'.format(batch_no))
        all_images.extend(batch['data'])
        all_labels.extend(batch['labels'])

    split_at = 40000
    train_images = np.asarray(all_images[:split_at])
    train_labels = np.asarray(all_labels[:split_at], 'i')
    valid_images = np.asarray(all_images[split_at:])
    valid_labels = np.asarray(all_labels[split_at:], 'i')

    test_batch = extract('cifar-10-batches-py/test_batch')
    test_images = test_batch['data']
    test_labels = test_batch['labels']

    archive.close()

    if not labels:
        return (train_images, ), (valid_images, ), (test_images, )
    return (train_images, train_labels), (valid_images, valid_labels), (test_images, test_labels)