Python numpy.load() Examples

The following are 30 code examples of numpy.load(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
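Before diving into the examples, a minimal round trip with numpy.save() and numpy.load() shows the basic contract (the file name here is arbitrary):

import numpy as np

# write an array to disk, then read it back
arr = np.arange(12).reshape(3, 4)
np.save('example.npy', arr)      # np.save appends '.npy' if the name lacks it
loaded = np.load('example.npy')
assert (loaded == arr).all()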
Example #1
Source File: cache.py    From vergeml with MIT License 10 votes
def _deserialize(self, data, type_):

        if self.compress:
            # decompress the data if needed
            data = lz4.frame.decompress(data)

        if type_ == _NUMPY:
            # deserialize numpy arrays
            buf = io.BytesIO(data)
            data = np.load(buf)

        elif type_ == _PICKLE:
            # deserialize other python objects
            data = pickle.loads(data)

        else:
            # otherwise, return the data as-is (bytes)
            pass

        return data 
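np.load() accepts any file-like object, which is what makes the io.BytesIO pattern above work. A minimal sketch of the same idea in isolation (note that since NumPy 1.16.3, allow_pickle defaults to False, which is fine for plain numeric arrays but must be enabled explicitly for object arrays):

import io
import numpy as np

buf = io.BytesIO()
np.save(buf, np.ones((2, 3)))  # serialize into the in-memory buffer
buf.seek(0)                    # rewind before reading
arr = np.load(buf)             # plain numeric arrays need no allow_pickle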
Example #2
Source File: data_loader.py    From medicaldetectiontoolkit with Apache License 2.0 6 votes
def generate_train_batch(self):

        batch_data, batch_segs, batch_pids, batch_targets = [], [], [], []
        class_targets_list = [v['class_target'] for (k, v) in self._data.items()]

        # sample patients towards an equilibrium of foreground classes on an RoI level
        # (after randomly sampling the ratio "batch_sample_slack")
        batch_ixs = dutils.get_class_balanced_patients(
            class_targets_list, self.batch_size, self.cf.head_classes - 1, slack_factor=self.cf.batch_sample_slack)
        patients = list(self._data.items())

        for b in batch_ixs:

            patient = patients[b][1]
            all_data = np.load(patient['data'], mmap_mode='r')
            data = all_data[0]
            seg = all_data[1].astype('uint8')
            batch_pids.append(patient['pid'])
            batch_targets.append(patient['class_target'])
            batch_data.append(data[np.newaxis])
            batch_segs.append(seg[np.newaxis])

        data = np.array(batch_data)
        seg = np.array(batch_segs).astype(np.uint8)
        class_target = np.array(batch_targets)
        return {'data': data, 'seg': seg, 'pid': batch_pids, 'class_target': class_target} 
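The mmap_mode='r' argument above makes np.load() memory-map the .npy file instead of reading it all into RAM, so indexing like all_data[0] only touches the bytes it needs — useful for large medical volumes. A small sketch under the same assumption of a plain .npy file:

import numpy as np

np.save('volume.npy', np.zeros((100, 512, 512), dtype=np.float32))
vol = np.load('volume.npy', mmap_mode='r')  # returns np.memmap; no full read
first_slice = np.array(vol[0])              # copy just one slice into memory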
Example #3
Source File: BasePythonDataLayer.py    From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License 6 votes
def set_mean(self):
        if self._mean_file:
            if type(self._mean_file) is str:
                # read the image mean from file
                try:
                    # try to load it as a NumPy (.npy) file first
                    self._mean = np.load(self._mean_file)
                except IOError:
                    # fall back to a Caffe binaryproto blob
                    blob = caffe_pb2.BlobProto()
                    with open(self._mean_file, 'rb') as f:
                        blob.ParseFromString(f.read())
                    self._mean = np.array(caffe.io.blobproto_to_array(blob))[0]
            else:
                # the mean was passed in directly as an array
                self._mean = np.array(self._mean_file)
        else:
            self._mean = None 
Example #4
Source File: encoding_images.py    From face-attendance-machine with Apache License 2.0 6 votes
def load_encodings():
    """
    Load the saved face encoding vectors and the corresponding name vectors, and return them.
    :return:
    """
    if not os.path.exists(KNOWN_FACE_NANE) or not os.path.exists(KNOWN_FACE_ENCODINGS):
        # (re)build the encodings before loading, if either file is missing
        encoding_images(data_path)
    known_face_encodings = np.load(KNOWN_FACE_ENCODINGS)
    known_face_names = np.load(KNOWN_FACE_NANE)
    aa = [file for file in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, file)) and file.endswith("npy")]
    # i.e. files starting with "known_face_encodings_" or "known_face_name_"
    for data in aa:
        if data.startswith('known_face_encodings_'):
            tmp_face_encodings = np.load(os.path.join(data_path, data))
            known_face_encodings = np.concatenate((known_face_encodings, tmp_face_encodings), axis=0)
            print("load ", data)
        elif data.startswith('known_face_name_'):
            tmp_face_name = np.load(os.path.join(data_path, data))
            known_face_names = np.concatenate((known_face_names, tmp_face_name), axis=0)
            print("load ", data)
        else:
            print('skip to load original ', data)
    return known_face_encodings, known_face_names 
Example #5
Source File: BasePythonDataLayer.py    From Caffe-Python-Data-Layer with BSD 2-Clause "Simplified" License 6 votes
def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str, Loader=yaml.FullLoader)  # PyYAML >= 5.1; plain yaml.load is deprecated
        self._layer_params = layer_params
        # default batch_size = 256
        self._batch_size = int(layer_params.get('batch_size', 256))
        self._resize = layer_params.get('resize', -1)
        self._mean_file = layer_params.get('mean_file', None)
        self._source_type = layer_params.get('source_type', 'CSV')
        self._shuffle = layer_params.get('shuffle', False)
        # read image_mean from file and preload all data into memory
        # will read either file or array into self._mean
        self.set_mean()
        self.preload_db()
        self._compressed = self._layer_params.get('compressed', True)
        if not self._compressed:
            self.decompress_data() 
Example #6
Source File: dataset_tool.py    From disentangling_conditional_gans with MIT License 6 votes
def create_cifar100(tfrecord_dir, cifar100_dir):
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'], dtype=np.int32)  # int32 so the dtype assert below holds
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (50000,) and labels.dtype == np.int32
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

Example #7
Source File: json_serializers.py    From dustmaps with GNU General Public License v2.0 6 votes
def deserialize_ndarray_npy(d):
    """
    Deserializes a JSONified :obj:`numpy.ndarray` that was created using numpy's
    :obj:`save` function.

    Args:
        d (:obj:`dict`): A dictionary representation of an :obj:`ndarray` object, created
            using :obj:`numpy.save`.

    Returns:
        An :obj:`ndarray` object.
    """
    with io.BytesIO() as f:
        f.write(json.loads(d['npy']).encode('latin-1'))
        f.seek(0)
        return np.load(f) 
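The matching serializer is not shown here; a hedged sketch of how such a representation could be produced (np.save into a BytesIO buffer, then a latin-1 decode so the raw bytes survive JSON encoding — serialize_ndarray_npy is a hypothetical name):

import io
import json
import numpy as np

def serialize_ndarray_npy(o):
    # hypothetical inverse of deserialize_ndarray_npy above
    with io.BytesIO() as f:
        np.save(f, o)
        return {'npy': json.dumps(f.getvalue().decode('latin-1'))}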
Example #8
Source File: data.py    From VSE-C with MIT License 6 votes
def __init__(self, data_path, data_split, vocab, cap_suffix='caps'):
        self.vocab = vocab
        loc = data_path + '/'

        # Captions
        self.captions = []
        with open(loc+'%s_%s.txt' % (data_split, cap_suffix), 'rb') as f:
            for line in f:
                tmp = line.strip()
                if type(tmp) == bytes:
                    tmp = bytes.decode(tmp)
                self.captions.append(tmp)

        # Image features
        self.images = np.load(loc+'%s_ims.npy' % data_split)
        self.length = len(self.captions)
        # the rkiros data has 5 captions per image (redundant image rows), so divide by 5; the 10-crop features don't need this
        if self.images.shape[0] != self.length:
            self.im_div = 5
        else:
            self.im_div = 1
        # the development set for coco is large and so validation would be slow
        if data_split == 'dev':
            self.length = 5000 
Example #9
Source File: data_loader.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes
def load_mnist(training_num=50000):
    data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
    if not os.path.isfile(data_path):
        from six.moves import urllib
        origin = (
            'https://github.com/sxjscience/mxnet/raw/master/example/bayesian-methods/mnist.npz'
        )
        print('Downloading data from %s to %s' % (origin, data_path))
        ctx = ssl._create_unverified_context()
        with urllib.request.urlopen(origin, context=ctx) as u, open(data_path, 'wb') as f:
            f.write(u.read())
        print('Done!')
    dat = numpy.load(data_path)
    X = (dat['X'][:training_num] / 126.0).astype('float32')
    Y = dat['Y'][:training_num]
    X_test = (dat['X_test'] / 126.0).astype('float32')
    Y_test = dat['Y_test']
    Y = Y.reshape((Y.shape[0],))
    Y_test = Y_test.reshape((Y_test.shape[0],))
    return X, Y, X_test, Y_test 
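mnist.npz is a zipped archive of arrays; np.load() returns an NpzFile that behaves like a dictionary keyed by the names given at save time, which is why dat['X'] and dat['Y'] work above. A minimal sketch of that round trip:

import numpy as np

np.savez('mnist_like.npz', X=np.zeros((5, 784)), Y=np.arange(5))
dat = np.load('mnist_like.npz')
print(dat.files)           # ['X', 'Y']
X, Y = dat['X'], dat['Y']  # members are read lazily on access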
Example #10
Source File: utils.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes
def load_params(dir_path="", epoch=None, name=""):
    prefix = os.path.join(dir_path, name)
    _, param_loading_path, _ = get_saving_path(prefix, epoch)
    while not os.path.isfile(param_loading_path):
        logging.info("in load_param, %s Not Found!" % param_loading_path)
        time.sleep(60)
    save_dict = nd.load(param_loading_path)
    arg_params = {}
    aux_params = {}
    for k, v in save_dict.items():
        tp, name = k.split(':', 1)
        if tp == 'arg':
            arg_params[name] = v
        if tp == 'aux':
            aux_params[name] = v
    return arg_params, aux_params, param_loading_path 
Example #11
Source File: test_forward.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes
def test_consistency(dump=False):
    shape = (299, 299)
    _get_model()
    _get_data(shape)
    if dump:
        _dump_images(shape)
        gt = None
    else:
        gt = {n: mx.nd.array(a) for n, a in np.load('data/inception-v3-dump.npz').items()}
    data = np.load('data/test_images_%d_%d.npy'%shape)
    sym, arg_params, aux_params = mx.model.load_checkpoint('model/Inception-7', 1)
    arg_params['data'] = data
    arg_params['softmax_label'] = np.random.randint(low=1, high=1000, size=(data.shape[0],))
    ctx_list = [{'ctx': mx.gpu(0), 'data': data.shape, 'type_dict': {'data': data.dtype}},
                {'ctx': mx.cpu(0), 'data': data.shape, 'type_dict': {'data': data.dtype}}]
    gt = check_consistency(sym, ctx_list, arg_params=arg_params, aux_params=aux_params,
                           tol=1e-3, grad_req='null', raise_on_err=False, ground_truth=gt)
    if dump:
        np.savez('data/inception-v3-dump.npz', **{n: a.asnumpy() for n, a in gt.items()}) 
Example #12
Source File: input.py    From DOTA_models with Apache License 2.0 6 votes
def extract_mnist_data(filename, num_images, image_size, pixel_depth):
  """
  Extract the images into a 4D tensor [image index, y, x, channels].

  Values are rescaled from [0, 255] down to [-0.5, 0.5].
  """
  # if not os.path.exists(file):
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      bytestream.read(16)
      buf = bytestream.read(image_size * image_size * num_images)
      data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
      data = (data - (pixel_depth / 2.0)) / pixel_depth
      data = data.reshape(num_images, image_size, image_size, 1)
      np.save(filename, data)
      return data
  else:
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:  # binary mode so np.load can parse the .npy header
      return np.load(file_obj) 
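The asymmetry between np.save(filename, data) and np.load(filename + ".npy") above is deliberate: np.save appends '.npy' when the target name has no such suffix, so the existence check and the later load must add the extension themselves. A quick illustration:

import numpy as np

np.save('digits', np.zeros(4))  # actually writes 'digits.npy'
arr = np.load('digits.npy')     # the load path must spell out '.npy'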
Example #13
Source File: preparation.py    From cvpr2018-hnd with MIT License 6 votes
def is_image_file(id, dataset, dtype, filename):
    filename_lower = filename.lower()
    if any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS):
        if dtype == 'novel':
            try:
                default_loader(filename)
                return True
            except OSError:
                print('{filename} failed to load'.format(filename=filename))
                with open('taxonomy/{dataset}/corrupted_{dtype}_{id:d}.txt' \
                          .format(dataset=dataset, dtype=dtype, id=id), 'a') as f:
                    f.write(filename + '\n')
                return False
        else:
            return True
    else:
        return False 
Example #14
Source File: dataloader_utils.py    From medicaldetectiontoolkit with Apache License 2.0 5 votes
def convert_to_npy(npz_file):
    identifier = os.path.split(npz_file)[1][:-4]
    if not os.path.isfile(npz_file[:-4] + ".npy"):
        a = np.load(npz_file)[identifier]
        np.save(npz_file[:-4] + ".npy", a) 
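Converting once from .npz to .npy pays off because each np.load(npz_file)[key] access re-reads (and, for compressed archives, re-decompresses) the member, whereas a flat .npy file can be memory-mapped. A small sketch of the difference:

import numpy as np

np.savez_compressed('case.npz', data=np.zeros((64, 64, 64)))
a = np.load('case.npz')['data']         # decompresses the whole member
np.save('case.npy', a)
b = np.load('case.npy', mmap_mode='r')  # later reads are lazy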
Example #15
Source File: predictor.py    From medicaldetectiontoolkit with Apache License 2.0 5 votes
def __init__(self, cf, net, logger, mode):

        self.cf = cf
        self.logger = logger

        # mode is 'val' for patient-based validation/monitoring and 'test' for inference.
        self.mode = mode

        # model instance. In validation mode, contains parameters of current epoch.
        self.net = net

        # rank of current epoch loaded (for temporal averaging). this info is added to each prediction,
        # for correct weighting during consolidation.
        self.rank_ix = '0'

        # number of ensembled models. used to calculate the number of expected predictions per position
        # during consolidation of predictions. Default is 1 (no ensembling, e.g. in validation).
        self.n_ens = 1

        if self.mode == 'test':
            try:
                self.epoch_ranking = np.load(os.path.join(self.cf.fold_dir, 'epoch_ranking.npy'))[:cf.test_n_epochs]
            except OSError:
                raise RuntimeError('no epoch ranking file in fold directory. '
                                   'seems like you are trying to run testing without prior training...')
            self.n_ens = cf.test_n_epochs
            if self.cf.test_aug:
                self.n_ens *= 4 
Example #16
Source File: utils.py    From Recipes with MIT License 5 votes
def load_pickle_data_test():
    # note: cPickle is Python 2-only; on Python 3, use pickle with encoding='latin1'
    with open('data/cifar-10-batches-py/test_batch', 'rb') as fo_test:
        dict_test = cPickle.load(fo_test)
    test_X = dict_test['data']
    test_y = dict_test['labels']
    test_y = np.hstack(test_y).astype('int32')

    test_X = test_X.reshape(test_X.shape[0], 3, PIXELS, PIXELS).astype('float32')

    pixel_mean = np.load('data/pixel_mean.npy')
    test_X -= pixel_mean

    return test_X, test_y 
Example #17
Source File: Deep_Residual_Learning_CIFAR-10.py    From Recipes with MIT License 5 votes
def unpickle(file):
    import cPickle
    # use a context manager and avoid shadowing the built-in 'dict'
    with open(file, 'rb') as fo:
        d = cPickle.load(fo)
    return d 
Example #18
Source File: actions.py    From rasa-faq-bot with MIT License 5 votes
def encode_standard_question():
    bc = BertClient()
    data = json.load(open("./data/nlu/faq.json", "rt", encoding="utf-8"))
    standard_questions = [each['q'] for each in data]
    print("Standard question size", len(standard_questions))
    print("Start to calculate encoder....")
    standard_questions_encoder = bc.encode(standard_questions)
    np.save("./data/standard_questions", standard_questions_encoder)
    standard_questions_encoder_len = np.sqrt(np.sum(standard_questions_encoder * standard_questions_encoder, axis=1))
    np.save("./data/standard_questions_len", standard_questions_encoder_len) 
Example #19
Source File: pack_dataset.py    From medicaldetectiontoolkit with Apache License 2.0 5 votes
def convert_to_npy(npz_file):
    if not os.path.isfile(npz_file[:-3] + "npy"):
        a = np.load(npz_file)['data']
        np.save(npz_file[:-3] + "npy", a) 
Example #20
Source File: actions.py    From rasa-faq-bot with MIT License 5 votes
def __init__(self):
        super(ActionGetFAQAnswer, self).__init__()
        self.bc = BertClient()
        self.faq_data = json.load(open("./data/nlu/faq.json", "rt", encoding="utf-8"))
        self.standard_questions_encoder = np.load("./data/standard_questions.npy")
        self.standard_questions_encoder_len = np.load("./data/standard_questions_len.npy")
        print(self.standard_questions_encoder.shape) 
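With the encodings and their precomputed norms loaded, scoring a user question against the standard questions reduces to a cosine similarity. A hedged sketch of how such a lookup might proceed (query_vec stands in for a BertClient encoding; the function name is illustrative):

import numpy as np

def most_similar(query_vec, encodings, encodings_len):
    # cosine similarity between one query vector and every stored question
    scores = encodings.dot(query_vec) / (encodings_len * np.linalg.norm(query_vec))
    best = int(np.argmax(scores))
    return best, float(scores[best])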
Example #21
Source File: pointmass.py    From cs294-112_hws with MIT License 5 votes
def visualize(self, states, itr, dirname):
        if states is None:
            states = np.load(os.path.join(dirname, '{}.npy'.format(itr)))
        indices = np.array([int(self.preprocess(s)) for s in states])
        a = np.zeros(int(self.grid_size))
        for i in indices:
            a[i] += 1
        max_freq = np.max(a)
        a /= float(max_freq)  # normalize
        a = np.reshape(a, (self.scale, self.scale))
        ax = sns.heatmap(a)
        plt.savefig(os.path.join(dirname, '{}.png'.format(itr)))
        plt.close() 
Example #22
Source File: cross_entropy.py    From Pytorch-Project-Template with MIT License 5 votes
def __init__(self, config=None):
        super(CrossEntropyLoss, self).__init__()
        if config is None:
            self.loss = nn.CrossEntropyLoss()
        else:
            class_weights = np.load(config.class_weights)
            self.loss = nn.CrossEntropyLoss(ignore_index=config.ignore_index,
                                      weight=torch.from_numpy(class_weights.astype(np.float32)),
                                      size_average=True, reduce=True) 
Example #23
Source File: demo_collect_and_playback_data.py    From robosuite with MIT License 5 votes
def playback_trajectory(env, ep_dir):
    """Playback data from an episode.

    Args:
        ep_dir: The path to the directory containing data for an episode.
    """

    # first reload the model from the xml
    xml_path = os.path.join(ep_dir, "model.xml")
    with open(xml_path, "r") as f:
        env.reset_from_xml_string(f.read())

    state_paths = os.path.join(ep_dir, "state_*.npz")

    # read states back, load them one by one, and render
    t = 0
    for state_file in sorted(glob(state_paths)):
        print(state_file)
        dic = np.load(state_file)
        states = dic["states"]
        for state in states:
            env.sim.set_state_from_flattened(state)
            env.sim.forward()
            env.render()
            t += 1
            if t % 100 == 0:
                print(t) 
Example #24
Source File: pretrain.py    From OpenNRE with MIT License 5 votes
def get_model(model_name, root_path=default_root_path):
    check_root()
    ckpt = os.path.join(root_path, 'pretrain/nre/' + model_name + '.pth.tar')
    if model_name == 'wiki80_cnn_softmax':
        download_pretrain(model_name, root_path=root_path)
        download('glove', root_path=root_path)
        download('wiki80', root_path=root_path)
        wordi2d = json.load(open(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_word2id.json')))
        word2vec = np.load(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_mat.npy'))
        rel2id = json.load(open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))
        sentence_encoder = encoder.CNNEncoder(token2id=wordi2d,
                                                     max_length=40,
                                                     word_size=50,
                                                     position_size=5,
                                                     hidden_size=230,
                                                     blank_padding=True,
                                                     kernel_size=3,
                                                     padding_size=1,
                                                     word2vec=word2vec,
                                                     dropout=0.5)
        m = model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)
        m.load_state_dict(torch.load(ckpt, map_location='cpu')['state_dict'])
        return m
    elif model_name == 'wiki80_bert_softmax':
        download_pretrain(model_name, root_path=root_path)
        download('bert_base_uncased', root_path=root_path)
        download('wiki80', root_path=root_path)
        rel2id = json.load(open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))
        sentence_encoder = encoder.BERTEncoder(
            max_length=80, pretrain_path=os.path.join(root_path, 'pretrain/bert-base-uncased'))
        m = model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)
        m.load_state_dict(torch.load(ckpt, map_location='cpu')['state_dict'])
        return m
    else:
        raise NotImplementedError 
Example #25
Source File: molecular_example.py    From OpenFermion-Cirq with Apache License 2.0 5 votes
def make_h6_1_3() -> Tuple[RestrictedHartreeFockObjective,
                           of.MolecularData,
                           np.ndarray,
                           np.ndarray,
                           np.ndarray]:
    # load the molecule from molecular data
    import openfermioncirq.experiments.hfvqe as hfvqe
    h6_1_3_path = os.path.join(
        hfvqe.__path__[0],
        'molecular_data/hydrogen_chains/h_6_sto-3g/bond_distance_1.3')

    molfile = os.path.join(h6_1_3_path, 'H6_sto-3g_singlet_linear_r-1.3.hdf5')
    molecule = of.MolecularData(filename=molfile)
    molecule.load()

    S = np.load(os.path.join(h6_1_3_path, 'overlap.npy'))
    Hcore = np.load(os.path.join(h6_1_3_path, 'h_core.npy'))
    TEI = np.load(os.path.join(h6_1_3_path, 'tei.npy'))

    _, X = sp.linalg.eigh(Hcore, S)
    obi = of.general_basis_change(Hcore, X, (1, 0))
    tbi = np.einsum('psqr', of.general_basis_change(TEI, X, (1, 0, 1, 0)))
    molecular_hamiltonian = generate_hamiltonian(obi, tbi,
                                                 molecule.nuclear_repulsion)

    rhf_objective = RestrictedHartreeFockObjective(molecular_hamiltonian,
                                                   molecule.n_electrons)

    scipy_result = rhf_minimization(rhf_objective)

    return rhf_objective, molecule, scipy_result.x, obi, tbi 
Example #26
Source File: molecular_example_odd_qubits.py    From OpenFermion-Cirq with Apache License 2.0 5 votes
def make_h3_2_5() -> Tuple[RestrictedHartreeFockObjective, of.MolecularData,
                           np.ndarray, np.ndarray, np.ndarray]:
    # load the molecule from molecular data
    h3_2_5_path = os.path.join(
        hfvqe.__path__[0],
        'molecular_data/hydrogen_chains/h_3_p_sto-3g/bond_distance_2.5')

    molfile = os.path.join(h3_2_5_path,
                           'H3_plus_sto-3g_singlet_linear_r-2.5.hdf5')
    molecule = of.MolecularData(filename=molfile)
    molecule.load()

    S = np.load(os.path.join(h3_2_5_path, 'overlap.npy'))
    Hcore = np.load(os.path.join(h3_2_5_path, 'h_core.npy'))
    TEI = np.load(os.path.join(h3_2_5_path, 'tei.npy'))

    _, X = sp.linalg.eigh(Hcore, S)
    obi = of.general_basis_change(Hcore, X, (1, 0))
    tbi = np.einsum('psqr', of.general_basis_change(TEI, X, (1, 0, 1, 0)))
    molecular_hamiltonian = generate_hamiltonian(obi, tbi,
                                                 molecule.nuclear_repulsion)

    rhf_objective = RestrictedHartreeFockObjective(molecular_hamiltonian,
                                                   molecule.n_electrons)

    scipy_result = rhf_minimization(rhf_objective)
    return rhf_objective, molecule, scipy_result.x, obi, tbi 
Example #27
Source File: input.py    From DOTA_models with Apache License 2.0 5 votes
def extract_mnist_labels(filename, num_images):
  """
  Extract the labels into a vector of int64 label IDs.
  """
  # if not os.path.exists(file):
  if not tf.gfile.Exists(filename+".npy"):
    with gzip.open(filename) as bytestream:
      bytestream.read(8)
      buf = bytestream.read(1 * num_images)
      labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
      np.save(filename, labels)
    return labels
  else:
    with tf.gfile.Open(filename+".npy", mode='rb') as file_obj:  # binary mode so np.load can parse the .npy header
      return np.load(file_obj) 
Example #28
Source File: cache.py    From vergeml with MIT License 5 votes
def read(self, file, path):
        """Read the content index from file.
        """
        pos, = struct.unpack('<Q', file.read(8))
        if pos == 0:
            raise VergeMLError("Invalid cache file: {}".format(path))
        file.seek(pos)
        self.index, self.meta, self.info = pickle.load(file) 
Example #29
Source File: utils.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 5 votes
def load_npz(path):
    with numpy.load(path) as data:
        ret = {k: data[k] for k in data.keys()}
        return ret 
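This works because the NpzFile returned by np.load() on an .npz archive is a context manager: leaving the with block closes the underlying zip file, so any arrays needed afterwards should be copied out first — exactly what the dict comprehension above does. For example:

import numpy as np

np.savez('params.npz', w=np.ones(3), b=np.zeros(3))
with np.load('params.npz') as data:
    params = {k: data[k] for k in data.files}
print(params['w'])  # the archive is closed, but the copied arrays live on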
Example #30
Source File: cifar10.py    From Recipes with MIT License 5 votes
def load_dataset(path):
    download_dataset(path)

    # training data
    data = [np.load(os.path.join(path, 'cifar-10-batches-py',
                                 'data_batch_%d' % (i + 1))) for i in range(5)]
    X_train = np.vstack([d['data'] for d in data])
    y_train = np.hstack([np.asarray(d['labels'], np.int8) for d in data])

    # test data
    data = np.load(os.path.join(path, 'cifar-10-batches-py', 'test_batch'))
    X_test = data['data']
    y_test = np.asarray(data['labels'], np.int8)

    # reshape
    X_train = X_train.reshape(-1, 3, 32, 32)
    X_test = X_test.reshape(-1, 3, 32, 32)

    # normalize
    try:
        mean_std = np.load(os.path.join(path, 'cifar-10-mean_std.npz'))
        mean = mean_std['mean']
        std = mean_std['std']
    except IOError:
        mean = X_train.mean(axis=(0, 2, 3), keepdims=True).astype(np.float32)
        std = X_train.std(axis=(0, 2, 3), keepdims=True).astype(np.float32)
        np.savez(os.path.join(path, 'cifar-10-mean_std.npz'),
                 mean=mean, std=std)
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    return X_train, y_train, X_test, y_test
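A portability note on this last example: the CIFAR-10 batch files are pickles, not .npy files, and np.load() only falls back to unpickling when allow_pickle is enabled (the default until NumPy 1.16.3, False afterwards). On Python 3 these Python 2 pickles also need an encoding, so under a recent NumPy the batch loads above would presumably become something like:

import os
import numpy as np

path = 'data'  # assumed dataset root, as passed to load_dataset above
batch = np.load(os.path.join(path, 'cifar-10-batches-py', 'data_batch_1'),
                allow_pickle=True, encoding='latin1')
X = batch['data']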