Python six.moves.cPickle.load() Examples

The following are 30 code examples of six.moves.cPickle.load(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module six.moves.cPickle, or try the search function.
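Before the project examples, here is a minimal sketch of the usual wrapper pattern around six.moves.cPickle.load() (the load_pickle helper and the model.pkl path are illustrative, not taken from any project below): six.moves.cPickle resolves to cPickle on Python 2 and to the standard pickle module on Python 3, so one call site can serve both interpreters.

import six
from six.moves import cPickle

def load_pickle(path):
    """Load a pickle written by either Python 2 or Python 3."""
    with open(path, 'rb') as f:
        if six.PY3:
            # Pickles written by Python 2 with 8-bit strings need an explicit
            # encoding when unpickled on Python 3.
            return cPickle.load(f, encoding='latin-1')
        return cPickle.load(f)

data = load_pickle('model.pkl')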
Example #1
Source File: read_LaMemDataset.py    From Colorization.tensorflow with MIT License 6 votes
def read_dataset(data_dir):
    pickle_filename = "lamem.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        lamem_folder = (DATA_URL.split("/")[-1]).split(os.path.extsep)[0]
        result = {'images': create_image_lists(os.path.join(data_dir, lamem_folder))}
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print ("Found pickle file!")

    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_records = result['images']
        del result

    return training_records 
Example #2
Source File: test_pickle_store.py    From arctic with GNU Lesser General Public License v2.1 6 votes
def test_read_backward_compatibility():
    """Test backwards compatibility with a pickled file that's created with Python 2.7.3,
    Numpy 1.7.1_ahl2 and Pandas 0.14.1
    """
    fname = path.join(path.dirname(__file__), "data", "test-data.pkl")

    # For newer versions; verify that unpickling fails when using cPickle
    if PANDAS_VERSION >= LooseVersion("0.16.1"):
        if sys.version_info[0] >= 3:
            with pytest.raises(UnicodeDecodeError), open(fname) as fh:
                cPickle.load(fh)
        else:
            with pytest.raises(TypeError), open(fname) as fh:
                cPickle.load(fh)

    # Verify that PickleStore() uses a backwards compatible unpickler.
    store = PickleStore()

    with open(fname) as fh:
        # PickleStore compresses data with lz4
        version = {'blob': compressHC(fh.read())}
    df = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.DataFrame(range(4), pd.date_range(start="20150101", periods=4))
    assert (df == expected).all().all() 
Example #3
Source File: datasets.py    From models with Apache License 2.0 6 votes
def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
                   for i in xrange(1, 6)]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        inputs.append(cPickle.load(f))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input['data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input['labels'] for each_input in inputs])
    return all_images, all_labels 
Example #4
Source File: load.py    From polyglot with GNU General Public License v3.0 6 votes
def load_embeddings(lang="en", task="embeddings", type="cw", normalize=False):
  """Return a word embeddings object for `lang` and of type `type`

  Args:
    lang (string): language code.
    task (string): parameters that define task.
    type (string): skipgram, cw, cbow ...
    normalize (boolean): return normalized word embedding vectors.
  """
  src_dir = "_".join((type, task)) if type else task
  p = locate_resource(src_dir, lang)
  e = Embedding.load(p)
  if type == "cw":
    e.apply_expansion(CaseExpander)
    e.apply_expansion(DigitExpander)
  if type == "sgns":
    e.apply_expansion(CaseExpander)
  if type == "ue":
    e.apply_expansion(CaseExpander)
  if normalize:
    e.normalize_words(inplace=True)
  return e 
Example #5
Source File: datasets.py    From models with Apache License 2.0 6 votes
def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
                   for i in xrange(1, 6)]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        inputs.append(cPickle.load(f))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input['data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input['labels'] for each_input in inputs])
    return all_images, all_labels 
Example #6
Source File: chineselib.py    From ctw-baseline with MIT License 6 votes
def load_data(self, FLAGS):
        self.FLAGS = FLAGS
        with open(settings.CATES) as f:
            cates = json.load(f)
        text2cate = {c['text']: c['cate_id'] for c in cates}
        self.num_samples = 0
        self.labels = [[] for i in range(self.num_classes)]
        with open(FLAGS.dataset_dir, 'rb') as f:
            all = cPickle.load(f)
        for image, text in all:
            label = text2cate.get(text)
            # unknown or out-of-range characters fall into the catch-all class
            if label is None or label >= settings.NUM_CHAR_CATES:
                self.labels[settings.NUM_CHAR_CATES].append(image)
            else:  # label < settings.NUM_CHAR_CATES:
                self.labels[label].append(image)
            self.num_samples += 1
        for label in self.labels:
            assert 0 < len(label)
        # self.preview() 
Example #7
Source File: accuracy_datasets.py    From models with Apache License 2.0 6 votes
def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
                   for i in xrange(1, 6)]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        inputs.append(cPickle.load(f))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input['data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input['labels'] for each_input in inputs])
    return all_images, all_labels 
Example #8
Source File: train.py    From densecap-tensorflow with MIT License 6 votes
def from_snapshot(self, sess, sfile, nfile):
        print('Restoring model snapshots from {:s}'.format(sfile))
        self.saver.restore(sess, sfile)
        print('Restored.')
        # Needs to restore the other hyper-parameters/states for training, (TODO xinlei) I have
        # tried my best to find the random states so that it can be recovered exactly
        # However the Tensorflow state is currently not available
        with open(nfile, 'rb') as fid:
            st0 = pickle.load(fid)
            cur = pickle.load(fid)
            perm = pickle.load(fid)
            cur_val = pickle.load(fid)
            perm_val = pickle.load(fid)
            last_snapshot_iter = pickle.load(fid)

            np.random.set_state(st0)
            self.data_layer._cur = cur
            self.data_layer._perm = perm
            self.data_layer_val._cur = cur_val
            self.data_layer_val._perm = perm_val

        return last_snapshot_iter 
Example #9
Source File: test_function.py    From D-VAE with MIT License 6 votes
def test_function_dump():
    v = theano.tensor.vector()
    fct1 = theano.function([v], v + 1)

    tmpdir = None
    try:
        tmpdir = tempfile.mkdtemp()
        fname = os.path.join(tmpdir, 'test_function_dump.pkl')
        theano.function_dump(fname, [v], v + 1)
        with open(fname, 'rb') as f:
            l = pickle.load(f)
    finally:
        if tmpdir is not None:
            shutil.rmtree(tmpdir)

    fct2 = theano.function(**l)
    x = [1, 2, 3]
    assert numpy.allclose(fct1(x), fct2(x)) 
Example #10
Source File: visual_genome.py    From densecap-tensorflow with MIT License 6 votes
def _load_image_set_index(self, ext='json'):
        """
        Load the indexes listed in this dataset's image set file.
        """
        if cfg.LIMIT_RAM:
            if ext == 'json':
                path = pjoin(cfg.SPLIT_DIR, 'densecap_splits.json')
                with open(path, 'r') as f:
                    # NOTE: the return index has entries with INT type
                    image_index = json.load(f)[self._image_set]
                    print ("loading splits from {}".format(path))
            elif ext == 'txt':
                path = pjoin(cfg.SPLIT_DIR, '%s.txt' % self._image_set)
                with open(path, 'r') as f:
                    image_index = [line.strip() for line in f.readlines()]
                print ("loading splits from {}".format(path))
        else:
            image_index = [key for key in self._gt_regions]

        print("Number of examples: {}".format(len(image_index)))
        return image_index 
Example #11
Source File: sample.py    From char-rnn-tensorflow with MIT License 6 votes
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    #Use most frequent char if no prime is given
    if args.prime == '':
        args.prime = chars[0]
    model = Model(saved_args, training=False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            data = model.sample(sess, chars, vocab, args.n, args.prime,
                               args.sample).encode('utf-8')
            print(data.decode("utf-8")) 
Example #12
Source File: prepare_notmnist.py    From Neural-Network-Programming-with-TensorFlow with MIT License 5 votes
def merge_datasets(pickle_files, train_size, valid_size=0):
  num_classes = len(pickle_files)
  valid_dataset, valid_labels = make_arrays(valid_size, image_size)
  train_dataset, train_labels = make_arrays(train_size, image_size)
  vsize_per_class = valid_size // num_classes
  tsize_per_class = train_size // num_classes
    
  start_v, start_t = 0, 0
  end_v, end_t = vsize_per_class, tsize_per_class
  end_l = vsize_per_class+tsize_per_class
  for label, pickle_file in enumerate(pickle_files):       
    try:
      with open(pickle_file, 'rb') as f:
        letter_set = pickle.load(f)
        np.random.shuffle(letter_set)
        if valid_dataset is not None:
          valid_letter = letter_set[:vsize_per_class, :, :]
          valid_dataset[start_v:end_v, :, :] = valid_letter
          valid_labels[start_v:end_v] = label
          start_v += vsize_per_class
          end_v += vsize_per_class
                    
        train_letter = letter_set[vsize_per_class:end_l, :, :]
        train_dataset[start_t:end_t, :, :] = train_letter
        train_labels[start_t:end_t] = label
        start_t += tsize_per_class
        end_t += tsize_per_class
    except Exception as e:
      print('Unable to process data from', pickle_file, ':', e)
      raise
    
  return valid_dataset, valid_labels, train_dataset, train_labels 
Example #13
Source File: io.py    From KL-Loss with Apache License 2.0 5 votes
def save_object(obj, file_name, pickle_format=2):
    """Save a Python object by pickling it.

Unless specifically overridden, we want to save it in Pickle format=2 since this
will allow other Python2 executables to load the resulting Pickle. When we want
to completely remove Python2 backward-compatibility, we can bump it up to 3. We
should never use pickle.HIGHEST_PROTOCOL as far as possible if the resulting
file is manifested or used, external to the system.
    """
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle_format) 
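A short round-trip sketch of the rationale in the docstring above (the /tmp path and dictionary contents are made up for illustration): an object dumped with protocol 2 can be read back with six.moves.cPickle.load() from both Python 2 and Python 3.

import os
from six.moves import cPickle as pickle

obj = {'step': 3, 'weights': [0.1, 0.2, 0.3]}
path = '/tmp/obj.pkl'
with open(path, 'wb') as f:
    pickle.dump(obj, f, 2)  # protocol 2 keeps the file readable from Python 2

with open(path, 'rb') as f:
    restored = pickle.load(f)
assert restored == obj
os.remove(path)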
Example #14
Source File: load.py    From polyglot with GNU General Public License v3.0 5 votes
def load_pos_model(lang="en", version="2"):
  """Return a part of speech tagger parameters for `lang` and of version `version`

  Args:
    lang (string): language code.
    version (string): version of the parameters to be used.
  """
  src_dir = "pos{}".format(version)
  p = locate_resource(src_dir, lang)
  fh = _open(p)
  return dict(np.load(fh)) 
Example #15
Source File: cifar10.py    From TensorflowFramework with BSD 3-Clause "New" or "Revised" License 5 votes
def read(self, mode, params):
    """Create an instance of the dataset object."""
    batches = {
      tf.estimator.ModeKeys.TRAIN: TRAIN_BATCHES,
      tf.estimator.ModeKeys.EVAL: TEST_BATCHES
    }[mode]

    all_images = []
    all_labels = []

    for batch in batches:
      with open("%s%s%s" % (LOCAL_DIR, DATA_DIR, batch), "rb") as fo:
        batch_dict = cPickle.load(fo)
        images = np.array(batch_dict["data"])
        labels = np.array(batch_dict["labels"])

        num = images.shape[0]
        images = np.reshape(images, [num, 3, IMAGE_SIZE, IMAGE_SIZE])
        images = np.transpose(images, [0, 2, 3, 1])
        print("Loaded %d examples." % num)

        all_images.append(images)
        all_labels.append(labels)

    all_images = np.concatenate(all_images)
    all_labels = np.concatenate(all_labels)

    return tf.data.Dataset.from_tensor_slices((all_images, all_labels)) 
Example #16
Source File: load.py    From polyglot with GNU General Public License v3.0 5 votes
def load_transliteration_table(lang="en", version="2"):
  """Return a morfessor model for `lang` and of version `version`

  Args:
    lang (string): language code.
    version (string): version of the parameters to be used.
  """
  src_dir = "transliteration{}".format(version)
  p = locate_resource(src_dir, lang)
  file_handler = _open(p)
  return pickle.load(file_handler) 
Example #17
Source File: load.py    From polyglot with GNU General Public License v3.0 5 votes
def load_unified_pos_model(lang="en"):
  src_dir = "unipos"
  p = locate_resource(src_dir, lang)
  return dict(np.load(p)) 
Example #18
Source File: load.py    From polyglot with GNU General Public License v3.0 5 votes
def load_ner_model(lang="en", version="2"):
  """Return a named entity extractor parameters for `lang` and of version `version`

  Args:
    lang (string): language code.
    version (string): version of the parameters to be used.
  """
  src_dir = "ner{}".format(version)
  p = locate_resource(src_dir, lang)
  fh = _open(p)
  try:
    return pickle.load(fh)
  except UnicodeDecodeError:
    fh.seek(0)
    return pickle.load(fh, encoding='latin1') 
Example #19
Source File: 1_prepare_pickle.py    From Neural-Network-Programming-with-TensorFlow with MIT License 5 votes
def merge_datasets(pickle_files, train_size, valid_size=0):
  num_classes = len(pickle_files)
  valid_dataset, valid_labels = make_arrays(valid_size, image_size, 3)
  train_dataset, train_labels = make_arrays(train_size, image_size, 3)
  vsize_per_class = valid_size // num_classes
  tsize_per_class = train_size // num_classes
    
  start_v, start_t = 0, 0
  end_v, end_t = vsize_per_class, tsize_per_class
  end_l = vsize_per_class+tsize_per_class
  for label, pickle_file in enumerate(pickle_files):       
    try:
      with open(pickle_file, 'rb') as f:
        letter_set = pickle.load(f)
        # let's shuffle the letters to have random validation and training set
        np.random.shuffle(letter_set)
        if valid_dataset is not None:
          valid_letter = letter_set[:vsize_per_class, :, :]
          valid_dataset[start_v:end_v, :, :] = valid_letter
          valid_labels[start_v:end_v] = label
          start_v += vsize_per_class
          end_v += vsize_per_class
                    
        train_letter = letter_set[vsize_per_class:end_l, :, :]
        train_dataset[start_t:end_t, :, :] = train_letter
        train_labels[start_t:end_t] = label
        start_t += tsize_per_class
        end_t += tsize_per_class
    except Exception as e:
      print('Unable to process data from', pickle_file, ':', e)
      raise
    
  return valid_dataset, valid_labels, train_dataset, train_labels 
Example #20
Source File: utils.py    From AAT with MIT License 5 votes
def pickle_load(f):
    """ Load a pickle.
    Parameters
    ----------
    f: file-like object
    """
    if six.PY3:
        return cPickle.load(f, encoding='latin-1')
    else:
        return cPickle.load(f) 
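A usage sketch for the helper above (the features_py2.pkl file name is hypothetical): on Python 3 the latin-1 fallback lets pickle_load() read files written by Python 2 that a plain cPickle.load(f) would reject with UnicodeDecodeError.

with open('features_py2.pkl', 'rb') as f:
    feats = pickle_load(f)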
Example #21
Source File: visual_genome.py    From densecap-tensorflow with MIT License 5 votes
def gt_roidb_unlim_ram(self):
        """
        Return the database of ground-truth regions of interest.

        This function loads/saves from/to a cache file to speed up future calls.
        """
        cache_file = pjoin(self._data_path, self._image_set + '_gt_roidb.pkl')
        cache_file_phrases = pjoin(self._data_path, self._image_set + '_gt_phrases.pkl')
        if os.path.exists(cache_file) and USE_CACHE:
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            print('{} gt roidb loaded from {}'.format(self._image_set, cache_file))
            return roidb

        gt_roidb = [self._load_vg_annotation(index) for index in self._image_index]
        gt_phrases = {}
        for k, v in six.iteritems(self._gt_regions):
            for reg in v['regions']:
                gt_phrases[reg['region_id']] = self._line_to_stream(reg['phrase_tokens'])

        with open(cache_file, 'wb') as fid:
            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
        with open(cache_file_phrases, 'wb') as fid:
            cPickle.dump(gt_phrases, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))
        # print('wrote gt phrases to {}'.format(cache_file_phrases))
        return gt_roidb 
Example #22
Source File: sample.py    From YellowFin with Apache License 2.0 5 votes
def sample(args):
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'rb') as f:
        chars, vocab = cPickle.load(f)
    model = Model(saved_args, training=False)
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(model.sample(sess, chars, vocab, args.n, args.prime,
                               args.sample).encode('utf-8')) 
Example #23
Source File: visual_genome.py    From densecap-tensorflow with MIT License 5 votes
def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        if cfg.LIMIT_RAM:
            # load region from a json file
            with open(pjoin(self.region_imset_path, '%s.json' % index), 'r') as f:
                image_path = json.load(f)['path']
        else:
            image_path = self._gt_regions[str(index)]['path']

        assert os.path.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path 
Example #24
Source File: utils.py    From AAT with MIT License 5 votes
def if_use_feat(caption_model):
    # Decide if load attention feature according to caption model
    if caption_model in ['show_tell', 'all_img', 'fc', 'newfc']:
        use_att, use_fc = False, True
    elif caption_model == 'language_model':
        use_att, use_fc = False, False
    elif caption_model in ['topdown', 'aoa', 'aat']:
        use_fc, use_att = True, True
    else:
        use_att, use_fc = True, False
    return use_fc, use_att

Example #25
Source File: minibatch.py    From densecap-tensorflow with MIT License 5 votes
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(scale_inds)
    processed_ims = []
    im_scales = []
    if cfg.LIMIT_RAM:
        # roidb is the pickle file path
        assert num_images == 1, "LIMIT_RAM version, it has to be one image."
        with open(roidb, 'rb') as f:
            roidb = [cPickle.load(f)]

    for i in xrange(num_images):
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, roidb 
Example #26
Source File: dataset.py    From EasyPR-python with Apache License 2.0 5 votes
def __init__(self, dataset_params, phase):

        # process params
        self.data_path = str(dataset_params['path'])
        self.label_path = os.path.join(self.data_path, phase + '.pickle')
        self.batch_size = int(dataset_params['batch_size'])
        self.thread_num = int(dataset_params['thread_num'])
        self.gray = False

        if 'gray' in dataset_params:
            self.gray = dataset_params['gray']

        # record and image_label queue
        self.record_queue = Queue(maxsize=10000)
        self.image_label_queue = Queue(maxsize=512)

        with open(self.label_path, 'rb') as f:
            result = pickle.load(f)

        self.record_list = result  # {'name', 'label', 'subdir'}
        self.record_point = 0
        self.record_number = len(self.record_list)
        if self.batch_size == -1:
            self.batch_size = self.record_number

        self.num_batch_per_epoch = int(self.record_number / self.batch_size)

        t_record_producer = Thread(target=self.record_producer)
        t_record_producer.daemon = True
        t_record_producer.start()

        for i in range(self.thread_num):
            t = Thread(target=self.record_customer)
            t.daemon = True
            t.start() 
Example #27
Source File: pickle_read_test.py    From densecap-tensorflow with MIT License 5 votes
def pickle_test():
    DEFAULT_PATH = '/home/joe/git/visual_genome_test'
    cache = pjoin(DEFAULT_PATH, '1.2_cache/pre_gt_roidb', '1.pkl')
    cache_flip = pjoin(DEFAULT_PATH, '1.2_cache/pre_gt_roidb', '1_flip.pkl')
    ori = pjoin(DEFAULT_PATH, '1.2', 'pre_gt_roidb.pkl')
    phra = pjoin(DEFAULT_PATH, '1.2', 'pre_gt_phrases.pkl')
    with open(cache, 'rb') as fc:
        data_cache = cPickle.load(fc)
    with open(cache_flip, 'rb') as f:
        data_flip = cPickle.load(f)
    with open(ori, 'rb') as fo:
        data_ori = cPickle.load(fo)
    with open(phra, 'rb') as fp:
        data_phra = cPickle.load(fp)
    # from IPython import embed;
    # embed()

    print(data_cache)
    print ('flip------------------')
    print(data_flip)
    print ('ori------------------')
    print(data_ori)
    print("data ori length:", len(data_ori))
    print ('phrase------------------')
    print (data_phra)
    # print (data_phra[2239]) 
Example #28
Source File: read_FlowersDataset.py    From Colorization.tensorflow with MIT License 5 votes
def read_dataset(data_dir):
    pickle_filename = "flowers_data.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        flower_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
        result = create_image_lists(os.path.join(data_dir, flower_folder))
        print "Training set: %d" % len(result['train'])
        print "Test set: %d" % len(result['test'])
        print "Validation set: %d" % len(result['validation'])
        print "Pickling ..."
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print "Found pickle file!"

    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_images = result['train']
        testing_images = result['test']
        validation_images = result['validation']

        del result

    print ("Training: %d, Validation: %d, Test: %d" % (
        len(training_images), len(validation_images), len(testing_images)))
    return training_images, testing_images, validation_images 
Example #29
Source File: 1_prepare_pickle_200.py    From Neural-Network-Programming-with-TensorFlow with MIT License 5 votes
def merge_datasets(pickle_files, train_size, valid_size=0):
  num_classes = len(pickle_files)
  valid_dataset, valid_labels = make_arrays(valid_size, image_size, 3)
  train_dataset, train_labels = make_arrays(train_size, image_size, 3)
  vsize_per_class = valid_size // num_classes
  tsize_per_class = train_size // num_classes
    
  start_v, start_t = 0, 0
  end_v, end_t = vsize_per_class, tsize_per_class
  end_l = vsize_per_class+tsize_per_class
  for label, pickle_file in enumerate(pickle_files):       
    try:
      with open(pickle_file, 'rb') as f:
        letter_set = pickle.load(f)
        # let's shuffle the letters to have random validation and training set
        np.random.shuffle(letter_set)
        if valid_dataset is not None:
          valid_letter = letter_set[:vsize_per_class, :, :]
          valid_dataset[start_v:end_v, :, :] = valid_letter
          valid_labels[start_v:end_v] = label
          start_v += vsize_per_class
          end_v += vsize_per_class
                    
        train_letter = letter_set[vsize_per_class:end_l, :, :]
        train_dataset[start_t:end_t, :, :] = train_letter
        train_labels[start_t:end_t] = label
        start_t += tsize_per_class
        end_t += tsize_per_class
    except Exception as e:
      print('Unable to process data from', pickle_file, ':', e)
      raise
    
  return valid_dataset, valid_labels, train_dataset, train_labels 
Example #30
Source File: visual_genome.py    From densecap-tensorflow with MIT License 5 votes
def _load_rpn_roidb(self, gt_roidb):
        filename = self.config['rpn_file']
        print('loading {}'.format(filename))
        assert os.path.exists(filename), \
            'layers data not found at: {}'.format(filename)
        with open(filename, 'rb') as f:
            box_list = cPickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)